In [None]:
# Import necessary libraries
import json

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import MetadataChangeEventClass
from fastavro import parse_schema, reader, writer
from grafana_api.grafana_face import GrafanaFace
from sqlalchemy import create_engine

In [None]:
# Define functions for converting CSV and JSON to Parquet and Avro formats

def csv_to_parquet(csv_file, parquet_file):
    """Convert a CSV file into a Parquet file.

    The CSV is loaded with ``pd.read_csv``, turned into an Arrow table via
    ``pa.Table.from_pandas`` and written out with ``pq.write_table``. A
    confirmation line is printed once the write has completed.

    Args:
        csv_file: CSV input, forwarded unchanged to ``pd.read_csv``.
        parquet_file: Output target, forwarded unchanged to ``pq.write_table``.
    """
    frame = pd.read_csv(csv_file)
    pq.write_table(pa.Table.from_pandas(frame), parquet_file)
    print(f"CSV file {csv_file} successfully converted to Parquet format at {parquet_file}")

def json_to_parquet(json_file, parquet_file):
    """Convert a JSON file into a Parquet file.

    The JSON is parsed into a DataFrame by ``pd.read_json``; the frame is then
    converted to an Arrow table and persisted with ``pq.write_table``. A
    confirmation line is printed once the write has completed.

    Args:
        json_file: JSON input, forwarded unchanged to ``pd.read_json``.
        parquet_file: Output target, forwarded unchanged to ``pq.write_table``.
    """
    json_frame = pd.read_json(json_file)
    arrow_table = pa.Table.from_pandas(json_frame)
    pq.write_table(arrow_table, parquet_file)
    print(
        f"JSON file {json_file} successfully converted to Parquet format at {parquet_file}"
    )

def csv_to_avro(csv_file, avro_file, schema):
    """Convert a CSV file into an Avro file.

    Each CSV row becomes one dict (column name -> value) and the resulting
    list is handed to fastavro's ``writer`` together with the parsed schema.
    A confirmation line is printed once the write has completed.

    Args:
        csv_file: CSV input, forwarded unchanged to ``pd.read_csv``.
        avro_file: Path of the Avro file to create; opened in binary write mode.
        schema: Avro schema, passed through ``parse_schema`` before writing.
    """
    rows = pd.read_csv(csv_file).to_dict(orient='records')
    avro_schema = parse_schema(schema)
    with open(avro_file, 'wb') as sink:
        writer(sink, avro_schema, rows)
    print(f"CSV file {csv_file} successfully converted to Avro format at {avro_file}")

def json_to_avro(json_file, avro_file, schema):
    """Convert a JSON file into an Avro file.

    The JSON document is loaded with ``json.load`` and written with fastavro's
    ``writer`` using the parsed schema. A top-level JSON array is written as
    one record per element; a top-level JSON object is treated as a single
    record. A confirmation line is printed once the write has completed.

    Args:
        json_file: Path of the JSON input file (read as UTF-8).
        avro_file: Path of the Avro file to create; opened in binary write mode.
        schema: Avro schema, passed through ``parse_schema`` before writing.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(json_file, encoding='utf-8') as f:
        records = json.load(f)
    # The Avro writer needs an iterable of records, so a lone top-level object
    # is wrapped in a one-element list instead of being passed as a bare dict.
    if isinstance(records, dict):
        records = [records]
    parsed_schema = parse_schema(schema)
    with open(avro_file, 'wb') as out:
        writer(out, parsed_schema, records)
    print(f"JSON file {json_file} successfully converted to Avro format at {avro_file}")


In [None]:
# Define functions for saving data to database

def save_parquet_to_db(parquet_file, db_uri, table_name):
    """Load a Parquet file and store its contents in a database table.

    The file is read with ``pd.read_parquet`` and written with
    ``DataFrame.to_sql`` using ``if_exists='replace'``, so an existing table
    of the same name is replaced. A confirmation line is printed on success.

    Args:
        parquet_file: Parquet input, forwarded unchanged to ``pd.read_parquet``.
        db_uri: Database URL handed to ``create_engine``.
        table_name: Name of the destination table.
    """
    df = pd.read_parquet(parquet_file)
    engine = create_engine(db_uri)
    try:
        df.to_sql(table_name, engine, if_exists='replace')
    finally:
        # The engine is created per call, so release its pooled connections
        # here rather than leaving them open until garbage collection.
        engine.dispose()
    print(f"Parquet file {parquet_file} successfully saved to database table {table_name}")

def save_avro_to_db(avro_file, db_uri, table_name, schema):
    """Load an Avro file and store its records in a database table.

    All records are read with fastavro's ``reader`` (``schema`` is passed as
    its second argument), collected into a DataFrame and written with
    ``DataFrame.to_sql`` using ``if_exists='replace'``, so an existing table
    of the same name is replaced. A confirmation line is printed on success.

    Args:
        avro_file: Path of the Avro input file; opened in binary read mode.
        db_uri: Database URL handed to ``create_engine``.
        table_name: Name of the destination table.
        schema: Schema forwarded to the Avro reader.
    """
    with open(avro_file, 'rb') as f:
        # Use a distinct local name: assigning the result to `reader` would make
        # `reader` a local variable and raise UnboundLocalError on this call.
        avro_reader = reader(f, schema)
        # Materialise while the file is still open; the reader streams lazily.
        records = list(avro_reader)
    df = pd.DataFrame(records)
    engine = create_engine(db_uri)
    try:
        df.to_sql(table_name, engine, if_exists='replace')
    finally:
        # The engine is created per call, so release its pooled connections
        # here rather than leaving them open until garbage collection.
        engine.dispose()
    print(f"Avro file {avro_file} successfully saved to database table {table_name}")
