In [2]:
import pandas as pd
import json
import pyarrow as pa
import pyarrow.parquet as pq
import fastavro
from io import BytesIO
import os

In [None]:
# Sample records stored column-wise: every key is a column name and every
# list holds that column's values, one entry per person.
data = dict(
    name=['John', 'Alice', 'Bob'],
    age=[30, 25, 35],
    city=['New York', 'San Francisco', 'Chicago'],
)

In [None]:
# Folder that every example below writes its sample file into.
# exist_ok=True lets this cell be re-run without failing when the folder
# is already there.
output_dir = 'output'
os.makedirs(name=output_dir, exist_ok=True)

In [None]:
# 1. CSV Example
def csv_example():
    """Round-trip the sample data through a CSV file.

    Builds a DataFrame from the module-level ``data`` dict, writes it to
    ``sample.csv`` under ``output_dir`` without the index column, then reads
    the file back and prints the loaded frame.
    """
    frame = pd.DataFrame(data)
    target = f'{output_dir}/sample.csv'
    frame.to_csv(target, index=False)
    print(f"CSV file created: {target}")

    # Load the file that was just written to show what the round trip yields.
    loaded = pd.read_csv(target)
    print("\nRead CSV data:", loaded, sep="\n")

In [None]:
# 2. JSON Example
def json_example():
    """Round-trip the sample data through a JSON file.

    Serialises the module-level ``data`` dict to ``sample.json`` under
    ``output_dir``, then parses the file again and prints the resulting
    Python object.
    """
    target = f'{output_dir}/sample.json'
    with open(target, 'w') as sink:
        sink.write(json.dumps(data))
    print(f"\nJSON file created: {target}")

    # Parse the file that was just written.
    with open(target, 'r') as source:
        loaded = json.loads(source.read())
    print("\nRead JSON data:", loaded, sep="\n")

In [None]:
# 3. Parquet Example
def parquet_example():
    """Round-trip the sample data through a Parquet file.

    Builds a DataFrame from the module-level ``data`` dict, writes it to
    ``sample.parquet`` under ``output_dir``, then reads the file back and
    prints the loaded frame.
    """
    frame = pd.DataFrame(data)
    target = f'{output_dir}/sample.parquet'
    frame.to_parquet(target)
    print(f"\nParquet file created: {target}")

    # Load the file that was just written to show what the round trip yields.
    loaded = pd.read_parquet(target)
    print("\nRead Parquet data:", loaded, sep="\n")

In [None]:
# 4. Avro Example
def avro_example():
    """Round-trip the sample data through an Avro file.

    Converts the column-wise module-level ``data`` dict into one record per
    person, writes the records to ``sample.avro`` under ``output_dir`` using
    the ``User`` schema defined below, then reads the file back and prints
    each record.
    """
    # One schema field per key of the sample data.
    user_schema = {
        'type': 'record',
        'name': 'User',
        'fields': [
            {'name': 'name', 'type': 'string'},
            {'name': 'age', 'type': 'int'},
            {'name': 'city', 'type': 'string'},
        ],
    }

    # Pivot the column-wise lists into row dicts; the 'name' column decides
    # how many rows are produced.
    rows = [
        {
            'name': data['name'][idx],
            'age': data['age'][idx],
            'city': data['city'][idx],
        }
        for idx in range(len(data['name']))
    ]

    target = f'{output_dir}/sample.avro'
    with open(target, 'wb') as sink:
        fastavro.writer(sink, user_schema, rows)
    print(f"\nAvro file created: {target}")

    # Iterate the records while the file is still open.
    with open(target, 'rb') as source:
        avro_records = fastavro.reader(source)
        print("\nRead Avro data:")
        for row in avro_records:
            print(row)

In [None]:
if __name__ == "__main__":
    print("Demonstrating different data formats:\n")

    # Run every format demo in order: CSV, JSON, Parquet, then Avro.
    for demo in (csv_example, json_example, parquet_example, avro_example):
        demo()