<a href="https://colab.research.google.com/github/BbekShr/Coding_exercise/blob/main/Flatten_datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import json
import csv
import os

def flatten_record(record):
    """Return a one-level copy of *record* with nested values expanded.

    Only the first level of nesting is expanded:

    * dict value: each entry becomes ``"<key>_<sub_key>"``, except a
      ``'$oid'`` entry, which is stored directly under ``<key>``.
    * list value: a dict element at index ``i`` contributes
      ``"<key>_<i>_<sub_key>"`` entries; any other element is stored
      under ``"<key>_<i>"``.
    * anything else is copied unchanged under ``<key>``.

    Empty dicts and empty lists contribute no entries, and values found
    inside a nested dict or list element are stored as-is (not expanded
    further).
    """
    row = {}

    for field, content in record.items():
        if isinstance(content, dict):
            for inner_key, inner_value in content.items():
                # '$oid' collapses onto the parent key instead of "<key>_$oid".
                column = field if inner_key == '$oid' else f"{field}_{inner_key}"
                row[column] = inner_value
            continue

        if not isinstance(content, list):
            # Scalars (and any other non-container value) pass through.
            row[field] = content
            continue

        for position, element in enumerate(content):
            prefix = f"{field}_{position}"
            if isinstance(element, dict):
                row.update(
                    {f"{prefix}_{inner_key}": inner_value
                     for inner_key, inner_value in element.items()}
                )
            else:
                row[prefix] = element

    return row

def json_to_csv(json_file_path, csv_file_path):
    """Convert a line-delimited JSON file into a CSV file.

    Every non-blank line of the input is parsed as one JSON document and
    flattened with ``flatten_record``. The CSV header is the sorted union
    of all keys seen across the flattened records; a record that lacks a
    column gets ``csv.DictWriter``'s default empty value in that cell.

    Args:
        json_file_path: Path of the input file, one JSON object per line.
        csv_file_path: Path of the CSV file to create or overwrite.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If either file cannot be opened.
    """
    # JSON text is UTF-8; being explicit avoids depending on the platform's
    # default locale encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        # Skip blank lines (e.g. a trailing empty line), which json.loads
        # would otherwise reject with a JSONDecodeError.
        data = [json.loads(line) for line in stripped_lines if line]

    # Flatten records and create headers dynamically
    flattened_data = [flatten_record(record) for record in data]
    header = sorted({key for record in flattened_data for key in record})

    # Save the CSV to the specified path. newline='' lets the csv module
    # control line endings itself.
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=header)
        writer.writeheader()
        writer.writerows(flattened_data)

    print(f"CSV saved to {csv_file_path}")

def convert_json_files_in_folder(folder_path):
    """Convert every ``*.json`` entry directly inside *folder_path* to CSV.

    Results are written to a ``flatten_data`` sub-folder of *folder_path*
    (created if missing); each output keeps the input's base name with a
    ``.csv`` extension. Sub-folders are not searched.
    """
    output_folder = os.path.join(folder_path, 'flatten_data')
    os.makedirs(output_folder, exist_ok=True)

    json_names = [name for name in os.listdir(folder_path) if name.endswith('.json')]

    for name in json_names:
        stem, _extension = os.path.splitext(name)
        source_path = os.path.join(folder_path, name)
        target_path = os.path.join(output_folder, f"{stem}.csv")

        # Delegate the actual parsing/writing to json_to_csv.
        json_to_csv(source_path, target_path)

# Folder containing the input .json files; the CSVs are written to a
# 'flatten_data' sub-folder inside it. NOTE(review): this looks like a Google
# Drive mount path in Colab -- adjust when running elsewhere.
folder_path = '/content/drive/MyDrive/Colab Notebooks/Data folder'

# Run the conversion only when executed directly (script or notebook cell),
# so importing this code does not touch the filesystem.
if __name__ == "__main__":
    convert_json_files_in_folder(folder_path)

CSV saved to /content/drive/MyDrive/Colab Notebooks/Data folder/flatten_data/users.csv
CSV saved to /content/drive/MyDrive/Colab Notebooks/Data folder/flatten_data/receipts.csv
CSV saved to /content/drive/MyDrive/Colab Notebooks/Data folder/flatten_data/brands.csv
