## Create Ragged Folder Structure

In [1]:
import os
import json
import random

def create_random_json():
    data = {
        "id": random.randint(1, 100),
        "name": random.choice(["Alice", "Bob", "Charlie", "Diana", "Eve"]),
        "details": {
            "age": random.randint(20, 50),
            "city": random.choice(["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"])
        },
        "scores": [random.randint(50, 100) for _ in range(random.randint(3, 6))]
    }
    return data
    
def create_ragged_structure(base_path):
    os.makedirs(base_path, exist_ok=True)
    for i in range(random.randint(3, 5)):
        subdir = os.path.join(base_path, f"folder_{i}")
        os.makedirs(subdir, exist_ok=True)
        for j in range(random.randint(2, 4)):
            file_path = os.path.join(subdir, f"file_{j}.json")
            with open(file_path, "w") as json_file:
                json.dump(create_random_json(), json_file, indent=4)
        if random.choice([True, False]):
            nested_dir = os.path.join(subdir, f"nested_{i}")
            os.makedirs(nested_dir, exist_ok=True)
            for k in range(random.randint(1, 3)):
                file_path = os.path.join(nested_dir, f"file_{k}.json")
                with open(file_path, "w") as json_file:
                    json.dump(create_random_json(), json_file, indent=4)


def enhance_ragged_structure(base_path):
    extensions = ['txt', 'csv', 'xml', 'md']
    for i in range(random.randint(2, 4)):
        subdir = os.path.join(base_path, f"extra_folder_{i}")
        os.makedirs(subdir, exist_ok=True)
        for j in range(random.randint(1, 3)):
            # Add JSON files
            file_path_json = os.path.join(subdir, f"extra_file_{j}.json")
            with open(file_path_json, "w") as json_file:
                json.dump(create_random_json(), json_file, indent=4)
            
            # Add files with other extensions
            ext = random.choice(extensions)
            file_path_other = os.path.join(subdir, f"extra_file_{j}.{ext}")
            with open(file_path_other, "w") as other_file:
                other_file.write(f"This is a random {ext.upper()} file.\n")
        
        # Add further nesting randomly
        if random.choice([True, False]):
            nested_dir = os.path.join(subdir, f"deep_nested_{i}")
            os.makedirs(nested_dir, exist_ok=True)
            for k in range(random.randint(1, 2)):
                file_path_other = os.path.join(nested_dir, f"deep_file_{k}.txt")
                with open(file_path_other, "w") as other_file:
                    other_file.write("This is a deeply nested text file.\n")

base_directory = "../ragged_dir/"
create_ragged_structure(base_directory)
enhance_ragged_structure(base_directory)


print(f"Ragged directory structure created at: {base_directory}")


Ragged directory structure created at: ../ragged_dir/


## Read and generate CSV form json files

In [2]:
import pandas as pd
import json
from pandas import json_normalize
import os

In [3]:
def make_dataframe(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    df = json_normalize(data)
    return df

In [4]:
json_Dataframes = []
for root, dirs, files in os.walk(base_directory):
    for file in files:
        if file.endswith(".json"):
            file_path = os.path.join(root, file)
            
            json_Dataframes.append({"file_name":file,"df":make_dataframe(file_path)})

In [6]:
output_dir = "./outputs/csv_files"
os.makedirs(output_dir, exist_ok = True)
for df in json_Dataframes:
    file_path = os.path.join(output_dir,f'{df["file_name"]}.csv')
    df["df"].to_csv(file_path)
    print(f"file saved to: {file_path}")

file saved to: ./outputs/csv_files/file_1.json.csv
file saved to: ./outputs/csv_files/file_0.json.csv
file saved to: ./outputs/csv_files/file_2.json.csv
file saved to: ./outputs/csv_files/file_1.json.csv
file saved to: ./outputs/csv_files/file_0.json.csv
file saved to: ./outputs/csv_files/file_2.json.csv
file saved to: ./outputs/csv_files/file_3.json.csv
file saved to: ./outputs/csv_files/file_1.json.csv
file saved to: ./outputs/csv_files/file_0.json.csv
file saved to: ./outputs/csv_files/file_2.json.csv
file saved to: ./outputs/csv_files/file_0.json.csv
file saved to: ./outputs/csv_files/extra_file_2.json.csv
file saved to: ./outputs/csv_files/extra_file_1.json.csv
file saved to: ./outputs/csv_files/extra_file_0.json.csv
file saved to: ./outputs/csv_files/extra_file_1.json.csv
file saved to: ./outputs/csv_files/extra_file_0.json.csv
file saved to: ./outputs/csv_files/extra_file_0.json.csv
file saved to: ./outputs/csv_files/extra_file_2.json.csv
file saved to: ./outputs/csv_files/extra