In [4]:
import pandas as pd
import json
from pathlib import Path

# Folder layout
# Everything is resolved relative to the directory the notebook is run from
base_folder = Path.cwd()

# Folder holding the input CSV files
input_folder = base_folder.joinpath("02_data_cleaning")

# Folder that receives the generated JSON files
output_folder = base_folder.joinpath("04_csv_to_json_conversion")


# Define a function to convert a CSV file to JSON
def convert_csv_to_json(csv_path: Path, json_path: Path):
    """Convert one CSV file into a JSON file containing a list of row records.

    Each CSV row becomes one JSON object keyed by column name. Cells that
    pandas reads as missing are written as JSON ``null``.

    Args:
        csv_path: CSV file to read.
        json_path: JSON file to write (UTF-8, 4-space indent, non-ASCII
            characters kept as-is). Its parent folder is created if needed.

    Returns:
        None. The outcome is reported with ``print``; any exception raised
        during the conversion is caught and printed, not re-raised.
    """
    try:
        df = pd.read_csv(csv_path)

        # pandas marks missing cells as NaN, which json.dump would emit as the
        # bare token NaN - not valid JSON. Casting to object first lets the
        # columns hold None, which is serialised as null.
        df = df.astype(object).where(df.notna(), None)
        records = df.to_dict(orient="records")

        # Make sure the destination folder exists before opening the file
        json_path.parent.mkdir(parents=True, exist_ok=True)

        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=4, ensure_ascii=False)

        print(f"Converted: {csv_path.name} to {json_path.name}")

    except Exception as e:
        print(f"Error while converting {csv_path.name}: {e}")

# Loop through all CSV files in the input folder
# sorted() turns the glob generator into a list (so it can be counted and
# checked for emptiness) and makes the processing order deterministic
csv_files = sorted(input_folder.glob("*.csv"))

if csv_files:
    # Create the output folder up front; without it every write would fail
    output_folder.mkdir(parents=True, exist_ok=True)

    for csv_file in csv_files:
        json_file = output_folder/f"{csv_file.stem}.json"

        # Conversion function call
        convert_csv_to_json(csv_file, json_file)

    # convert_csv_to_json prints its errors instead of raising them, so this
    # loop cannot know whether every file succeeded; do not claim that it did
    print(f"\nFinished processing {len(csv_files)} CSV file(s); check the messages above for any errors")
else:
    print(f"No CSV files found in {input_folder}")


Converted: 02_departments_data.csv to 02_departments_data.json
Converted: 03_staff_data.csv to 03_staff_data.json
Converted: 04_patients_data.csv to 04_patients_data.json
Converted: 05_visits_data.csv to 05_visits_data.json
Converted: 06_appointments_data.csv to 06_appointments_data.json
Converted: 07_billing_data.csv to 07_billing_data.json
Converted: 08_tests_data.csv to 08_tests_data.json
Converted: 09_test_results_data.csv to 09_test_results_data.json

All CSV files converted to JSON successfully


In [4]:
# Conversion validation script

import pandas as pd
import json
from pathlib import Path

# Resolve the folders used by the validation, starting from the working directory
base_folder = Path.cwd()

# CSV files that were used as conversion input
input_folder = Path(base_folder, "02_data_cleaning")

# JSON files to be checked against those CSV files
output_folder = Path(base_folder, "04_csv_to_json_conversion")

def _load_json_counting_constants(path):
    """Load a JSON file and count the NaN / Infinity / -Infinity tokens in it.

    Python's json module accepts these tokens although they are not part of
    the JSON standard, so a file containing them loads without error here
    while stricter parsers reject it.

    Args:
        path: JSON file to read (UTF-8).

    Returns:
        A ``(data, count)`` tuple: the parsed content and the number of
        non-standard constant tokens met while parsing.
    """
    seen = 0

    def _remember(token):
        nonlocal seen
        seen += 1
        # Same value json.load would produce by default for this token
        return float(token)

    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f, parse_constant=_remember)

    return data, seen


# Loop through each CSV file (sorted so the report order is deterministic)
for csv_file in sorted(input_folder.glob("*.csv")):
    json_file = output_folder/f"{csv_file.stem}.json"

    try:
        df_csv = pd.read_csv(csv_file)
        json_data, nonstandard_count = _load_json_counting_constants(json_file)

        # Basic checks
        csv_count = len(df_csv)
        json_count = len(json_data)

        if csv_count == json_count:
            print(f"{csv_file.name}: Record count matched, CSV={csv_count}, JSON={json_count}")
        else:
            print(f"{csv_file.name}: Mismatch CSV={csv_count}, JSON={json_count}")

        # A matching record count does not mean the file is valid JSON:
        # flag constants that only lenient parsers accept
        if nonstandard_count:
            print(f"{csv_file.name}: Warning, JSON contains {nonstandard_count} NaN/Infinity value(s), which are not valid JSON")

    except Exception as e:
        print(f"Error while validating {csv_file.name}: {e}")


02_departments_data.csv: Record count matched, CSV=10, JSON=10
03_staff_data.csv: Record count matched, CSV=500, JSON=500
04_patients_data.csv: Record count matched, CSV=50000, JSON=50000
05_visits_data.csv: Record count matched, CSV=200000, JSON=200000
06_appointments_data.csv: Record count matched, CSV=100000, JSON=100000
07_billing_data.csv: Record count matched, CSV=121124, JSON=121124
08_tests_data.csv: Record count matched, CSV=100, JSON=100
09_test_results_data.csv: Record count matched, CSV=500000, JSON=500000
