In [None]:
import json
import pandas as pd
import numpy as np

# The kernel was reset, so both 2022 source datasets are read from disk again.
def _read_json(path):
    """Parse and return the UTF-8 encoded JSON document stored at ``path``."""
    with open(path, "r", encoding="utf-8") as source:
        return json.load(source)


formal_data = _read_json("/mnt/data/formal_volunteering_2022.json")
informal_data = _read_json("/mnt/data/informal_volunteering_2022.json")

# Synthetic-year generator: every real count is rescaled by a random factor
# while each record, its position and the within-group proportions are kept.
_SCALE_RANGE = (0.85, 1.15)  # a fake total is 85%-115% of the real total


def _make_scale_sampler(seed):
    """Return a zero-argument callable that draws one random scale factor."""
    # Without a seed, keep using numpy's global random state so callers that
    # rely on ``np.random.seed(...)`` behave exactly as before.
    if seed is None:
        uniform = np.random.uniform
    else:
        uniform = np.random.default_rng(seed).uniform
    low, high = _SCALE_RANGE
    return lambda: float(uniform(low, high))


def _rescale(count, group_total, target_total):
    """Return ``count``'s share of ``group_total`` applied to ``target_total``.

    The result is rounded to one decimal place. A group whose counts sum to
    zero has no proportions to preserve, so the count is returned unchanged
    instead of dividing by zero (which would put NaN into the JSON output).
    """
    if not group_total:
        return round(float(count), 1)
    return round(float(count) / group_total * target_total, 1)


def _fake_total_section(entries, draw_scale):
    """Build the fake "Total" section: a new grand total plus rescaled rows."""
    total_entry = next((e for e in entries if "all_volunteers" in e), None)
    if total_entry is None:
        raise ValueError(
            'the "Total" section has no entry with an "all_volunteers" key'
        )
    year_total = round(float(total_entry["all_volunteers"]) * draw_scale(), 1)

    # NOTE: the named rows are renormalised so that they sum to the new grand
    # total, even if the source rows did not sum to the source grand total.
    named = [e for e in entries if "name" in e]
    base = sum(e["count"] for e in named)
    rescaled = [
        {**e, "count": _rescale(e["count"], base, year_total)} for e in named
    ]
    return [{"all_volunteers": year_total}] + rescaled


def _fake_category_section(entries, draw_scale):
    """Rescale each category of a breakdown section by its own random factor."""
    # First pass: per-category totals, in first-seen order. Records without a
    # "category" key are kept and treated as one group (key ``None``).
    totals = {}
    for e in entries:
        key = e.get("category")
        totals[key] = totals.get(key, 0) + e["count"]
    targets = {key: total * draw_scale() for key, total in totals.items()}

    # Second pass: emit the records in their original order.
    rescaled = []
    for e in entries:
        key = e.get("category")
        new_count = _rescale(e["count"], totals[key], targets[key])
        rescaled.append({**e, "count": new_count})
    return rescaled


def generate_fake_year_data(original_data, years=(2002, 2012, 2016), seed=None):
    """Generate synthetic per-year datasets from one real dataset.

    For every requested year, each section of ``original_data`` is copied with
    its ``"count"`` values rescaled:

    * ``"Total"`` section: the ``"all_volunteers"`` value is multiplied by a
      random factor, and the entries that have a ``"name"`` key are
      redistributed over that new total in their original proportions.
    * Any other section: entries are grouped by ``"category"``; each group
      gets its own random factor and keeps its internal proportions.

    Records keep their original order and keys, counts are rounded to one
    decimal place, and ``original_data`` is not modified.

    Args:
        original_data: Mapping of section name to a list of record dicts.
        years: Iterable of year labels; each becomes a ``str`` key in the
            result.
        seed: Optional seed for reproducible output. When ``None`` the global
            ``np.random`` state is used.

    Returns:
        Dict mapping ``str(year)`` to a dict shaped like ``original_data``.

    Raises:
        ValueError: If the ``"Total"`` section has no ``"all_volunteers"``
            entry.
        KeyError: If a record that must be rescaled has no ``"count"`` key.
    """
    draw_scale = _make_scale_sampler(seed)
    fake_datasets = {}

    for year in years:
        year_data = {}
        for section, entries in original_data.items():
            if section == "Total":
                year_data[section] = _fake_total_section(entries, draw_scale)
            else:
                year_data[section] = _fake_category_section(entries, draw_scale)
        fake_datasets[str(year)] = year_data
    return fake_datasets

# Build both synthetic datasets first, then write each one to its output file.
fake_formal = generate_fake_year_data(formal_data)
fake_informal = generate_fake_year_data(informal_data)

for out_path, payload in (
    ("/mnt/data/formal_volunteering_fake_data.json", fake_formal),
    ("/mnt/data/informal_volunteering_fake_data.json", fake_informal),
):
    with open(out_path, "w", encoding="utf-8") as sink:
        json.dump(payload, sink, indent=2)

# Final expression of the cell: displays the year keys of each dataset.
(fake_formal.keys(), fake_informal.keys())
