In [None]:
import json
import numpy as np

# Read the cleaned 2022 dataset; it is the baseline every synthetic year is derived from
with open("assets/gender_comparison_data_2022.json", encoding="utf-8") as f:
    data_2022 = json.load(f)

# Years for which synthetic data will be produced
years = [2006, 2012, 2016]

# Output is keyed by year (as a string); the 2022 data is carried over unchanged
output_data = {"2022": data_2022}

# Helper function to generate fake year data
def generate_year_data(section_data, rng=None):
    """Build a synthetic copy of one section by randomly perturbing its records.

    For each record the combined men + women count is scaled by a factor drawn
    uniformly from [0.85, 1.15], and the male percentage is shifted by a value
    drawn uniformly from [-5, 5] and then clamped to [5, 95]. The female
    percentage is the complement to 100, and both counts are re-derived from
    the new total and the new percentages.

    Args:
        section_data: Iterable of dicts. Each dict must provide "men_count",
            "women_count" and "men_perc", plus one identifying key among
            "num_orgs", "task", "area" and "hours_range/week" (checked in
            that order; the first one present is copied to the output).
        rng: Optional random source exposing ``uniform(low, high)``, such as
            a ``numpy.random.Generator``. When omitted, the global
            ``np.random`` state is used.

    Returns:
        A list with one new dict per input record, holding the identifying
        key plus "men_count", "men_perc", "women_count" and "women_perc",
        all numeric values rounded to one decimal place.

    Raises:
        KeyError: If a record lacks every identifying key, or lacks one of
            the required numeric fields.
    """
    # Identifying keys in priority order; exactly one is carried over per record.
    id_keys = ("num_orgs", "task", "area", "hours_range/week")
    if rng is None:
        rng = np.random

    fake_section = []
    for item in section_data:
        key_name = next((key for key in id_keys if key in item), None)
        if key_name is None:
            # Fail with an explicit message instead of the opaque
            # ``KeyError: None`` that ``item[None]`` would produce later.
            raise KeyError(
                f"record has no identifier key (expected one of {id_keys}): "
                f"{item!r}"
            )

        # Baseline total for this record
        total_2022 = item["men_count"] + item["women_count"]

        # Scale the total up or down by at most 15 %
        scale = rng.uniform(0.85, 1.15)
        total_new = round(total_2022 * scale, 1)

        # Shift the male share by at most 5 points, keeping it within [5, 95]
        variation = rng.uniform(-5, 5)
        men_perc_new = float(np.clip(item["men_perc"] + variation, 5, 95))
        women_perc_new = 100 - men_perc_new

        # Re-derive the counts from the new total and the unrounded shares
        men_new = round(total_new * men_perc_new / 100, 1)
        women_new = round(total_new * women_perc_new / 100, 1)

        fake_section.append({
            key_name: item[key_name],
            "men_count": men_new,
            "men_perc": round(men_perc_new, 1),
            "women_count": women_new,
            "women_perc": round(women_perc_new, 1),
        })

    return fake_section

# For each target year, synthesize every section from its 2022 counterpart
for year in years:
    output_data[str(year)] = {
        section_name: generate_year_data(section_content)
        for section_name, section_content in data_2022.items()
    }

# Write the combined multi-year dataset to disk as pretty-printed UTF-8 JSON
output_path = "assets/gender_comparison_data_multiyear.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(output_data, f, ensure_ascii=False, indent=2)

print(f"Saved to {output_path}")


Saved to assets/gender_comparison_data_multiyear.json
