# In [None]:
import pandas as pd
import numpy as np
import json

# 2022 reference population for every region, kept as ordered (name, population)
# pairs. "Austria" is the national total; the remaining entries are its regions.
# NOTE(review): figures are presumably in thousands of persons -- confirm against
# the source table.
regions = list(
    {
        "Austria": 7555,
        "Burgenland": 255,
        "Kärnten": 481.8,
        "Niederösterreich": 1435.3,
        "Oberösterreich": 1256.5,
        "Salzburg": 471.3,
        "Steiermark": 1065.4,
        "Tirol": 641.4,
        "Vorarlberg": 333.1,
        "Wien": 1615.4,
    }.items()
)

# Years for which one record per region is generated.
years = [2006, 2012, 2016, 2022]

# Real nation-wide (Austria) participation rates for each of those years,
# expressed in percent of the population.
austria_real = {
    2006: {
        "perc_volunteers_from_pop": 44.0,
        "perc_formal_from_pop": 28.0,
        "perc_informal_from_pop": 27.0,
    },
    2012: {
        "perc_volunteers_from_pop": 46.0,
        "perc_formal_from_pop": 28.0,
        "perc_informal_from_pop": 31.0,
    },
    2016: {
        "perc_volunteers_from_pop": 46.0,
        "perc_formal_from_pop": 31.0,
        "perc_informal_from_pop": 30.0,
    },
    2022: {
        "perc_volunteers_from_pop": 49.4,
        "perc_formal_from_pop": 25.8,
        "perc_informal_from_pop": 36.7,
    },
}

# Complete real 2022 record for Austria as a whole (taken from the provided JSON).
# Hour statistics exist for all volunteers ("vlntrs"), formal and informal
# volunteering; each has a mean plus 25 / median / 75 values.
# NOTE(review): the 25/75 keys are presumably the lower and upper quartiles -- confirm.
austria_full_2022 = {
    # Population and number of volunteers.
    "total_pop": 7555,
    "total_volunteers": 3728.5,
    # Participation rates, in percent of the population.
    "perc_volunteers_from_pop": 49.4,
    "perc_formal_from_pop": 25.8,
    "perc_informal_from_pop": 36.7,
    # Mean hours.
    "avg_hours_vlntrs": 6.86, "avg_hours_formal": 4.82, "avg_hours_informal": 5.96,
    # Hour distribution: all volunteers.
    "25_hrs_vlntrs": 0.77, "median_hours_vlntrs": 2.89, "75_hrs_vlntrs": 8,
    # Hour distribution: formal volunteering.
    "25_hrs_formal": 0.5, "median_hours_formal": 2, "75_hrs_formal": 5,
    # Hour distribution: informal volunteering.
    "25_hrs_informal": 0.48, "median_hours_informal": 2.16, "75_hrs_informal": 7,
}

# Complete real 2022 record for every region other than the national total.
# Each entry carries the same keys, in the same order, as the national 2022 record.
# Layout per region: totals / participation rates (percent) / mean hours /
# hour distribution for all volunteers, formal and informal volunteering.
region_full_2022 = {
    "Burgenland": {
        "total_pop": 255, "total_volunteers": 118.7,
        "perc_volunteers_from_pop": 46.5, "perc_formal_from_pop": 24.5, "perc_informal_from_pop": 33.6,
        "avg_hours_vlntrs": 6.06, "avg_hours_formal": 3.23, "avg_hours_informal": 6.13,
        "25_hrs_vlntrs": 0.78, "median_hours_vlntrs": 2.66, "75_hrs_vlntrs": 7.93,
        "25_hrs_formal": 0.49, "median_hours_formal": 1.84, "75_hrs_formal": 3.5,
        "25_hrs_informal": 0.58, "median_hours_informal": 2.3, "75_hrs_informal": 8.47,
    },
    "Kärnten": {
        "total_pop": 481.8, "total_volunteers": 241.5,
        "perc_volunteers_from_pop": 50.1, "perc_formal_from_pop": 24.5, "perc_informal_from_pop": 38.5,
        "avg_hours_vlntrs": 6.69, "avg_hours_formal": 4.68, "avg_hours_informal": 5.85,
        "25_hrs_vlntrs": 1, "median_hours_vlntrs": 2.84, "75_hrs_vlntrs": 7.93,
        "25_hrs_formal": 0.7, "median_hours_formal": 2, "75_hrs_formal": 5,
        "25_hrs_informal": 0.58, "median_hours_informal": 2.33, "75_hrs_informal": 7.93,
    },
    "Niederösterreich": {
        "total_pop": 1435.3, "total_volunteers": 754.8,
        "perc_volunteers_from_pop": 52.6, "perc_formal_from_pop": 27.9, "perc_informal_from_pop": 37.6,
        "avg_hours_vlntrs": 7.44, "avg_hours_formal": 4.89, "avg_hours_informal": 6.85,
        "25_hrs_vlntrs": 0.77, "median_hours_vlntrs": 2.62, "75_hrs_vlntrs": 9.47,
        "25_hrs_formal": 0.47, "median_hours_formal": 1.74, "75_hrs_formal": 4.93,
        "25_hrs_informal": 0.48, "median_hours_informal": 2.29, "75_hrs_informal": 8.69,
    },
    "Oberösterreich": {
        "total_pop": 1256.5, "total_volunteers": 641.6,
        "perc_volunteers_from_pop": 51.1, "perc_formal_from_pop": 31.0, "perc_informal_from_pop": 36.7,
        "avg_hours_vlntrs": 7.27, "avg_hours_formal": 5.27, "avg_hours_informal": 5.8,
        "25_hrs_vlntrs": 0.82, "median_hours_vlntrs": 3, "75_hrs_vlntrs": 8,
        "25_hrs_formal": 0.48, "median_hours_formal": 1.87, "75_hrs_formal": 5.6,
        "25_hrs_informal": 0.47, "median_hours_informal": 2.03, "75_hrs_informal": 6,
    },
    "Salzburg": {
        "total_pop": 471.3, "total_volunteers": 253.3,
        "perc_volunteers_from_pop": 53.8, "perc_formal_from_pop": 29.8, "perc_informal_from_pop": 38.1,
        "avg_hours_vlntrs": 7.49, "avg_hours_formal": 6.12, "avg_hours_informal": 5.79,
        "25_hrs_vlntrs": 0.93, "median_hours_vlntrs": 3.54, "75_hrs_vlntrs": 9.33,
        "25_hrs_formal": 0.7, "median_hours_formal": 2.47, "75_hrs_formal": 7,
        "25_hrs_informal": 0.52, "median_hours_informal": 2.8, "75_hrs_informal": 8,
    },
    "Steiermark": {
        "total_pop": 1065.4, "total_volunteers": 519.3,
        "perc_volunteers_from_pop": 48.7, "perc_formal_from_pop": 25.5, "perc_informal_from_pop": 35.8,
        "avg_hours_vlntrs": 6.65, "avg_hours_formal": 4.42, "avg_hours_informal": 5.99,
        "25_hrs_vlntrs": 0.96, "median_hours_vlntrs": 3.03, "75_hrs_vlntrs": 8,
        "25_hrs_formal": 0.58, "median_hours_formal": 2.02, "75_hrs_formal": 5,
        "25_hrs_informal": 0.7, "median_hours_informal": 2.53, "75_hrs_informal": 7,
    },
    "Tirol": {
        "total_pop": 641.4, "total_volunteers": 349.8,
        "perc_volunteers_from_pop": 54.5, "perc_formal_from_pop": 33.2, "perc_informal_from_pop": 39.8,
        "avg_hours_vlntrs": 6.58, "avg_hours_formal": 4.24, "avg_hours_informal": 5.62,
        "25_hrs_vlntrs": 0.73, "median_hours_vlntrs": 3, "75_hrs_vlntrs": 8.08,
        "25_hrs_formal": 0.47, "median_hours_formal": 2, "75_hrs_formal": 5.4,
        "25_hrs_informal": 0.47, "median_hours_informal": 2, "75_hrs_informal": 7,
    },
    "Vorarlberg": {
        "total_pop": 333.1, "total_volunteers": 176.8,
        "perc_volunteers_from_pop": 53.1, "perc_formal_from_pop": 29.7, "perc_informal_from_pop": 37.4,
        "avg_hours_vlntrs": 6.7, "avg_hours_formal": 3.81, "avg_hours_informal": 6.5,
        "25_hrs_vlntrs": 0.93, "median_hours_vlntrs": 3.06, "75_hrs_vlntrs": 7.67,
        "25_hrs_formal": 0.47, "median_hours_formal": 1.7, "75_hrs_formal": 4.17,
        "25_hrs_informal": 0.93, "median_hours_informal": 3, "75_hrs_informal": 7.4,
    },
    "Wien": {
        "total_pop": 1615.4, "total_volunteers": 672.7,
        "perc_volunteers_from_pop": 41.6, "perc_formal_from_pop": 15.7, "perc_informal_from_pop": 34.6,
        "avg_hours_vlntrs": 6.27, "avg_hours_formal": 5.04, "avg_hours_informal": 5.13,
        "25_hrs_vlntrs": 0.58, "median_hours_vlntrs": 2.33, "75_hrs_vlntrs": 6.97,
        "25_hrs_formal": 0.62, "median_hours_formal": 1.87, "75_hrs_formal": 4.35,
        "25_hrs_informal": 0.47, "median_hours_informal": 1.7, "75_hrs_informal": 5.83,
    },
}

# Back-cast every region to each requested year from its real 2022 record.
#
# This is a deliberately simple approximation, not a statistical model:
#   * total_pop        - 2022 population discounted by a constant annual growth rate
#   * perc_* fields    - Austria uses the real national figures; every other region
#                        scales its own 2022 share by the national trend
#                        (national value for that year / national value for 2022)
#   * total_volunteers - total_pop * perc_volunteers_from_pop / 100
#   * hour statistics  - 2022 value scaled by the change in perc_volunteers_from_pop
# For 2022 itself the real figures are emitted unchanged.
_BASE_YEAR = 2022
_ANNUAL_POP_GROWTH = 0.003  # assumed 0.3 % population growth per year
_PERC_FIELDS = ("perc_volunteers_from_pop", "perc_formal_from_pop", "perc_informal_from_pop")
_HOUR_PREFIXES = ("avg_hours_", "median_hours_", "25_hrs_", "75_hrs_")


def _build_record(year, region, pop_2022):
    """Return the output record for one (year, region) pair.

    Args:
        year: Year the record describes.
        region: Region name; "Austria" selects the national tables, any other
            name must be a key of ``region_full_2022``.
        pop_2022: The region's 2022 reference population.

    Returns:
        A dict with ``year``, ``region``, ``total_pop``, the three ``perc_*``
        fields and every remaining key of ``austria_full_2022``, in that order.

    Raises:
        KeyError: If ``region`` is unknown or its 2022 record lacks a field.
    """
    full_2022 = austria_full_2022 if region == "Austria" else region_full_2022[region]
    rec = {"year": year, "region": region}

    if year == _BASE_YEAR:
        # Keep the real figure untouched; rounding here would clobber it
        # (e.g. 481.8 -> 482) because later steps never overwrite existing keys.
        rec["total_pop"] = pop_2022
    else:
        # Undo compound growth: pop_year * (1 + g) ** n == pop_2022.
        # One decimal matches the precision of the 2022 source figures.
        growth = (1 + _ANNUAL_POP_GROWTH) ** (_BASE_YEAR - year)
        rec["total_pop"] = round(pop_2022 / growth, 1)

    # National figures for this year; fall back to 2022 when the year is unknown,
    # which makes the regional scale factor exactly 1.
    national = austria_real.get(year, austria_full_2022)
    for field in _PERC_FIELDS:
        if region == "Austria":
            rec[field] = national[field]
        else:
            scale = national[field] / austria_full_2022[field]
            rec[field] = round(full_2022[field] * scale, 2)

    if year == _BASE_YEAR:
        # Real data year: copy every remaining field verbatim.
        rec.update({k: v for k, v in full_2022.items() if k not in rec})
        return rec

    # Earlier years: derive the remaining fields from the 2022 record.
    volunteer_ratio = rec["perc_volunteers_from_pop"] / full_2022["perc_volunteers_from_pop"]
    for key in austria_full_2022:
        if key in rec:
            continue
        if key == "total_volunteers":
            rec[key] = round(rec["total_pop"] * rec["perc_volunteers_from_pop"] / 100, 2)
        elif key.startswith(_HOUR_PREFIXES):
            rec[key] = round(full_2022[key] * volunteer_ratio, 2)
        else:
            # Any other field is carried over from 2022 unchanged.
            rec[key] = round(full_2022[key], 2)
    return rec


records = [_build_record(year, region, pop) for year in years for region, pop in regions]

# Write all records as pretty-printed UTF-8 JSON. Non-ASCII region names are
# stored verbatim instead of being \u-escaped. The "assets" directory must
# already exist, otherwise open() raises FileNotFoundError.
with open("assets/Geo_interpolated_by_year.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(records, ensure_ascii=False, indent=2))

