In [5]:
import numpy as np
import pandas as pd
import os

# Input folder (per-year processed tables) and output folder (CPI weight files)
processed_tables_dir = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/processed_tables"
output_dir = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights"
os.makedirs(output_dir, exist_ok=True)  # no error if the folder already exists

# EU28 members as two-letter country codes, split into two groups
core_countries = {
    'AT', 'BE', 'DK', 'FI', 'DE', 'GB',
    'FR', 'IT', 'IE', 'LU', 'NL', 'SE',
}
periphery_countries = {
    'BG', 'CY', 'CZ', 'ES', 'EE', 'GR', 'HR', 'HU',
    'LV', 'LT', 'MT', 'PL', 'PT', 'RO', 'SK', 'SI',
}
# The EU28 aggregate is simply the union of both groups
eu28_countries = core_countries | periphery_countries

# ---------------------------------------------------
# Updated Function for Individual CPI Weights
# ---------------------------------------------------
def save_individual_cpi_weights_from_processed(final_demand_table, year, target_dir=None):
    """
    Save pre-calculated individual CPI weights for each country.

    Selects every column of ``final_demand_table`` whose name ends with
    "_cpi_weight", strips that suffix from the column names and writes the
    result (including the index) to "individual_cpi_weights_{year}.csv".
    The weights themselves are not computed here; they are assumed to be
    already present in the processed table.

    Parameters
    ----------
    final_demand_table : pandas.DataFrame
        Processed table with string column labels.
    year : str or int
        Only used to build the output file name.
    target_dir : str, optional
        Folder to write the CSV into. Defaults to the module-level
        ``output_dir``.

    Returns
    -------
    None
    """
    suffix = "_cpi_weight"
    if target_dir is None:
        target_dir = output_dir

    # Keep only the columns carrying the weight suffix
    cpi_weight_columns = [col for col in final_demand_table.columns if col.endswith(suffix)]
    individual_weights_df = final_demand_table[cpi_weight_columns].copy()

    # Cut off the trailing suffix only. str.replace() would also delete any
    # earlier occurrence of the same text inside a column name.
    individual_weights_df.columns = [col[:-len(suffix)] for col in individual_weights_df.columns]

    # Save the DataFrame with the year included in the file name
    output_file = os.path.join(target_dir, f"individual_cpi_weights_{year}.csv")
    individual_weights_df.to_csv(output_file)
    print(f"Individual CPI weights saved to {output_file}")

# ---------------------------------------------------
# Function to Calculate CPI Weights for EU28 and ROW
# ---------------------------------------------------
def calculate_eu28_and_row_weights(eu28_countries, final_demand_table):
    """
    Calculate per-row CPI weights for the EU28 aggregate and the rest of the world (ROW).

    Country codes are taken from the second "_"-separated token of every
    column starting with "ip_". Codes contained in ``eu28_countries`` form
    the EU28 group, all remaining codes form ROW. For each group the
    "ip_{country}_P3_S14" columns that exist in the table are summed row-wise
    and divided by the group's grand total, so each weight column sums to 1.
    A group without matching columns, or with a non-positive total, gets a
    weight of 0.0 in every row.

    Parameters
    ----------
    eu28_countries : iterable of str
        Country codes belonging to the EU28 group.
    final_demand_table : pandas.DataFrame
        Table with string column labels such as "ip_AT_P3_S14".

    Returns
    -------
    pandas.DataFrame
        Float columns "EU28" and "ROW", indexed like ``final_demand_table``.
    """
    # Accept any iterable; the set difference below needs a real set
    eu28_countries = set(eu28_countries)

    # Identify all country codes from columns starting with "ip_"
    all_countries = {col.split("_")[1] for col in final_demand_table.columns if col.startswith("ip_")}
    extra_eu_countries = all_countries - eu28_countries

    def _household_columns(countries):
        # Sorted so the summation order does not depend on set iteration order
        candidates = [f"ip_{country}_P3_S14" for country in sorted(countries)]
        return [col for col in candidates if col in final_demand_table.columns]

    def _shares(columns):
        # Float zeros keep the dtype identical to the normalised case
        zeros = pd.Series(0.0, index=final_demand_table.index)
        if not columns:
            return zeros
        demand = final_demand_table[columns].sum(axis=1)
        total = demand.sum()
        # Guard against division by zero (or a negative total)
        return demand / total if total > 0 else zeros

    # Combine weights into a DataFrame
    return pd.DataFrame({
        "EU28": _shares(_household_columns(eu28_countries)),
        "ROW": _shares(_household_columns(extra_eu_countries)),
    })

# ---------------------------------------------------
# Main Process: Loop over available years
# ---------------------------------------------------
# We assume that each year's final demand table is located in a subdirectory of processed_tables_dir
# and that the file is named like "final_demand_table_{year}.csv".
for year in os.listdir(processed_tables_dir):
    year_dir = os.path.join(processed_tables_dir, year)

    # Only sub-directories with an all-digit name are treated as years
    if not (year.isdigit() and os.path.isdir(year_dir)):
        continue

    final_demand_table_path = os.path.join(year_dir, f"final_demand_table_{year}.csv")
    if not os.path.exists(final_demand_table_path):
        print(f"Final demand table for year {year} not found at {final_demand_table_path}. Skipping.")
        continue

    print(f"Processing year {year}...")

    # Read the table with its first column as the index
    final_demand_table = pd.read_csv(final_demand_table_path, index_col=0)

    # Discard the "cpi" column when present; a missing column is not an error
    final_demand_table = final_demand_table.drop(columns=["cpi"], errors="ignore")

    # Strip a leading 'op_' from the row labels for cleaner output
    final_demand_table.index = final_demand_table.index.str.replace(r'^op_', '', regex=True)

    # Per-country weights are already part of the table and are only exported
    save_individual_cpi_weights_from_processed(final_demand_table, year)

    # Aggregate EU28 / ROW weights are computed and written to a per-year file
    eu28_and_row_weights = calculate_eu28_and_row_weights(eu28_countries, final_demand_table)
    output_file = os.path.join(output_dir, f"eu28_and_row_cpi_weights_{year}.csv")
    eu28_and_row_weights.to_csv(output_file)
    print(f"CPI weights for EU28 and ROW for year {year} saved to {output_file}")


Processing year 2010...
Individual CPI weights saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights\individual_cpi_weights_2010.csv
CPI weights for EU28 and ROW for year 2010 saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights\eu28_and_row_cpi_weights_2010.csv
Processing year 2011...
Individual CPI weights saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights\individual_cpi_weights_2011.csv
CPI weights for EU28 and ROW for year 2011 saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights\eu28_and_row_cpi_weights_2011.csv
Processing year 2012...
Individual CPI weights saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights\individual_cpi_weights_2012.csv
CPI weights for EU28 and ROW for year 2012 saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/cpi_weights\eu28_and_row_cpi_weights_2012.csv
Processing year 2013...
Individual CPI weights