In [1]:
import numpy as np
import pandas as pd
import os

# Input: the 2019 final demand table
final_demand_table_path = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/processed_tables/2019/final_demand_table_2019.csv"

# Read the table (the first CSV column becomes the row index) and discard
# the "cpi" column when the file contains one
final_demand_table = pd.read_csv(final_demand_table_path, index_col=0).drop(
    columns=["cpi"], errors="ignore"
)

# Strip a leading 'op_' from every row label so the output reads cleaner
final_demand_table.index = final_demand_table.index.str.replace(
    pat=r'^op_', repl='', regex=True
)

# Country samples, identified by two-letter country codes
core_countries = {
    'AT', 'BE', 'DK', 'FI', 'DE', 'GB',
    'FR', 'IT', 'IE', 'LU', 'NL', 'SE',
}
periphery_countries = {
    'BG', 'CY', 'CZ', 'ES', 'EE', 'GR', 'HR', 'HU',
    'LV', 'LT', 'MT', 'PL', 'PT', 'RO', 'SK', 'SI',
}
# EU28 is simply every core and periphery country together
eu28_countries = core_countries | periphery_countries

# Results directory; created on first run, left alone if it already exists
output_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results'
os.makedirs(output_dir, exist_ok=True)

# Collect the CPI shares of every country in a sample and save them as CSV
def save_cpi_weights_sample(sample_name, country_group, final_demand_table):
    """Gather per-country CPI shares for one sample and write them to CSV.

    The result is written to ``<output_dir>/<sample_name>_cpi_weights.csv``,
    where ``output_dir`` is read from module level. One column is written
    per country, in the order the countries first appear in the table's
    columns, so repeated runs produce the same file layout.

    NOTE(review): ``get_cpi_shares`` is not defined in this section of the
    file. It has to exist at module level before this function is called,
    otherwise the call below raises ``NameError``.

    Args:
        sample_name: Label used in the output file name and the log line.
        country_group: Iterable of country codes to include (for example a
            set or a pandas Index). Codes that do not occur in the table's
            column names are ignored.
        final_demand_table: DataFrame whose column names carry the country
            code as their second "_"-separated part, e.g. "ip_AT_P3_S14".

    Returns:
        None. The function's effects are the CSV file and a printed message.
    """
    # Country codes present in the table, de-duplicated in column order.
    # Only columns shaped like "ip_AT_P3_S14" count: an "ip_" prefix, at
    # least three "_"-separated parts and a two-character second part.
    split_ip_columns = (
        col.split("_")
        for col in final_demand_table.columns
        if col.startswith("ip_")
    )
    table_countries = dict.fromkeys(
        parts[1]
        for parts in split_ip_columns
        if len(parts) > 2 and len(parts[1]) == 2
    )

    # Membership is tested against a set so any iterable is accepted, while
    # the iteration itself follows the table's stable column order instead
    # of the arbitrary order of a set.
    requested_countries = set(country_group)

    sample_weights = {}
    for country in table_countries:
        if country not in requested_countries:
            continue
        cpi_shares = get_cpi_shares(country, final_demand_table)
        # A None result means no shares are available for this country
        if cpi_shares is not None:
            sample_weights[country] = cpi_shares

    # One column per country
    # (presumably each value is a Series over the table's rows; confirm
    # against get_cpi_shares)
    weights_df = pd.DataFrame(sample_weights)

    # Save the DataFrame
    output_file = os.path.join(output_dir, f"{sample_name}_cpi_weights.csv")
    weights_df.to_csv(output_file)
    print(f"CPI weights for {sample_name} saved.")


# Build and write one CPI weights file per sample.
# The "individual" sample is handed every second "_"-separated token found
# in the column names, so each country in the table gets its own shares.
all_column_codes = final_demand_table.columns.str.split("_").str[1].unique()

cpi_weight_samples = (
    ("individual", all_column_codes),   # each country with its own weights
    ("core", core_countries),           # core countries
    ("periphery", periphery_countries), # periphery countries
    ("eu28", eu28_countries),           # all EU28 countries
)

for sample_label, sample_countries in cpi_weight_samples:
    save_cpi_weights_sample(sample_label, sample_countries, final_demand_table)



NameError: name 'get_cpi_shares' is not defined