In [None]:
import pandas as pd
import os
import numpy as np


In [None]:
def split_index_to_multiindex(df):
    """
    Return a copy of ``df`` whose row and column labels are split into a
    two-level (Country, Sector) MultiIndex.

    Every label is expected to look like 'XX_sector_code'. Labels are split at
    the FIRST underscore only, so sector codes that themselves contain an
    underscore stay intact (e.g. 'DE_C31_32' -> ('DE', 'C31_32')).

    Parameters
    ----------
    df : pd.DataFrame
        Table with flat string labels on both axes.

    Returns
    -------
    pd.DataFrame
        A new DataFrame with MultiIndex rows and columns. The input frame is
        left untouched so that re-running a cell on it stays idempotent.

    Raises
    ------
    ValueError
        If any row or column label contains no underscore.
    """
    def split_index(index):
        pairs = []
        for label in index:
            parts = tuple(label.split('_', 1))
            # A label without '_' would otherwise be padded with NaN (or fail
            # with an unrelated "names length" error); fail loudly instead.
            if len(parts) != 2:
                raise ValueError(
                    f"Label {label!r} does not match the expected 'XX_sector_code' pattern"
                )
            pairs.append(parts)
        return pd.MultiIndex.from_tuples(pairs, names=['Country', 'Sector'])

    # Relabel a copy rather than the caller's object.
    result = df.copy()
    result.index = split_index(df.index)
    result.columns = split_index(df.columns)
    return result

def process_files_and_split_index(input_dir, output_dir):
    """
    Convert every CSV file in ``input_dir`` to a MultiIndex version.

    Each '*.csv' file is read with its first column as the index, its row and
    column labels are split by ``split_index_to_multiindex``, and the result
    is written to ``output_dir`` as 'multiindex_<original filename>'.

    Parameters
    ----------
    input_dir : str
        Directory containing the raw CSV tables.
    output_dir : str
        Directory that receives the converted files; created if missing.

    Returns
    -------
    None
        The function only writes files and prints progress messages.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the
    # processing order (and the printed log) is reproducible.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(".csv"):
            continue

        file_path = os.path.join(input_dir, filename)
        print(f"Processing {filename}...")

        df = pd.read_csv(file_path, index_col=0)
        df = split_index_to_multiindex(df)

        # Save the modified DataFrame to the output directory with 'multiindex_' prefix
        output_file_path = os.path.join(output_dir, f'multiindex_{filename}')
        df.to_csv(output_file_path)
        print(f"Processed and saved {filename} to {output_file_path}")

# Example usage
# NOTE(review): both paths are absolute and machine-specific, so this cell only
# runs as-is on the original author's machine.
# `output_dir` is reused by later cells to locate the generated 'multiindex_*' files.
input_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/data/raw/figaro_tables'
output_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_multi_index'
process_files_and_split_index(input_dir, output_dir)

In [None]:
# Define the file path for the 2021 multiindex figaro table
# (depends on `output_dir` from the conversion cell above)
file_path_2021 = os.path.join(output_dir, 'multiindex_2021_figaro_64.csv')

# Load the 2021 multiindex figaro table
# The first two columns and the first two header rows are read back as the
# two-level row index and column index respectively.
df_2021 = pd.read_csv(file_path_2021, index_col=[0, 1], header=[0, 1])

# Display the first few rows of the dataframe to verify
print(df_2021.head())

In [None]:
# ---------------------------------------------------
# 1. Sector Mapping (Renaming + Aggregation)
# ---------------------------------------------------

# Maps a source sector code (key) to the code it should carry afterwards (value).
# Keys that share the same value are summed into one sector by
# apply_sector_mapping; codes that do not appear as a key are left unchanged there.
# NOTE(review): "C31_32" maps to itself (a no-op entry) while "C32" and "C33" are
# also listed as separate source codes — confirm that these keys actually occur
# in the input tables.
sector_mapping = {
    "C10T12": "C10-12",
    "C13T15": "C13-15",
    "C25": "C25_33", "C33": "C25_33",  # Aggregated
    "C28": "C28_32", "C32": "C28_32",  # Aggregated
    "C31_32": "C31_32",
    "E37T39": "E37-39",
    "J58": "J", "J59_60": "J", "J61": "J", "J62_63": "J",  # Aggregated
    "M69_70": "M_N", "M71": "M_N", "M72": "M_N", "M73": "M_N", "M74_75": "M_N",
    "N77": "M_N", "N78": "M_N", "N79": "M_N", "N80T82": "M_N",
    "Q86": "Q", "Q87_88": "Q",
    "R90T92": "R_S", "R93": "R_S", "S94": "R_S", "S95": "R_S", "S96": "R_S",
    "L": "L68"
}

# ---------------------------------------------------
# 2. Load FIGARO Data for 2021
# ---------------------------------------------------

# NOTE(review): this repeats the load performed in the previous cell; it makes
# this cell self-contained but still relies on `output_dir` being defined.

# Define the file path for the 2021 multiindex figaro table
file_path_2021 = os.path.join(output_dir, 'multiindex_2021_figaro_64.csv')

# Load the 2021 multiindex figaro table
# (two index columns and two header rows form the row/column MultiIndex)
df_2021 = pd.read_csv(file_path_2021, index_col=[0, 1], header=[0, 1])

# Display the first few rows to verify structure
print(df_2021.head())

# ---------------------------------------------------
# 3. Function to Apply Sector Mapping and Aggregate
# ---------------------------------------------------

def apply_sector_mapping(df, sector_mapping):
    """
    Rename and aggregate sectors in both rows and columns using the provided mapping.

    Parameters
    ----------
    df : pd.DataFrame
        Table whose rows and columns are two-level (country, sector) MultiIndexes.
    sector_mapping : dict
        Maps an original sector code to its new code. Codes missing from the
        mapping are kept as they are; several codes mapped to the same target
        are summed together.

    Returns
    -------
    pd.DataFrame
        A new DataFrame with levels named ['Country', 'Sector'] on both axes,
        one row/column per (country, mapped sector), sorted by label. The
        input frame is not modified.
    """
    level_names = ['Country', 'Sector']

    def relabel(axis_labels):
        # Swap each sector code for its mapped code, keeping the country.
        return pd.MultiIndex.from_tuples(
            [(country, sector_mapping.get(sector, sector)) for country, sector in axis_labels],
            names=level_names,
        )

    # Step 1 + 2: relabel rows and columns on a copy so the caller's frame
    # keeps its original sector codes (re-running the cell stays idempotent).
    relabelled = df.copy()
    relabelled.index = relabel(df.index)
    relabelled.columns = relabel(df.columns)

    # Step 3: aggregate duplicated labels. Rows first ...
    rows_aggregated = relabelled.groupby(level=level_names).sum()
    # ... then columns. `groupby(axis=1)` is deprecated since pandas 2.1, so the
    # column aggregation is done as a row aggregation on the transpose.
    return rows_aggregated.T.groupby(level=level_names).sum().T

# ---------------------------------------------------
# 4. Apply Sector Mapping to FIGARO Data
# ---------------------------------------------------

# Renamed and aggregated 2021 table; this is the input to the gross-output
# and country-merge steps below.
df_2021_mapped = apply_sector_mapping(df_2021, sector_mapping)

# Display the first few rows to verify the aggregation worked
print(df_2021_mapped.head())

# ---------------------------------------------------
# 5. Add Gross Output Row
# ---------------------------------------------------

def add_gross_output_row(df):
    """
    Append a ('GO', 'GO') row holding the column totals of ``df``.

    The totals are taken over every existing row, turned into a one-row
    frame labelled ('GO', 'GO') and stacked underneath the original data.
    Returns the extended DataFrame.
    """
    column_totals = df.sum(axis=0)
    column_totals.name = ('GO', 'GO')
    # A named Series becomes a single column; transposing turns it into one row.
    totals_row = pd.DataFrame(column_totals).T
    return pd.concat([df, totals_row])

# Append the ('GO', 'GO') totals row to the mapped table.
df_2021_mapped = add_gross_output_row(df_2021_mapped)

# Ensure the row indices have the same named index structure as the columns
# (presumably the concat inside add_gross_output_row drops the level names — TODO confirm)
df_2021_mapped.index.names = df_2021_mapped.columns.names

# Display the row index names
print("Row index names:", df_2021_mapped.index.names)

# Display the column index names
print("Column index names:", df_2021_mapped.columns.names)

# Display the row indices
# (`display` is provided by the IPython/Jupyter environment, it is not imported here)
display(df_2021_mapped.index)

# Display the first few rows to verify the Gross Output row was added
print(df_2021_mapped.tail())
# NOTE(review): absolute, machine-specific output path.
df_2021_mapped.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_mapped_2021.csv')

In [None]:
def merge_countries(df, countries_to_merge, target='FIGW1'):
    """
    Merge several countries into one target country on both axes.

    Every row and column whose country code is in ``countries_to_merge`` is
    relabelled to ``target``; entries that end up with the same
    (country, sector) label are then summed.

    Assumes both rows and columns are two-level MultiIndexes of
    (country, sector) tuples.

    Parameters:
      df: pd.DataFrame with MultiIndex for both rows and columns
      countries_to_merge: iterable of country codes to merge (e.g., ['AR', 'SA'])
      target: the target country code to absorb the values (default 'FIGW1')

    Returns:
      A new DataFrame with the specified countries merged into the target,
      sorted by label on both axes and carrying the original level names.
      The input DataFrame is not modified.
    """
    # Set membership keeps the relabelling linear in the number of labels.
    merge_set = set(countries_to_merge)

    def relabel(axis_labels):
        # Replace merged countries by the target, keep sector and level names.
        return pd.MultiIndex.from_tuples(
            [
                (target if country in merge_set else country, sector)
                for country, sector in axis_labels
            ],
            names=axis_labels.names,
        )

    # Relabel a copy: assigning to df.index / df.columns directly would leave
    # the caller's frame with duplicated FIGW1 labels after this call.
    relabelled = df.copy()
    relabelled.index = relabel(df.index)
    relabelled.columns = relabel(df.columns)

    # Group by both levels (by position, so unnamed levels work too) and sum
    # the duplicated entries, rows first.
    both_levels = [0, 1]
    rows_merged = relabelled.groupby(level=both_levels).sum()
    # `groupby(axis=1)` is deprecated since pandas 2.1; aggregate the columns
    # as rows of the transpose instead.
    return rows_merged.T.groupby(level=both_levels).sum().T


# Fold 'AR' and 'SA' into the default target 'FIGW1'; merged_df is the table
# used by the technical-coefficient cells below.
merged_df = merge_countries(df_2021_mapped, ['AR', 'SA'])
display(merged_df)

# NOTE(review): absolute, machine-specific output path.
merged_df.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_mapped_2021_merged.csv')

In [None]:
# ----- STEP 0: Extract the gross output row
# The gross output row has both levels of the index equal to "GO"
# Selecting the full ("GO", "GO") key gives the row's values indexed by merged_df's columns.
go_row = merged_df.loc[("GO", "GO")]

# ----- STEP 1: Build a dictionary for gross output for each supplier column
# For each supplier column (e.g. (AT, A01)), retrieve its gross output from the GO row.
gross_output = {}
for col in merged_df.columns:
    country, supplier_sector = col
    if supplier_sector != "GO":  # Only for supplier columns
        try:
            # The gross output for column (country, supplier_sector) is in the GO row.
            go_value = go_row[col]
            gross_output[col] = go_value
        except KeyError:
            # Defensive fallback: go_row is indexed by the same columns that are
            # iterated here, so this branch is not expected to trigger.
            print(f"Warning: Gross output for supplier column {col} not found; defaulting to 1.")
            gross_output[col] = 1

# ----- STEP 2: Create the interindustry block
# Exclude rows and columns where the Sector is "GO"
rows_mask = merged_df.index.get_level_values("Sector") != "GO"
cols_mask = merged_df.columns.get_level_values("Sector") != "GO"
interindustry_block = merged_df.loc[rows_mask, cols_mask]

# ----- STEP 3: Compute the technical coefficients
# For each supplier column, divide all its entries by its corresponding gross output.
def normalize_column(col):
    """Divide one column of the interindustry block by its gross output.

    Reads the module-level `gross_output` dict; a column whose key is missing
    there is divided by 1 (i.e. returned unchanged).
    """
    supplier_key = col.name  # a tuple (country, supplier_sector)
    denominator = gross_output.get(supplier_key, 1)
    # NOTE(review): a gross output of 0 is not guarded against; the division
    # would then produce inf/NaN entries — confirm whether that can occur.
    return col / denominator

# axis=0 passes each column (as a Series named by its column label) to the function.
tech_coeff = interindustry_block.apply(normalize_column, axis=0)

# ----- STEP 4: Save or display the technical coefficients table
# The file name is relative, so it is written to the current working directory.
tech_coeff.to_csv('technical_coefficients_from_merged.csv')
print("Technical coefficients table saved to 'technical_coefficients_from_merged.csv'.")

In [None]:
import pandas as pd

# NOTE(review): STEP 0-3 below repeat the computation of the previous cell
# line for line (including a second definition of normalize_column); only
# STEP 4 onwards is new in this cell.

# ----- STEP 0: Extract the gross output row (since it’s unique, with index ("GO","GO"))
go_row = merged_df.loc[("GO", "GO")]

# ----- STEP 1: Build a dictionary for gross output for each supplier column
# For each supplier column (e.g. (AT, A01)), retrieve its gross output from the GO row.
gross_output = {}
for col in merged_df.columns:
    country, supplier_sector = col
    if supplier_sector != "GO":  # Only for supplier columns
        try:
            # Retrieve the gross output for this supplier column from the GO row.
            go_value = go_row[col]
            gross_output[col] = go_value
        except KeyError:
            # Defensive fallback: go_row is indexed by the same columns that are
            # iterated here, so this branch is not expected to trigger.
            print(f"Warning: Gross output for supplier column {col} not found; defaulting to 1.")
            gross_output[col] = 1

# ----- STEP 2: Create the interindustry block by excluding rows and columns where Sector == "GO"
rows_mask = merged_df.index.get_level_values("Sector") != "GO"
cols_mask = merged_df.columns.get_level_values("Sector") != "GO"
interindustry_block = merged_df.loc[rows_mask, cols_mask]

# ----- STEP 3: Compute the technical coefficients
def normalize_column(col):
    """Divide one column of the interindustry block by its gross output.

    Reads the module-level `gross_output` dict; a column whose key is missing
    there is divided by 1 (i.e. returned unchanged).
    """
    supplier_key = col.name  # a tuple (country, supplier_sector)
    denominator = gross_output.get(supplier_key, 1)
    # NOTE(review): a gross output of 0 is not guarded against; the division
    # would then produce inf/NaN entries — confirm whether that can occur.
    return col / denominator

# axis=0 passes each column (as a Series named by its column label) to the function.
tech_coeff = interindustry_block.apply(normalize_column, axis=0)

# ----- STEP 4: Duplicate B rows with new sector names "B_gas" and "B_nongas"
# Select rows where Sector == "B"
# (.copy() so that relabelling the duplicates later cannot touch tech_coeff)
b_rows = tech_coeff.loc[tech_coeff.index.get_level_values("Sector") == "B"].copy()

# Create duplicates with new sector names
def update_sector(index, new_label):
    """
    Build a new MultiIndex from ``index`` in which every sector equal to "B"
    is replaced by ``new_label``.

    ``index`` is iterated as (country, sector) pairs; all other pairs and the
    level names are carried over unchanged.
    """
    relabelled = [
        (country, new_label if sector == "B" else sector)
        for country, sector in index
    ]
    return pd.MultiIndex.from_tuples(relabelled, names=index.names)

# Two independent copies of the B rows, one per new sector label.
b_rows_gas = b_rows.copy()
b_rows_nongas = b_rows.copy()
b_rows_gas.index = update_sector(b_rows.index, "B_gas")
b_rows_nongas.index = update_sector(b_rows.index, "B_nongas")

# Remove the original B rows and append the duplicates
# NOTE(review): both duplicates carry the full, unsplit values of the original
# B rows — confirm that a later step apportions them between gas and non-gas.
non_b_rows = tech_coeff.loc[tech_coeff.index.get_level_values("Sector") != "B"]
tech_coeff_rows_modified = pd.concat([non_b_rows, b_rows_gas, b_rows_nongas]).sort_index()

# ----- STEP 5: Duplicate B columns with new sector names "B_gas" and "B_nongas"
# Columns are a MultiIndex. First, identify columns where the Sector level is "B".
# (the rows were already split in STEP 4, so only the column labels still contain "B")
b_cols_mask = tech_coeff_rows_modified.columns.get_level_values("Sector") == "B"
b_cols = tech_coeff_rows_modified.columns[b_cols_mask]

# Extract the B columns
b_cols_df = tech_coeff_rows_modified.loc[:, b_cols].copy()

# Create two copies with new column labels
def update_column_labels(columns, new_label):
    """
    Build a new column MultiIndex in which every (country, "B") label becomes
    (country, ``new_label``).

    Labels with any other sector are passed through unchanged, and the level
    names of ``columns`` are preserved.
    """
    def swap(pair):
        country, sector = pair
        return (country, new_label) if sector == "B" else (country, sector)

    return pd.MultiIndex.from_tuples(list(map(swap, columns)), names=columns.names)

# Two independent copies of the B columns, one per new sector label.
b_cols_gas = b_cols_df.copy()
b_cols_nongas = b_cols_df.copy()
b_cols_gas.columns = update_column_labels(b_cols_gas.columns, "B_gas")
b_cols_nongas.columns = update_column_labels(b_cols_nongas.columns, "B_nongas")

# Remove original B columns from the DataFrame
non_b_cols_mask = tech_coeff_rows_modified.columns.get_level_values("Sector") != "B"
non_b_cols = tech_coeff_rows_modified.columns[non_b_cols_mask]
tech_coeff_cols_modified = tech_coeff_rows_modified.loc[:, non_b_cols]

# Concatenate the non-B columns with the new duplicates along axis=1
# (all three pieces share the row index of tech_coeff_rows_modified)
final_df = pd.concat([tech_coeff_cols_modified, b_cols_gas, b_cols_nongas], axis=1)

# Optionally, sort the columns (and rows) if desired:
final_df = final_df.sort_index(axis=0).sort_index(axis=1)

# ----- STEP 6: Save the final modified table to CSV
# The file name is relative, so it is written to the current working directory.
final_df.to_csv('technical_coefficients_modified.csv')
print("Modified technical coefficients table saved to 'technical_coefficients_modified.csv'.")
