In [None]:
import numpy as np
import os
import pymrio
import pandas as pd
from IPython.display import display
import country_converter as coco

In [None]:
# Widen pandas' display limits so that large tables are printed with less truncation
display_options = {
    'display.max_rows': 200,
    'display.max_columns': 100,
    'display.width': 1000,
    'display.max_colwidth': 100,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [None]:
# Root folder for the Exiobase3 data and the sub-folder that receives the raw downloads
exio3_folder = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/exiobase'
download_folder = os.path.join(exio3_folder, 'exio_download')

# Make sure the download folder is present and report which case applied
if os.path.exists(download_folder):
    print(f"Directory already exists: {download_folder}")
else:
    os.makedirs(download_folder)
    print(f"Created directory: {download_folder}")

# Download the "ixi" Exiobase3 tables for 2018-2022 and show the download log
download_years = list(range(2018, 2023))
exio_downloadlog = pymrio.download_exiobase3(
    storage_folder=download_folder,
    system="ixi",
    years=download_years,
)
print(exio_downloadlog)


In [None]:
# Location of the downloaded 2021 ixi archive
exio3_2021_archive = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/exiobase/exio_download/IOT_2021_ixi.zip'

# Parse the 2021 archive into a pymrio system object
exio3 = pymrio.parse_exiobase3(path=exio3_2021_archive)

In [None]:
# Inspect the metadata record of the parsed Exiobase3 system
print(exio3.meta)

In [None]:
### Check for geographical sampling differences between FIGARO and EXIOBASE 3 ###

# Region codes used in FIGARO
figaro_countries = [
    'AR', 'AT', 'AU', 'BE', 'BG', 'BR', 'CA', 'CH', 'CN', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FIGW1', 'FR', 'GB',
    'GR', 'HR', 'HU', 'ID', 'IE', 'IN', 'IT', 'JP', 'KR', 'LT', 'LU', 'LV', 'MT', 'MX', 'NL', 'NO', 'PL', 'PT', 'RO', 'RU',
    'SA', 'SE', 'SI', 'SK', 'TR', 'US', 'ZA'
]

# Region codes present in the parsed EXIOBASE 3 system
exio_countries = exio3.get_regions()

# Work on sets so overlap and differences are plain set operations
figaro_region_set = set(figaro_countries)
exio_region_set = set(exio_countries)

common_countries = sorted(figaro_region_set & exio_region_set)
figaro_only_countries = sorted(figaro_region_set - exio_region_set)
exio_only_countries = sorted(exio_region_set - figaro_region_set)

# Report the three groups
for heading, codes in (
    ("Common countries:", common_countries),
    ("Countries only in FIGARO:", figaro_only_countries),
    ("Countries only in EXIOBASE 3:", exio_only_countries),
):
    print(heading, codes)

In [None]:
# Findings from the comparison above:
#   - Argentina and Saudi Arabia exist in FIGARO but not in EXIOBASE 3
#   - Taiwan exists in EXIOBASE 3 but not in FIGARO
#   - FIGW1 is the rest-of-world (ROW) region in FIGARO
#   - WA (Asia), WE (Europe), WF (Africa), WL (Latin America), WM (Middle East), WP (Pacific)
#     are the ROW regions in Exiobase3

# Relabel every EXIOBASE 3 RoW region and Taiwan as the single FIGARO region 'FIGW1'
regions_to_figw1 = ['WA', 'WE', 'WF', 'WL', 'WM', 'WP', 'TW']
exio3.rename_regions(dict.fromkeys(regions_to_figw1, 'FIGW1'))

# Collapse the now-duplicated 'FIGW1' entries and export the aggregated Z matrix
exio3.aggregate_duplicates(inplace=True)
exio3.Z.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/exiobase/country_agg_exio3_figaro.csv')


In [None]:
# Load pymrio's classification data for the ixi Exiobase3 system; it carries
# alternative sector names and aggregation levels that may be useful below
classification_name = 'exio3_ixi'
mrio_class = pymrio.get_classification(mrio_name=classification_name)



In [None]:
# Render the complete classification object returned by pymrio.get_classification
display(mrio_class)

In [None]:
# Build a conversion dictionary from ExioName to ExioLabel and display it for a visual check
sector_table = mrio_class.sectors
conv_dict = mrio_class.get_sector_dict(sector_table.ExioName, sector_table.ExioLabel)
display(conv_dict)

# Apply the conversion to the sector labels of the pymrio object
exio3.rename_sectors(conv_dict)

# Show the Z index to confirm that the labels changed
renamed_index = exio3.Z.index
print(renamed_index)

In [None]:
### Aggregate Exiobase3 data ###
# Aggregation is done through renaming: several ExioLabel sectors receive the same target
# code here and the resulting duplicates are aggregated afterwards. The target codes also
# help to adapt the data to the Eurostat data.

# Renaming of sectors requires a mapping of ExioLabel to the NACE classification.
# Extraction activities are split into "B_gas" and "B_nongas" instead of a single code.
# NOTE(review): "A_COMP" maps to "J62_63" and "A_FURN" to "C31", whereas the FIGARO
# sector_mapping in this notebook uses "J" and "C31_32" - confirm that the codes line up.
# NOTE(review): "C25_33" (A_FABM), "C25" (A_MACH) and "C28_32" (A_MEIN) look unusual next to
# the other codes - verify them against the intended FIGARO sector list.

rename_dict_exio3_NACE = {
    "A_PARI": "A01",
    "A_WHEA": "A01",
    "A_OCER": "A01",
    "A_FVEG": "A01",
    "A_OILS": "A01",
    "A_SUGB": "A01",
    "A_FIBR": "A01",
    "A_OTCR": "A01",
    "A_CATL": "A01",
    "A_PIGS": "A01",
    "A_PLTR": "A01",
    "A_OMEA": "A01",
    "A_OANP": "A01",
    "A_MILK": "A01",
    "A_WOOL": "A01",
    "A_MANC": "A01",
    "A_MANB": "A01",
    "A_FORE": "A02",
    "A_FISH": "A03",
    "A_GASE": "B_gas",
    "A_OGPL": "B_gas",
    "A_COAL": "B_nongas",
    "A_COIL": "B_nongas",
    "A_ORAN": "B_nongas",
    "A_IRON": "B_nongas",
    "A_COPO": "B_nongas",
    "A_NIKO": "B_nongas",
    "A_ALUO": "B_nongas",
    "A_PREO": "B_nongas",
    "A_LZTO": "B_nongas",
    "A_ONFO": "B_nongas",
    "A_STON": "B_nongas",
    "A_SDCL": "B_nongas",
    "A_CHMF": "B_nongas",
    "A_PCAT": "C10-12",
    "A_PPIG": "C10-12",
    "A_PPLT": "C10-12",
    "A_POME": "C10-12",
    "A_VOIL": "C10-12",
    "A_DAIR": "C10-12",
    "A_RICE": "C10-12",
    "A_SUGR": "C10-12",
    "A_OFOD": "C10-12",
    "A_BEVR": "C10-12",
    "A_FSHP": "C10-12",
    "A_TOBC": "C10-12",
    "A_TEXT": "C13-15",
    "A_GARM": "C13-15",
    "A_LETH": "C13-15",
    "A_WOOD": "C16",
    "A_WOOW": "C16",
    "A_PULP": "C17",
    "A_PAPR": "C17",
    "A_PAPE": "C17",
    "A_MDIA": "C18",
    "A_COKE": "C19",
    "A_REFN": "C19",
    "A_PLAS": "C20-21",
    "A_PLAW": "C20-21",
    "A_NFER": "C20-21",
    "A_PFER": "C20-21",
    "A_CHEM": "C20-21",
    "A_RUBP": "C22",
    "A_GLAS": "C23",
    "A_GLAW": "C23",
    "A_CRMC": "C23",
    "A_BRIK": "C23",
    "A_CMNT": "C23",
    "A_ASHW": "C23",
    "A_ONMM": "C23",
    "A_NUCF": "C24",
    "A_STEL": "C24",
    "A_STEW": "C24",
    "A_PREM": "C24",
    "A_PREW": "C24",
    "A_ALUM": "C24",
    "A_ALUW": "C24",
    "A_LZTP": "C24",
    "A_LZTW": "C24",
    "A_COPP": "C24",
    "A_COPW": "C24",
    "A_ONFM": "C24",
    "A_ONFW": "C24",
    "A_METC": "C24",
    "A_FABM": "C25_33",
    "A_MACH": "C25",
    "A_OFMA": "C26",
    "A_ELMA": "C27",
    "A_RATV": "C27",
    "A_MEIN": "C28_32",
    "A_MOTO": "C29",
    "A_OTRE": "C30",
    "A_FURN": "C31",
    "A_POWC": "D35",
    "A_POWG": "D35",
    "A_POWN": "D35",
    "A_POWH": "D35",
    "A_POWW": "D35",
    "A_POWP": "D35",
    "A_POWB": "D35",
    "A_POWS": "D35",
    "A_POWE": "D35",
    "A_POWO": "D35",
    "A_POWM": "D35",
    "A_POWZ": "D35",
    "A_POWT": "D35",
    "A_POWD": "D35",
    "A_GASD": "D35",
    "A_HWAT": "D35",
    "A_WATR": "E36",
    "A_RYMS": "E37-39",
    "A_BOTW": "E37-39",
    "A_INCF": "E37-39",
    "A_INCP": "E37-39",
    "A_INCL": "E37-39",
    "A_INCM": "E37-39",
    "A_INCT": "E37-39",
    "A_INCW": "E37-39",
    "A_INCO": "E37-39",
    "A_BIOF": "E37-39",
    "A_BIOP": "E37-39",
    "A_BIOS": "E37-39",
    "A_COMF": "E37-39",
    "A_COMW": "E37-39",
    "A_WASF": "E37-39",
    "A_WASO": "E37-39",
    "A_LANF": "E37-39",
    "A_LANP": "E37-39",
    "A_LANL": "E37-39",
    "A_LANI": "E37-39",
    "A_LANT": "E37-39",
    "A_LANW": "E37-39",
    "A_CONS": "F",
    "A_CONW": "F",
    "A_TDMO": "G45",
    "A_TDWH": "G46",
    "A_TDFU": "G47",
    "A_TDRT": "G47",
    "A_TRAI": "H49",
    "A_TLND": "H49",
    "A_TPIP": "H49",
    "A_TWAS": "H50",
    "A_TWAI": "H50",
    "A_TAIR": "H51",
    "A_TAUX": "H52",
    "A_PTEL": "H53",
    "A_HORE": "I",
    "A_COMP": "J62_63",
    "A_FINT": "K64",
    "A_FINS": "K65",
    "A_FAUX": "K66",
    "A_REAL": "L68",
    "A_RESD": "M_N",
    "A_OBUS": "M_N",
    "A_MARE": "M_N",
    "A_PADF": "O84",
    "A_EDUC": "P85",
    "A_HEAL": "Q",
    "A_RECR": "R_S",
    "A_ORGA": "R_S",
    "A_OSER": "R_S",
    "A_PRHH": "T",
    "A_EXTO": "U"
}

In [None]:
# Relabel the ExioLabel sectors with their NACE-style target codes
exio3.rename_sectors(rename_dict_exio3_NACE)
relabelled_index = exio3.Z.index
print(relabelled_index)

# Aggregate the sectors that now share a label
exio3.aggregate_duplicates()
aggregated_Z = exio3.Z
print(aggregated_Z)

# Keep the aggregated Z matrix as a plain DataFrame
exio_Z_df = pd.DataFrame(exio3.Z)


In [None]:
# Render the sector-aggregated Z matrix (the DataFrame built from exio3.Z)
display(exio_Z_df)

In [None]:
# Extraction snippet of the energy outputs

# Row selector: every country (index level 0), sectors B_gas and B_nongas only
energy_row_selector = pd.IndexSlice[:, ['B_gas', 'B_nongas']]

# Pull those rows from Z and order them by country
energy_matrix = exio3.Z.loc[energy_row_selector, :].sort_index(level=0)

# Show the reordered energy matrix
print(energy_matrix)

In [None]:
# Rows of Z for B_gas and B_nongas (every country), across all columns
energy_inputs = exio3.Z.loc[pd.IndexSlice[:, ['B_gas', 'B_nongas']], :]

# Per-country total: the gas and nongas rows summed within each level-0 label
total_energy_inputs = energy_inputs.groupby(level=0).sum()

# Divide each row by its country's total, then order by country so that the
# gas and nongas rows of one country sit next to each other
energy_shares = energy_inputs.div(total_energy_inputs, level=0).sort_index(level=0)

# Keep the shares as a pandas DataFrame and preview the first rows
energy_shares_df = pd.DataFrame(energy_shares)
display(energy_shares_df.head())

In [None]:
share_rows = energy_shares_df.index
share_columns = energy_shares_df.columns

# Countries: unique level-0 labels of the rows
countries = share_rows.get_level_values(0).unique().tolist()

# Covered sectors: unique level-1 labels of the columns
sectors = share_columns.get_level_values(1).unique().tolist()

# Row count per country (the rows here are the B_gas / B_nongas rows, not the column sectors)
sectors_per_country = energy_shares_df.groupby(level=0).size()

# Sizes of the two lists
num_countries, num_sectors = len(countries), len(sectors)

for heading, value in (
    ("List of countries:", countries),
    ("Number of countries:", num_countries),
    ("List of covered sectors:", sectors),
    ("Number of sectors:", num_sectors),
):
    print(heading, value)
print("Number of sectors per country:")
print(sectors_per_country)

In [None]:
def split_index_to_multiindex(df):
    """
    Turn the flat row and column labels of ``df`` into ('Country', 'Sector') MultiIndexes.

    Every label is split once at its first underscore, so 'XX_sector_code' becomes
    ('XX', 'sector_code'). ``df`` is modified in place and also returned.
    """
    level_names = ['Country', 'Sector']

    def to_multiindex(labels):
        pairs = [tuple(label.split('_', 1)) for label in labels]
        return pd.MultiIndex.from_tuples(pairs, names=level_names)

    # Rows first, then columns
    for axis_attr in ('index', 'columns'):
        setattr(df, axis_attr, to_multiindex(getattr(df, axis_attr)))
    return df

def process_files_and_split_index(input_dir, output_dir):
    """
    Write a MultiIndex-labelled copy of every CSV file found in ``input_dir``.

    Each '*.csv' file is read with its first column as index, passed through
    ``split_index_to_multiindex`` and saved to ``output_dir`` under the name
    'multiindex_<original filename>'. ``output_dir`` is created if it is missing.
    """
    # Create the destination folder on first use
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for filename in os.listdir(input_dir):
        # Anything that is not a CSV file is ignored
        if not filename.endswith(".csv"):
            continue

        print(f"Processing {filename}...")
        source_path = os.path.join(input_dir, filename)
        table = split_index_to_multiindex(pd.read_csv(source_path, index_col=0))

        # Store the converted table next to the others, prefixed with 'multiindex_'
        output_file_path = os.path.join(output_dir, f'multiindex_{filename}')
        table.to_csv(output_file_path)
        print(f"Processed and saved {filename} to {output_file_path}")

# Example usage: raw FIGARO tables in, MultiIndex-labelled copies out
input_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/data/raw/figaro_tables'
output_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_multi_index'
process_files_and_split_index(input_dir=input_dir, output_dir=output_dir)

In [None]:
# Path of the 2021 multiindex figaro table
file_path_2021 = os.path.join(output_dir, 'multiindex_2021_figaro_64.csv')

# The first two rows and the first two columns of the file hold the MultiIndex levels
multiindex_levels = [0, 1]
df_2021 = pd.read_csv(file_path_2021, header=multiindex_levels, index_col=multiindex_levels)

# Print the first few rows to verify the result
figaro_preview = df_2021.head()
print(figaro_preview)

In [None]:
# ---------------------------------------------------
# 1. Sector Mapping (Renaming + Aggregation)
# ---------------------------------------------------

# FIGARO sector code -> target code; codes that are not listed keep their name.
# Groups that collapse into one target code are written as one dict.fromkeys(...) each.
sector_mapping = {
    "C10T12": "C10-12",
    "C13T15": "C13-15",
    "E37T39": "E37-39",
    **dict.fromkeys(["J58", "J59_60", "J61", "J62_63"], "J"),
    **dict.fromkeys(
        ["M69_70", "M71", "M72", "M73", "M74_75", "N77", "N78", "N79", "N80T82"],
        "M_N",
    ),
    **dict.fromkeys(["Q86", "Q87_88"], "Q"),
    **dict.fromkeys(["R90T92", "R93", "S94", "S95", "S96"], "R_S"),
    "L": "L68",
}

# ---------------------------------------------------
# 2. Load FIGARO Data for 2021
# ---------------------------------------------------

# Path of the 2021 multiindex figaro table
file_path_2021 = os.path.join(output_dir, 'multiindex_2021_figaro_64.csv')

# The first two rows and the first two columns of the file hold the MultiIndex levels
multiindex_levels = [0, 1]
df_2021 = pd.read_csv(file_path_2021, header=multiindex_levels, index_col=multiindex_levels)

# Print the first few rows to verify the structure
figaro_preview = df_2021.head()
print(figaro_preview)

# ---------------------------------------------------
# 3. Function to Apply Sector Mapping and Aggregate
# ---------------------------------------------------

def apply_sector_mapping(df, sector_mapping):
    """
    Rename and aggregate sectors in both rows and columns using the provided mapping.

    Parameters:
      df: pd.DataFrame whose rows and columns are two-level (country, sector) MultiIndexes
      sector_mapping: dict mapping an original sector code to its target code;
                      sector codes that are not in the dict are kept unchanged

    Returns:
      A new DataFrame with levels named ['Country', 'Sector'] on both axes, in which
      rows and columns that share a (Country, Sector) label have been summed.
      The DataFrame passed in is left untouched.
    """
    level_names = ['Country', 'Sector']

    def _relabel(axis_labels):
        # Replace the sector part of every (country, sector) label, keeping unmapped codes
        return pd.MultiIndex.from_tuples(
            [(country, sector_mapping.get(sector, sector)) for country, sector in axis_labels],
            names=level_names,
        )

    # set_axis returns a new object, so the caller's frame keeps its original labels
    relabelled = df.set_axis(_relabel(df.index), axis=0).set_axis(_relabel(df.columns), axis=1)

    # Sum duplicated rows first ...
    rows_aggregated = relabelled.groupby(level=level_names).sum()

    # ... then duplicated columns. groupby(axis=1) is deprecated in recent pandas,
    # so the columns are aggregated on the transposed frame instead.
    return rows_aggregated.T.groupby(level=level_names).sum().T

# ---------------------------------------------------
# 4. Apply Sector Mapping to FIGARO Data
# ---------------------------------------------------

# Rename and aggregate the FIGARO sectors of the 2021 table
df_2021_mapped = apply_sector_mapping(df=df_2021, sector_mapping=sector_mapping)

# Print the first few rows to verify that the aggregation worked
mapped_preview = df_2021_mapped.head()
print(mapped_preview)

# ---------------------------------------------------
# 5. Add Gross Output Row
# ---------------------------------------------------

def add_gross_output_row(df):
    """
    Return ``df`` with one extra row labelled ('GO', 'GO') that holds the column sums.
    """
    column_totals = df.sum(axis=0).rename(('GO', 'GO'))
    go_frame = pd.DataFrame(column_totals).T  # one-row frame carrying the totals
    return pd.concat([df, go_frame])

# Append the ('GO', 'GO') row of column sums
df_2021_mapped = add_gross_output_row(df_2021_mapped)

# Give the row index the same level names as the columns
df_2021_mapped.index = df_2021_mapped.index.set_names(df_2021_mapped.columns.names)

# Show the level names of both axes
print("Row index names:", df_2021_mapped.index.names)
print("Column index names:", df_2021_mapped.columns.names)

# Show the row index itself
display(df_2021_mapped.index)

# Print the last rows to verify that the Gross Output row was added, then export
mapped_tail = df_2021_mapped.tail()
print(mapped_tail)
df_2021_mapped.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_mapped_2021.csv')

In [None]:
def merge_countries(df, countries_to_merge, target='FIGW1'):
    """
    Merge a set of countries into one target country on both axes.

    Every row and column label whose country is in ``countries_to_merge`` is relabelled
    with ``target``; rows and columns that then share a label are summed.

    Assumes both rows and columns are two-level (country, sector) MultiIndexes,
    normally named ['Country', 'Sector'].

    Parameters:
      df: pd.DataFrame with MultiIndex for both rows and columns
      countries_to_merge: list of country codes to merge (e.g., ['AR', 'SA'])
      target: the target country code to absorb the values (default 'FIGW1')

    Returns:
      A new DataFrame with the specified countries merged into the target.
      The DataFrame passed in is left untouched and the level names are preserved.
    """
    def _relabel(axis_labels):
        # Swap the country part of a label for `target` where requested
        return pd.MultiIndex.from_tuples(
            [
                (target if country in countries_to_merge else country, sector)
                for country, sector in axis_labels
            ],
            names=axis_labels.names,
        )

    # set_axis returns a new object, so the caller's frame keeps its original labels
    relabelled = df.set_axis(_relabel(df.index), axis=0).set_axis(_relabel(df.columns), axis=1)

    # Group by level position so that unnamed levels work as well as named ones
    both_levels = [0, 1]
    rows_merged = relabelled.groupby(level=both_levels).sum()

    # groupby(axis=1) is deprecated in recent pandas; aggregate the columns
    # on the transposed frame instead.
    return rows_merged.T.groupby(level=both_levels).sum().T


# Fold 'AR' and 'SA' into the default target region of merge_countries
countries_to_fold = ['AR', 'SA']
merged_df = merge_countries(df_2021_mapped, countries_to_fold)
display(merged_df)

# Export the merged table
merged_df.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_mapped_2021_merged.csv')

In [None]:
# ----- STEP 0: Pull out the gross output row, stored under the row label ("GO", "GO")
go_row = merged_df.loc[("GO", "GO")]

# ----- STEP 1: Map every supplier column label, e.g. (AT, A01), to its gross output
# Columns whose sector is "GO" are skipped.
gross_output = {}
for col in (label for label in merged_df.columns if label[1] != "GO"):
    try:
        gross_output[col] = go_row[col]
    except KeyError:
        print(f"Warning: Gross output for supplier column {col} not found; defaulting to 1.")
        gross_output[col] = 1

# ----- STEP 2: Interindustry block = all rows and columns whose Sector is not "GO"
rows_mask = ~merged_df.index.get_level_values("Sector").isin(["GO"])
cols_mask = ~merged_df.columns.get_level_values("Sector").isin(["GO"])
interindustry_block = merged_df.loc[rows_mask, cols_mask]

# ----- STEP 3: Compute the technical coefficients
# For each supplier column, divide all its entries by its corresponding gross output.
def normalize_column(col):
    """
    Divide one column of the interindustry block by that column's gross output.

    The gross output is looked up in the module-level ``gross_output`` dict under the
    column's label ``col.name`` (a (country, supplier_sector) tuple); a label that is
    missing from the dict is divided by 1.

    A gross output of exactly zero would turn the column into inf/NaN. Such a column
    gets coefficients of 0 instead, so the resulting table stays finite.
    """
    supplier_key = col.name  # a tuple (country, supplier_sector)
    denominator = gross_output.get(supplier_key, 1)
    if denominator == 0:
        # No output in this column: report zero coefficients rather than x / 0
        return pd.Series(0.0, index=col.index, name=col.name)
    return col / denominator

# Normalise the interindustry block column by column
tech_coeff_path = 'technical_coefficients_from_merged.csv'
tech_coeff = interindustry_block.apply(normalize_column, axis="index")

# ----- STEP 4: Save the technical coefficients table
tech_coeff.to_csv(tech_coeff_path)
print("Technical coefficients table saved to 'technical_coefficients_from_merged.csv'.")

In [None]:
import pandas as pd

# ----- STEP 0: Pull out the gross output row, stored under the row label ("GO", "GO")
go_row = merged_df.loc[("GO", "GO")]

# ----- STEP 1: Map every supplier column label, e.g. (AT, A01), to its gross output
# Columns whose sector is "GO" are skipped.
gross_output = {}
for col in (label for label in merged_df.columns if label[1] != "GO"):
    try:
        gross_output[col] = go_row[col]
    except KeyError:
        print(f"Warning: Gross output for supplier column {col} not found; defaulting to 1.")
        gross_output[col] = 1

# ----- STEP 2: Interindustry block = all rows and columns whose Sector is not "GO"
rows_mask = ~merged_df.index.get_level_values("Sector").isin(["GO"])
cols_mask = ~merged_df.columns.get_level_values("Sector").isin(["GO"])
interindustry_block = merged_df.loc[rows_mask, cols_mask]

# ----- STEP 3: Compute the technical coefficients
def normalize_column(col):
    """
    Divide one column of the interindustry block by that column's gross output.

    The gross output is looked up in the module-level ``gross_output`` dict under the
    column's label ``col.name`` (a (country, supplier_sector) tuple); a label that is
    missing from the dict is divided by 1.

    A gross output of exactly zero would turn the column into inf/NaN. Such a column
    gets coefficients of 0 instead, so the resulting table stays finite.
    """
    supplier_key = col.name  # a tuple (country, supplier_sector)
    denominator = gross_output.get(supplier_key, 1)
    if denominator == 0:
        # No output in this column: report zero coefficients rather than x / 0
        return pd.Series(0.0, index=col.index, name=col.name)
    return col / denominator

# Normalise the interindustry block column by column
tech_coeff = interindustry_block.apply(normalize_column, axis="index")

# ----- STEP 4: Duplicate B rows with new sector names "B_gas" and "B_nongas"
# Start by copying out the rows whose Sector is "B"
is_b_row = tech_coeff.index.get_level_values("Sector") == "B"
b_rows = tech_coeff.loc[is_b_row].copy()

# Create duplicates with new sector names
def update_sector(index, new_label):
    """
    Return a MultiIndex equal to ``index`` except that every sector "B" is
    replaced by ``new_label``; countries, other sectors and level names are kept.
    """
    relabelled = [
        (country, new_label if sector == "B" else sector)
        for country, sector in index
    ]
    return pd.MultiIndex.from_tuples(relabelled, names=index.names)

# Two independent copies of the B rows, one per new sector name
b_rows_gas, b_rows_nongas = b_rows.copy(), b_rows.copy()
b_rows_gas.index = update_sector(b_rows.index, "B_gas")
b_rows_nongas.index = update_sector(b_rows.index, "B_nongas")

# Swap the original B rows for the two relabelled copies and restore a sorted row order
is_not_b_row = tech_coeff.index.get_level_values("Sector") != "B"
non_b_rows = tech_coeff.loc[is_not_b_row]
row_parts = [non_b_rows, b_rows_gas, b_rows_nongas]
tech_coeff_rows_modified = pd.concat(row_parts).sort_index()

# ----- STEP 5: Duplicate B columns with new sector names "B_gas" and "B_nongas"
# The columns are a MultiIndex: flag the ones whose Sector level is "B" ...
b_cols_mask = tech_coeff_rows_modified.columns.get_level_values("Sector") == "B"
b_cols = tech_coeff_rows_modified.columns[b_cols_mask]

# ... and copy those columns out
b_cols_df = tech_coeff_rows_modified.loc[:, b_cols].copy()

# Create two copies with new column labels
def update_column_labels(columns, new_label):
    """
    Return a MultiIndex equal to ``columns`` except that every sector "B" is
    replaced by ``new_label``; countries, other sectors and level names are kept.
    """
    relabelled = [
        (country, new_label if sector == "B" else sector)
        for country, sector in columns
    ]
    return pd.MultiIndex.from_tuples(relabelled, names=columns.names)

# Two independent copies of the B columns, one per new sector name
b_cols_gas, b_cols_nongas = b_cols_df.copy(), b_cols_df.copy()
b_cols_gas.columns = update_column_labels(b_cols_gas.columns, "B_gas")
b_cols_nongas.columns = update_column_labels(b_cols_nongas.columns, "B_nongas")

# Keep only the columns whose Sector is not "B"
non_b_cols_mask = tech_coeff_rows_modified.columns.get_level_values("Sector") != "B"
non_b_cols = tech_coeff_rows_modified.columns[non_b_cols_mask]
tech_coeff_cols_modified = tech_coeff_rows_modified.loc[:, non_b_cols]

# Put the remaining columns and the two relabelled copies side by side
column_parts = [tech_coeff_cols_modified, b_cols_gas, b_cols_nongas]
final_df = pd.concat(column_parts, axis=1)

# Sort rows first, then columns
final_df = final_df.sort_index(axis=0)
final_df = final_df.sort_index(axis=1)

# ----- STEP 6: Save the final modified table to CSV
final_df.to_csv('technical_coefficients_modified.csv')
print("Modified technical coefficients table saved to 'technical_coefficients_modified.csv'.")


In [None]:
# Write the energy shares DataFrame to a CSV file
energy_shares_path = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/exiobase/energy_shares.csv'
energy_shares_df.to_csv(energy_shares_path)

In [None]:
def split_index_to_multiindex(df):
    """
    Turn the flat row and column labels of ``df`` into ('Country', 'Sector') MultiIndexes.

    Every label is split once at its first underscore, so 'XX_sector_code' becomes
    ('XX', 'sector_code'). ``df`` is modified in place and also returned.
    """
    level_names = ['Country', 'Sector']

    def to_multiindex(labels):
        pairs = [tuple(label.split('_', 1)) for label in labels]
        return pd.MultiIndex.from_tuples(pairs, names=level_names)

    # Rows first, then columns
    for axis_attr in ('index', 'columns'):
        setattr(df, axis_attr, to_multiindex(getattr(df, axis_attr)))
    return df

def process_files_and_split_index(input_dir, output_dir):
    """
    Write a MultiIndex-labelled copy of every CSV file found in ``input_dir``.

    Each '*.csv' file is read with its first column as index, passed through
    ``split_index_to_multiindex`` and saved to ``output_dir`` under the name
    'multiindex_<original filename>'. ``output_dir`` is created if it is missing.
    """
    # Create the destination folder on first use
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for filename in os.listdir(input_dir):
        # Anything that is not a CSV file is ignored
        if not filename.endswith(".csv"):
            continue

        print(f"Processing {filename}...")
        source_path = os.path.join(input_dir, filename)
        table = split_index_to_multiindex(pd.read_csv(source_path, index_col=0))

        # Store the converted table next to the others, prefixed with 'multiindex_'
        output_file_path = os.path.join(output_dir, f'multiindex_{filename}')
        table.to_csv(output_file_path)
        print(f"Processed and saved {filename} to {output_file_path}")

In [None]:
# Application for file paths: raw FIGARO tables in, MultiIndex-labelled copies out
input_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/data/raw/figaro_tables'
output_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_multi_index'
process_files_and_split_index(input_dir=input_dir, output_dir=output_dir)

# Path of the 2021 multiindex figaro table
file_path_2021 = os.path.join(output_dir, 'multiindex_2021_figaro_64.csv')

# The first two rows and the first two columns of the file hold the MultiIndex levels
multiindex_levels = [0, 1]
df_2021 = pd.read_csv(file_path_2021, header=multiindex_levels, index_col=multiindex_levels)

# Print the first few rows to verify the result
figaro_preview = df_2021.head()
print(figaro_preview)


In [None]:
# ---------------------------------------------------
# 1. Sector Mapping (Renaming + Aggregation)
# ---------------------------------------------------

# FIGARO sector code -> target code; codes that are not listed keep their name.
# Groups that collapse into one target code are written as one dict.fromkeys(...) each.
sector_mapping = {
    "C10T12": "C10-12",
    "C13T15": "C13-15",
    "C31_32": "C31_32",  # listed explicitly although the name does not change
    "E37T39": "E37-39",
    **dict.fromkeys(["J58", "J59_60", "J61", "J62_63"], "J"),  # Aggregated
    **dict.fromkeys(
        ["M69_70", "M71", "M72", "M73", "M74_75", "N77", "N78", "N79", "N80T82"],
        "M_N",
    ),
    **dict.fromkeys(["Q86", "Q87_88"], "Q"),
    **dict.fromkeys(["R90T92", "R93", "S94", "S95", "S96"], "R_S"),
    "L": "L68",
}

# ---------------------------------------------------
# 2. Load FIGARO Data for 2021
# ---------------------------------------------------

# Path of the 2021 multiindex figaro table
file_path_2021 = os.path.join(output_dir, 'multiindex_2021_figaro_64.csv')

# The first two rows and the first two columns of the file hold the MultiIndex levels
multiindex_levels = [0, 1]
df_2021 = pd.read_csv(file_path_2021, header=multiindex_levels, index_col=multiindex_levels)

# Print the first few rows to verify the structure
figaro_preview = df_2021.head()
print(figaro_preview)

# ---------------------------------------------------
# 3. Function to Apply Sector Mapping and Aggregate
# ---------------------------------------------------

def apply_sector_mapping(df, sector_mapping):
    """
    Rename and aggregate sectors in both rows and columns using the provided mapping.

    Parameters:
      df: pd.DataFrame whose rows and columns are two-level (country, sector) MultiIndexes
      sector_mapping: dict mapping an original sector code to its target code;
                      sector codes that are not in the dict are kept unchanged

    Returns:
      A new DataFrame with levels named ['Country', 'Sector'] on both axes, in which
      rows and columns that share a (Country, Sector) label have been summed.
      The DataFrame passed in is left untouched.
    """
    level_names = ['Country', 'Sector']

    def _relabel(axis_labels):
        # Replace the sector part of every (country, sector) label, keeping unmapped codes
        return pd.MultiIndex.from_tuples(
            [(country, sector_mapping.get(sector, sector)) for country, sector in axis_labels],
            names=level_names,
        )

    # set_axis returns a new object, so the caller's frame keeps its original labels
    relabelled = df.set_axis(_relabel(df.index), axis=0).set_axis(_relabel(df.columns), axis=1)

    # Sum duplicated rows first ...
    rows_aggregated = relabelled.groupby(level=level_names).sum()

    # ... then duplicated columns. groupby(axis=1) is deprecated in recent pandas,
    # so the columns are aggregated on the transposed frame instead.
    return rows_aggregated.T.groupby(level=level_names).sum().T

# ---------------------------------------------------
# 4. Apply Sector Mapping to FIGARO Data
# ---------------------------------------------------

# Rename and aggregate the FIGARO sectors of the 2021 table
df_2021_mapped = apply_sector_mapping(df=df_2021, sector_mapping=sector_mapping)

# Print the first few rows to verify that the aggregation worked
mapped_preview = df_2021_mapped.head()
print(mapped_preview)

# ---------------------------------------------------
# 5. Add Gross Output Row
# ---------------------------------------------------

def add_gross_output_row(df):
    """
    Return ``df`` with one extra row labelled ('GO', 'GO') that holds the column sums.
    """
    column_totals = df.sum(axis=0).rename(('GO', 'GO'))
    go_frame = pd.DataFrame(column_totals).T  # one-row frame carrying the totals
    return pd.concat([df, go_frame])

# Append the ('GO', 'GO') row of column sums
df_2021_mapped = add_gross_output_row(df_2021_mapped)

# Give the row index the same level names as the columns
df_2021_mapped.index = df_2021_mapped.index.set_names(df_2021_mapped.columns.names)

# Show the level names of both axes
print("Row index names:", df_2021_mapped.index.names)
print("Column index names:", df_2021_mapped.columns.names)

# Show the row index itself
display(df_2021_mapped.index)

# Print the last rows to verify that the Gross Output row was added, then export
mapped_tail = df_2021_mapped.tail()
print(mapped_tail)
df_2021_mapped.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_mapped_2021.csv')
def merge_countries(df, countries_to_merge, target='FIGW1'):
    """
    Merge a set of countries into one target country on both axes.

    Every row and column label whose country is in ``countries_to_merge`` is relabelled
    with ``target``; rows and columns that then share a label are summed.

    Assumes both rows and columns are two-level (country, sector) MultiIndexes,
    normally named ['Country', 'Sector'].

    Parameters:
      df: pd.DataFrame with MultiIndex for both rows and columns
      countries_to_merge: list of country codes to merge (e.g., ['AR', 'SA'])
      target: the target country code to absorb the values (default 'FIGW1')

    Returns:
      A new DataFrame with the specified countries merged into the target.
      The DataFrame passed in is left untouched and the level names are preserved.
    """
    def _relabel(axis_labels):
        # Swap the country part of a label for `target` where requested
        return pd.MultiIndex.from_tuples(
            [
                (target if country in countries_to_merge else country, sector)
                for country, sector in axis_labels
            ],
            names=axis_labels.names,
        )

    # set_axis returns a new object, so the caller's frame keeps its original labels
    relabelled = df.set_axis(_relabel(df.index), axis=0).set_axis(_relabel(df.columns), axis=1)

    # Group by level position so that unnamed levels work as well as named ones
    both_levels = [0, 1]
    rows_merged = relabelled.groupby(level=both_levels).sum()

    # groupby(axis=1) is deprecated in recent pandas; aggregate the columns
    # on the transposed frame instead.
    return rows_merged.T.groupby(level=both_levels).sum().T


# Fold 'AR' and 'SA' into the default target region of merge_countries
countries_to_fold = ['AR', 'SA']
merged_df = merge_countries(df_2021_mapped, countries_to_fold)
display(merged_df)

# Export the merged table
merged_df.to_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/notebooks/NB_exio3_figaro_gas/figaro_mapped_2021_merged.csv')

In [None]:
# ----- STEP 0: Pull out the gross output row, stored under the row label ("GO", "GO")
go_row = merged_df.loc[("GO", "GO")]

# ----- STEP 1: Map every supplier column label, e.g. (AT, A01), to its gross output
# Columns whose sector is "GO" are skipped.
gross_output = {}
for col in (label for label in merged_df.columns if label[1] != "GO"):
    try:
        gross_output[col] = go_row[col]
    except KeyError:
        print(f"Warning: Gross output for supplier column {col} not found; defaulting to 1.")
        gross_output[col] = 1

# ----- STEP 2: Interindustry block = all rows and columns whose Sector is not "GO"
rows_mask = ~merged_df.index.get_level_values("Sector").isin(["GO"])
cols_mask = ~merged_df.columns.get_level_values("Sector").isin(["GO"])
interindustry_block = merged_df.loc[rows_mask, cols_mask]

# ----- STEP 3: Compute the technical coefficients
# For each supplier column, divide all its entries by its corresponding gross output.
def normalize_column(col):
    """
    Divide one column of the interindustry block by that column's gross output.

    The gross output is looked up in the module-level ``gross_output`` dict under the
    column's label ``col.name`` (a (country, supplier_sector) tuple); a label that is
    missing from the dict is divided by 1.

    A gross output of exactly zero would turn the column into inf/NaN. Such a column
    gets coefficients of 0 instead, so the resulting table stays finite.
    """
    supplier_key = col.name  # a tuple (country, supplier_sector)
    denominator = gross_output.get(supplier_key, 1)
    if denominator == 0:
        # No output in this column: report zero coefficients rather than x / 0
        return pd.Series(0.0, index=col.index, name=col.name)
    return col / denominator

# Normalise the interindustry block column by column
tech_coeff_path = 'technical_coefficients_from_merged.csv'
tech_coeff = interindustry_block.apply(normalize_column, axis="index")

# ----- STEP 4: Save the technical coefficients table
tech_coeff.to_csv(tech_coeff_path)
print("Technical coefficients table saved to 'technical_coefficients_from_merged.csv'.")
import pandas as pd

In [None]:
# ----- STEP 0: Pull out the gross output row, stored under the row label ("GO", "GO")
go_row = merged_df.loc[("GO", "GO")]

# ----- STEP 1: Map every supplier column label, e.g. (AT, A01), to its gross output
# Columns whose sector is "GO" are skipped.
gross_output = {}
for col in (label for label in merged_df.columns if label[1] != "GO"):
    try:
        gross_output[col] = go_row[col]
    except KeyError:
        print(f"Warning: Gross output for supplier column {col} not found; defaulting to 1.")
        gross_output[col] = 1

# ----- STEP 2: Interindustry block = all rows and columns whose Sector is not "GO"
rows_mask = ~merged_df.index.get_level_values("Sector").isin(["GO"])
cols_mask = ~merged_df.columns.get_level_values("Sector").isin(["GO"])
interindustry_block = merged_df.loc[rows_mask, cols_mask]

# ----- STEP 3: Compute the technical coefficients
def normalize_column(col):
    """Divide one supplier column by that supplier's gross output.

    Parameters
    ----------
    col : pandas.Series
        One column of the interindustry block. ``col.name`` (the column label)
        is the lookup key into the notebook-level ``gross_output`` dict.

    Returns
    -------
    pandas.Series
        ``col`` divided by its gross output. When ``gross_output`` has no entry
        for the column, the divisor falls back to 1 (values pass through
        unchanged). When the gross output is exactly zero, a column of zeros
        is returned instead of the inf/NaN a division by zero would produce.
    """
    supplier_key = col.name  # column label, e.g. a (country, sector) tuple
    denominator = gross_output.get(supplier_key, 1)
    # A supplier with zero gross output cannot be normalised; its coefficients
    # are set to zero so no inf/NaN leaks into the saved coefficient table.
    if np.isscalar(denominator) and denominator == 0:
        return pd.Series(0.0, index=col.index, name=col.name)
    return col / denominator

# apply(..., axis=0) hands each column of the interindustry block to
# normalize_column as a Series and reassembles the results into a DataFrame.
tech_coeff = interindustry_block.apply(normalize_column, axis=0)

# ----- STEP 4: Duplicate B rows with new sector names "B_gas" and "B_nongas"
# Select rows where Sector == "B"; .copy() detaches the selection from
# tech_coeff so the copies can be relabelled without touching the original table.
b_rows = tech_coeff.loc[tech_coeff.index.get_level_values("Sector") == "B"].copy()

# Create duplicates with new sector names
def update_sector(index, new_label):
    """Return a rebuilt two-level MultiIndex with sector "B" relabelled.

    Parameters
    ----------
    index : pandas.MultiIndex
        Index whose entries unpack as ``(country, sector)``.
    new_label : str
        Replacement used wherever the sector element equals ``"B"``.

    Returns
    -------
    pandas.MultiIndex
        Same length, order and level names as ``index``; entries whose sector
        is not ``"B"`` are carried over unchanged.
    """
    relabelled = [
        (country, new_label if sector == "B" else sector)
        for country, sector in index
    ]
    return pd.MultiIndex.from_tuples(relabelled, names=index.names)

# Two independent copies of the B rows, one per new sector label
b_rows_gas = b_rows.copy()
b_rows_gas.index = update_sector(b_rows.index, "B_gas")
b_rows_nongas = b_rows.copy()
b_rows_nongas.index = update_sector(b_rows.index, "B_nongas")

# Swap the original B rows for the two relabelled copies, then restore sorted row order
non_b_rows = tech_coeff.loc[~(tech_coeff.index.get_level_values("Sector") == "B")]
tech_coeff_rows_modified = pd.concat([non_b_rows, b_rows_gas, b_rows_nongas])
tech_coeff_rows_modified = tech_coeff_rows_modified.sort_index()

# ----- STEP 5: Duplicate B columns with new sector names "B_gas" and "B_nongas"
# The columns are a MultiIndex; pick the labels whose Sector level is "B".
b_cols_mask = tech_coeff_rows_modified.columns.get_level_values("Sector") == "B"
b_cols = tech_coeff_rows_modified.columns[b_cols_mask]

# Detached copy of just the B columns, used as the template for both duplicates
b_cols_df = tech_coeff_rows_modified.loc[:, b_cols].copy()

# Create two copies with new column labels
def update_column_labels(columns, new_label):
    """Return rebuilt two-level column labels with sector "B" relabelled.

    Parameters
    ----------
    columns : pandas.MultiIndex
        Column labels whose entries unpack as ``(country, sector)``.
    new_label : str
        Replacement used wherever the sector element equals ``"B"``.

    Returns
    -------
    pandas.MultiIndex
        Same length, order and level names as ``columns``.
    """
    def relabel(pair):
        # Only the sector element is ever replaced; the country is kept as is.
        country, sector = pair
        return (country, new_label) if sector == "B" else (country, sector)

    return pd.MultiIndex.from_tuples(
        [relabel(pair) for pair in columns], names=columns.names
    )

b_cols_gas = b_cols_df.copy()
b_cols_gas.columns = update_column_labels(b_cols_gas.columns, "B_gas")
b_cols_nongas = b_cols_df.copy()
b_cols_nongas.columns = update_column_labels(b_cols_nongas.columns, "B_nongas")

# Everything except the original B columns
non_b_cols_mask = ~(tech_coeff_rows_modified.columns.get_level_values("Sector") == "B")
non_b_cols = tech_coeff_rows_modified.columns[non_b_cols_mask]
tech_coeff_cols_modified = tech_coeff_rows_modified.loc[:, non_b_cols]

# Put the relabelled duplicates next to the remaining columns, then sort rows
# first and columns second so the saved table has a deterministic layout.
final_df = (
    pd.concat([tech_coeff_cols_modified, b_cols_gas, b_cols_nongas], axis=1)
    .sort_index(axis=0)
    .sort_index(axis=1)
)

# ----- STEP 6: Write the final modified table to CSV
final_df.to_csv('technical_coefficients_modified.csv')
print("Modified technical coefficients table saved to 'technical_coefficients_modified.csv'.")


In [None]:
import pandas as pd

# Read both tables with a two-level row index and a two-level column header
technical_coefficients = pd.read_csv('technical_coefficients_modified.csv', index_col=[0, 1], header=[0, 1])
energy_shares = pd.read_csv('C:/Users/danie/Nextcloud/Coding/Masterthesis/exiobase/energy_shares.csv', index_col=[0, 1], header=[0, 1])

# Sort rows, then columns, on both tables so label-based lookups behave consistently
technical_coefficients = technical_coefficients.sort_index(axis=0).sort_index(axis=1)
energy_shares = energy_shares.sort_index(axis=0).sort_index(axis=1)

# Walk every (country, sector) pair that appears in energy_shares and scale the
# matching B_gas / B_nongas cells of the coefficient table.
# NOTE(review): only cells whose row country equals the column country are
# rescaled; confirm that cross-country B_gas/B_nongas rows should stay unweighted.
for country in energy_shares.index.get_level_values(0).unique():
    gas_row = (country, 'B_gas')
    nongas_row = (country, 'B_nongas')
    for sector in energy_shares.columns.get_level_values(1).unique():
        target_col = (country, sector)
        share_col_known = target_col in energy_shares.columns

        # A missing share leaves the gas row at 0 and the non-gas row untouched (factor 1)
        if share_col_known and gas_row in energy_shares.index:
            gas_share = energy_shares.loc[gas_row, target_col]
        else:
            gas_share = 0
        if share_col_known and nongas_row in energy_shares.index:
            nongas_share = energy_shares.loc[nongas_row, target_col]
        else:
            nongas_share = 1

        # Nothing to scale when the coefficient table has no such column
        if target_col not in technical_coefficients.columns:
            continue
        if gas_row in technical_coefficients.index:
            technical_coefficients.loc[gas_row, target_col] *= gas_share
        if nongas_row in technical_coefficients.index:
            technical_coefficients.loc[nongas_row, target_col] *= nongas_share

# Persist the weighted table
technical_coefficients.to_csv('technical_coefficients_with_weights.csv')
print("Updated technical coefficients table saved to 'technical_coefficients_with_weights.csv'.")
