In [3]:
import os
import pandas as pd
import numpy as np

# ---------------------------------------------------
# 1. Load Data and Calculate Technical Coefficients
# ---------------------------------------------------

def extract_gross_output(output_df):
    """
    Return the gross output (total output) values stored in the output table.

    The values are read from the row labelled 'op_GO' and returned as the
    underlying array of that row, in the column order of output_df.

    Raises:
        ValueError: if output_df has no row labelled 'op_GO'.
    """
    # Guard clause: fail early when the gross output row is missing.
    if 'op_GO' not in output_df.index:
        raise ValueError("'op_GO' (Gross Output) not found in the output table.")

    go_row = output_df.loc['op_GO']
    return go_row.values

def calculate_technical_coefficients(interindustry_df, gross_output):
    """
    Calculate the technical coefficient matrix (A) from the interindustry matrix and gross output.

    A_ij = Z_ij / X_j: every column j of the interindustry matrix Z is divided by the
    gross output X_j at the same position. Columns whose gross output is not strictly
    positive (zero, negative or NaN) are left as all zeros instead of being divided.

    Parameters
    ----------
    interindustry_df : pandas.DataFrame
        Interindustry flows Z with numeric values. The DataFrame is not modified.
    gross_output : array-like
        Gross output values, one per column of interindustry_df, matched by position
        (not by label).

    Returns
    -------
    pandas.DataFrame
        Float matrix A with the same shape as interindustry_df. A leading 'op_' is
        removed from the index labels and a leading 'ip_' from the column labels.

    Raises
    ------
    ValueError
        If the number of columns of interindustry_df differs from len(gross_output).
    """
    print(f"Interindustry shape: {interindustry_df.shape}")
    print(f"Gross Output length: {len(gross_output)}")

    # Ensure the number of columns matches the length of the gross output
    if interindustry_df.shape[1] != len(gross_output):
        raise ValueError(f"Mismatch between interindustry columns ({interindustry_df.shape[1]}) and gross output length ({len(gross_output)})")

    # Work in floating point throughout: an integer-typed input table would otherwise
    # produce an integer result buffer and silently truncate every coefficient.
    flows = interindustry_df.to_numpy(dtype=float)
    totals = np.asarray(gross_output, dtype=float).ravel()

    # Only divide where the gross output is strictly positive; all other columns keep
    # the zeros of the pre-filled buffer, which also avoids division-by-zero warnings.
    positive = totals > 0
    A = np.zeros(flows.shape, dtype=float)
    np.divide(flows, totals, out=A, where=positive)

    # Build the prefix-free labels as new Index objects so the caller's DataFrame
    # keeps its original labels.
    clean_index = interindustry_df.index.str.replace(r'^op_', '', regex=True)
    clean_columns = interindustry_df.columns.str.replace(r'^ip_', '', regex=True)

    return pd.DataFrame(A, index=clean_index, columns=clean_columns)

# ---------------------------------------------------
# 2. Process Each Year Folder
# ---------------------------------------------------

def process_year_folder(year, input_dir, output_dir):
    """
    Build and store the technical coefficient matrix for a single year.

    Reads 'interindustry_table_{year}.csv' and 'output_table_{year}.csv' from the
    folder '{input_dir}/{year}' (first CSV column used as the index), derives the
    technical coefficients from them and writes the result to
    '{output_dir}/technical_coefficients_{year}.csv'. output_dir is created when
    it does not exist yet.
    """
    print(f"Processing year {year}...")

    source_folder = os.path.join(input_dir, str(year))

    # Both input tables live side by side in the year folder.
    flows_table = pd.read_csv(
        os.path.join(source_folder, f'interindustry_table_{year}.csv'),
        index_col=0,
    )
    totals_table = pd.read_csv(
        os.path.join(source_folder, f'output_table_{year}.csv'),
        index_col=0,
    )

    # Gross output comes from the output table and scales the interindustry flows.
    coefficients = calculate_technical_coefficients(
        flows_table,
        extract_gross_output(totals_table),
    )

    # The destination folder may not exist on the first run.
    os.makedirs(output_dir, exist_ok=True)
    technical_coeff_file = os.path.join(output_dir, f'technical_coefficients_{year}.csv')
    coefficients.to_csv(technical_coeff_file)

    print(f"Saved technical coefficients for year {year} at {technical_coeff_file}")

# ---------------------------------------------------
# 3. Process All Years
# ---------------------------------------------------

def process_all_years(input_dir, output_dir):
    """
    Run process_year_folder for every sub-directory of input_dir.

    Each directory name found directly inside input_dir is treated as a year;
    the names are processed in sorted (string) order. Plain files are ignored.
    """
    entries = os.listdir(input_dir)

    # Keep directories only; the filter is evaluated fully before processing starts.
    year_names = sorted(
        name for name in entries
        if os.path.isdir(os.path.join(input_dir, name))
    )

    for year_name in year_names:
        process_year_folder(year_name, input_dir, output_dir)

# Example usage
# NOTE: both paths are absolute and specific to one machine; adjust them before
# running this cell elsewhere.
# input_dir is scanned for sub-folders, each of which is processed as one year.
input_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/processed_tables'
# Destination folder for the generated technical_coefficients_<year>.csv files;
# it is created during processing if it does not exist.
output_dir = 'C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients'

# Process all years (executes immediately when this cell/module is run)
process_all_years(input_dir, output_dir)


Processing year 2010...
Interindustry shape: (2576, 2576)
Gross Output length: 2576
Saved technical coefficients for year 2010 at C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients\technical_coefficients_2010.csv
Processing year 2011...
Interindustry shape: (2576, 2576)
Gross Output length: 2576
Saved technical coefficients for year 2011 at C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients\technical_coefficients_2011.csv
Processing year 2012...
Interindustry shape: (2576, 2576)
Gross Output length: 2576
Saved technical coefficients for year 2012 at C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients\technical_coefficients_2012.csv
Processing year 2013...
Interindustry shape: (2576, 2576)
Gross Output length: 2576
Saved technical coefficients for year 2013 at C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients\technical_coefficients_2013.csv
Processing year 2014