In [None]:
import pandas as pd
import os
import glob
import numpy as np

In [None]:
# Directory containing the projected EC files. The loop further down reads every
# file in it whose name ends in '.csv' and uses the text before the first '_'
# of each file name as the SSP scenario label (e.g. SSP126_Predictions.csv -> SSP126).
# NOTE(review): left as an empty placeholder here — presumably it has to be set
# to a real path before the following cell is run; confirm.
directory = ''

In [None]:
def remove_outliers(df, column, k=1.5):
    """Return the rows of `df` whose `column` value lies within the IQR fences.

    A row is dropped when its value is below ``Q1 - k * IQR`` or above
    ``Q3 + k * IQR``, where Q1/Q3 are the 25th/75th percentiles of `column`.
    Rows whose value is NaN are kept, because both comparisons are False for NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    column : str
        Name of the numeric column the fences are computed on.
    k : float, default 1.5
        Fence multiplier applied to the inter-quartile range.

    Returns
    -------
    pandas.DataFrame
        The subset of `df` (original index preserved) without the outlier rows.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    is_outlier = (df[column] < (q1 - k * iqr)) | (df[column] > (q3 + k * iqr))
    return df[~is_outlier]


# Per-scenario summaries are collected here and concatenated once after the
# loop; growing a DataFrame with pd.concat on every iteration re-copies all
# previously accumulated rows each time.
scenario_frames = []

# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined table reproducible between runs and machines.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.csv'):
        continue

    # Read the CSV file
    filepath = os.path.join(directory, filename)
    data = pd.read_csv(filepath)

    # Outlier removal is chained: `data` is filtered on EC_per_capita only,
    # while `data_filtered` is additionally filtered on EC_per_capita_2019.
    data = remove_outliers(data, 'EC_per_capita')
    data_filtered = remove_outliers(data, 'EC_per_capita_2019')

    # Regional totals of TP and TP_2019, used as the weight denominators
    region_TP_sums = data.groupby('Region')['TP'].sum().rename('TP_sum')
    region_TP_2019_sums = data_filtered.groupby('Region')['TP_2019'].sum().rename('TP_2019_sum')

    # Attach each region's total to every row of that region
    data = data.merge(region_TP_sums, on='Region')
    data_filtered = data_filtered.merge(region_TP_2019_sums, on='Region')

    # Each row's contribution to the TP-weighted regional EC per capita.
    # NOTE(review): rows with a NaN EC value still count towards TP_sum but are
    # skipped by the sums below, so the weights would not add up to 1 for a
    # region containing such rows — confirm the inputs have no missing EC values.
    data['weighted_EC'] = (data['TP'] / data['TP_sum']) * data['EC_per_capita']
    data_filtered['weighted_EC_2019'] = (
        (data_filtered['TP_2019'] / data_filtered['TP_2019_sum'])
        * data_filtered['EC_per_capita_2019']
    )

    # Sum the contributions by region
    grouped_data = data.groupby('Region')['weighted_EC'].sum().reset_index()
    grouped_data_2019 = data_filtered.groupby('Region')['weighted_EC_2019'].sum().reset_index()

    # Combine the 2019 and non-2019 data; the outer join keeps a region even
    # if it is present in only one of the two tables.
    grouped_data = pd.merge(grouped_data, grouped_data_2019, on='Region', how='outer')

    # Extract SSP scenario from filename (e.g., SSP126 from SSP126_Predictions.csv)
    ssp_scenario = filename.split('_')[0]

    # Add a new column for the SSP scenario
    grouped_data['SSP'] = ssp_scenario

    scenario_frames.append(grouped_data)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory holds no CSV files.
if scenario_frames:
    combined_results = pd.concat(scenario_frames, ignore_index=True)
else:
    combined_results = pd.DataFrame()