In [1]:
import pandas as pd
import numpy as np
from openpyxl import load_workbook
import ast
import os



In [2]:
# Filtering function for report_df
def correct_list_string(s):
    """Normalise a loosely written list string into a valid Python list literal.

    Filter cells may contain lists typed by hand, e.g. ``[Commerce, Cooling]``
    or ``["a", 'b']``. Every item is re-quoted so that ``ast.literal_eval`` can
    parse the result into a list of strings.

    Parameters
    ----------
    s : str
        Raw cell text. Any ``?`` characters are removed first.

    Returns
    -------
    str
        A list literal with each item quoted when ``s`` is wrapped in ``[...]``;
        otherwise ``s`` (minus ``?`` characters) unchanged.
    """
    s = s.replace('?', '')

    # Only strings that look like a list are rewritten.
    if not (s.startswith('[') and s.endswith(']')):
        return s

    corrected_items = []
    for item in s[1:-1].split(','):
        raw_item = item.strip()
        # "[]" and trailing commas produce empty fragments; skipping them keeps
        # an empty list empty instead of turning it into [''].
        if not raw_item:
            continue
        # Drop surrounding single or double quotes, then let repr() re-quote so
        # that embedded apostrophes still yield a parseable literal.
        corrected_items.append(repr(raw_item.strip("'\"")))

    return '[' + ', '.join(corrected_items) + ']'

def filter_data(row):
    """Return the rows of the module-level ``report_df`` selected by one filter row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series from ``iterrows``)
        Must contain 'WorkbookID', 'SheetID' and 'RowID'. Every other entry whose
        key is a column of ``report_df`` is a filter: ``"*"`` means "no
        restriction", a string starting with ``[`` is parsed as a list and
        matched with ``isin``, anything else is matched with ``==``.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows with an extra 'UniqueID' column holding
        ``"<WorkbookID>_<SheetID>_<RowID>"``. When the row carries no
        restriction at all, a tagged copy of the whole ``report_df`` is returned.
    """
    conditions = []
    debug_info = []  # Human-readable form of each condition, for the empty-result message

    # Create the unique ID from the filter row's three identifier columns
    unique_id = f"{row['WorkbookID']}_{row['SheetID']}_{row['RowID']}"

    for col, value in row.items():
        # Skip the identifier columns and anything report_df has no column for
        if col in ('WorkbookID', 'SheetID', 'RowID') or col not in report_df.columns:
            continue

        # Strip trailing spaces BEFORE the wildcard test, so a cell typed as "* "
        # is still treated as "no restriction" instead of an equality filter.
        if isinstance(value, str):
            value = value.rstrip()
            if value == "*":
                continue

        # Parse list-like strings into real lists
        if isinstance(value, str) and value.startswith("["):
            if col != 'Year':  # 'Year' lists are parsed as written; other columns are re-quoted first
                value = correct_list_string(value)
            value = ast.literal_eval(value)

        if isinstance(value, list):
            conditions.append(report_df[col].isin(value))
            debug_info.append(f"{col} is in {value}")
        else:
            conditions.append(report_df[col] == value)
            debug_info.append(f"{col} == {value}")

    # No restriction at all: every row matches. Return a tagged copy so the caller
    # gets the same columns as for any other filter and never the shared frame itself.
    if not conditions:
        result = report_df.copy()
        result['UniqueID'] = unique_id
        return result

    # Combine all conditions with a logical AND
    combined_condition = conditions[0]
    for cond in conditions[1:]:
        combined_condition = combined_condition & cond

    result = report_df[combined_condition].copy()  # Copy so the original dataframe is not modified
    result['UniqueID'] = unique_id
    if result.empty:
        print(f"Filter {debug_info} resulted in empty dataframe.") # This will show the failed conditions

    return result

# Convert SATIMGE values based on Unit column
def convert_value(row):
    """Scale a row's 'SATIMGE' by the factor registered for its 'TargetUnit'.

    The factor is looked up in the module-level ``conversion_factors`` mapping.
    When a (truthy) factor exists the product is rounded to 6 decimals;
    otherwise the original 'SATIMGE' value is returned untouched.
    """
    multiplier = conversion_factors.get(row['TargetUnit'])
    if not multiplier:
        # Unknown unit (or a zero/None factor): leave the value as it is
        return row['SATIMGE']
    return np.round(row['SATIMGE'] * multiplier, 6)


In [3]:
# Input/output locations (relative to the notebook's working directory)
path_Report = "input/REPORT_00_IMAGINE_runs.csv" # add/replace file path here or filename if in same directory
path_SetsAndMaps = "input/SetsAndMaps.xlsm" # add/replace file path here or filename if in same directory
path_Output = "input/merged_data.csv" # add/replace file path here or filename if in same directory

# Load the model report and the two lookup sheets
report_df = pd.read_csv(path_Report, low_memory=False)
mapPRC_df = pd.read_excel(path_SetsAndMaps, sheet_name="mapPRC")
mapCOM_df = pd.read_excel(path_SetsAndMaps, sheet_name="mapCOM")

# 'Eps' entries are replaced by 0 so the column can be cast to float
report_df['SATIMGE'] = report_df['SATIMGE'].replace('Eps', 0).astype(float)

# Attach process attributes, then commodity attributes, keeping every report row
merged_df = (
    report_df
    .merge(mapPRC_df, on='Process', how='left')
    .merge(mapCOM_df, on='Commodity', how='left')
    .reset_index(drop=True)
)

# merged_df.to_csv(path_Output, index=False)


In [4]:
# ADJUST THE EMISSIONS TO CO2EQ
# NOTE: this cell rewrites merged_df['SATIMGE'] from its own previous value, so
# running it twice scales the listed indicators twice. Re-run the load cell first.

# Multiplier per Indicator used to express the value as CO2-equivalent
# (values look like 100-year global warming potentials - TODO confirm the source).
emf_df = pd.DataFrame({'Indicator': ['CH4','N2O','C2F6','CF4'],
                    'E_factor': [28, 265,11100,6630]})

# Attach the factor to every row via the 'Indicator' column
merged_df = merged_df.merge(emf_df, on='Indicator', how='left')

# Indicators without a listed factor are left unscaled (factor 1).
# Assign the result back: calling fillna(..., inplace=True) on a column selection
# is deprecated and has no effect on the frame under pandas Copy-on-Write.
merged_df['E_factor'] = merged_df['E_factor'].fillna(1)

# Scale the values
merged_df['Result'] = merged_df['SATIMGE'] * merged_df['E_factor']

# Replace the original SATIMGE column by the scaled one (it ends up as the last column)
merged_df = merged_df.drop(columns=['SATIMGE', 'E_factor'])
merged_df = merged_df.rename(columns={'Result': 'SATIMGE'})

# Reset index
merged_df = merged_df.reset_index(drop=True)

merged_df.head(30)

Unnamed: 0,Process,Commodity,Year,Scenario,Indicator,Sector,Subsector,Subsubsector,TechDescription,IPCC_Category_L1,IPCC_Category_L2,IPCC_Category_L3,IPCC_Category_L4,Description,Short Description,Commodity_Name,SATIMGE
0,SATIMMarginals,CO2CAPT,2019,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883
1,SATIMMarginals,CO2CAPT,2019,NZ10_2099B_99NESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883
2,SATIMMarginals,CO2CAPT,2020,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,0.177243
3,SATIMMarginals,CO2CAPT,2020,NZ10_2099B_99NESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883
4,SATIMMarginals,CO2CAPT,2021,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883
5,SATIMMarginals,CO2CAPT,2021,NZ10_2099B_99NESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883
6,SATIMMarginals,CO2CAPT,2022,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,0.144287
7,SATIMMarginals,CO2CAPT,2022,NZ10_2099B_99NESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883
8,SATIMMarginals,CO2CAPT,2023,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.118207
9,SATIMMarginals,CO2CAPT,2023,NZ10_2099B_99NESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,CO2 Captured ...,CO2 Captured ...,0,-0.438883


In [5]:
# Distinct values of the 'Scenario' column in the merged report data; the bare
# name on the last line displays the resulting array.
scenarios = merged_df['Scenario'].unique() #array 
scenarios

array(['NZ10_2050A_08EESSNXCT_IMG', 'NZ10_2099B_99NESSNXCT-IMG',
       'NZ10_2050A_09EESSNXCT-IMG'], dtype=object)

In [6]:

# Folder holding the filter workbooks. Built with os.path.join so the notebook
# also runs where '\\' is not a path separator.
filter_folder = os.path.join('input', 'FilterScripts')
curr_wdr = os.getcwd()
filters_path = os.path.join(curr_wdr, filter_folder)

# One DataFrame per filter workbook
dataframes = []

# Iterate in sorted order (os.listdir order is unspecified) and skip the '~$...'
# lock files Excel creates next to an open workbook.
for filename in sorted(os.listdir(filters_path)):
    if filename.endswith('.xlsx') and not filename.startswith('~$'):
        print(filename)
        file_path = os.path.join(filters_path, filename)
        df = pd.read_excel(file_path)
        df = df.dropna()  # keep only fully populated filter rows
        dataframes.append(df)

# Fail with an explicit message instead of pd.concat's "No objects to concatenate"
if not dataframes:
    raise FileNotFoundError(f"No .xlsx filter files found in {filters_path}")

# Concatenate all DataFrames into one
combined_df = pd.concat(dataframes, ignore_index=True)

# Drop rows with NaN values
combined_df = combined_df.dropna()

# The identifier columns are stored as integers (no NaN can remain after dropna)
columns_to_convert = ['WorkbookID', 'SheetID', 'RowID']

if not combined_df.empty:
    combined_df[columns_to_convert] = combined_df[columns_to_convert].astype(int)

# Keep the target unit of every filter row in its own table. Take a copy so that
# later edits to Units do not act on a slice of combined_df.
Units = combined_df[['WorkbookID', 'SheetID', 'RowID', 'TargetUnit']].copy()
combined_df = combined_df.drop(columns=['TargetUnit'])

# Reset the index
combined_df = combined_df.reset_index(drop=True)


# Now, 'combined_df' contains data from all the Excel files in the folder
filter_df = combined_df



commerce_filters.xlsx
pass_transport_filter.xlsx
res_filters.xlsx


### Rename the scenarios according to WorkbookID

In [7]:
# Mapping from WorkbookID (integer key) to the scenario name that workbook reports.
# update_scenario looks rows up here by their 'WorkbookID'.
workbook_scenario_mapping = {
    1: 'NZ10_2099B_99NESSNXCT-IMG',
    2: 'NZ10_2050A_08EESSNXCT_IMG',
    3: 'NZ10_2050A_09EESSNXCT-IMG'
}

# Define a function to update the 'Scenario' column based on conditions
def update_scenario(row):
    """Return the scenario name a filter row should carry.

    The row's 'WorkbookID' is looked up in the module-level
    ``workbook_scenario_mapping``; when it is found, the mapped scenario name is
    returned regardless of what the row currently holds. When the WorkbookID is
    not mapped, the row's existing 'Scenario' value is returned unchanged.

    The earlier version first tested ``scenario in workbook_scenario_mapping``,
    which checks the integer keys rather than the scenario names, so those
    branches never applied to real scenario names and could raise ``KeyError``
    for an unmapped WorkbookID. The single lookup below is the behaviour the
    remaining branches already implemented.
    """
    scenario = row['Scenario']
    workbook_id = row['WorkbookID']
    return workbook_scenario_mapping.get(workbook_id, scenario)

# Rewrite the 'Scenario' column of filter_df row by row (axis=1) with the value
# update_scenario returns for that row.
filter_df['Scenario'] = filter_df.apply(update_scenario, axis=1)



#testing an edit

In [8]:
# Display the combined filter table for inspection.
filter_df

Unnamed: 0,WorkbookID,SheetID,RowID,Process,Commodity,Year,Scenario,Indicator,SATIMGE,Sector,Subsector,Subsubsector,TechDescription,IPCC_Category_L1,IPCC_Category_L2,IPCC_Category_L3,IPCC_Category_L4,Description,Short Description,Commodity_Name
0,1,18,38,*,*,"[2020, 2030, 2040, 2050]",NZ10_2099B_99NESSNXCT-IMG,['Activity'],*,['Commerce'],*,*,*,*,*,*,*,*,*,*
1,1,18,40,*,*,"[2020, 2030, 2040, 2050]",NZ10_2099B_99NESSNXCT-IMG,['Activity'],*,['Commerce'],*,['Space Heating'],*,*,*,*,*,*,*,*
2,1,18,41,*,*,"[2020, 2030, 2040, 2050]",NZ10_2099B_99NESSNXCT-IMG,['Activity'],*,['Commerce'],*,['Cooling'],*,*,*,*,*,*,*,*
3,1,18,42,*,*,"[2020, 2030, 2040, 2050]",NZ10_2099B_99NESSNXCT-IMG,['Activity'],*,['Commerce'],*,['Cooking'],*,*,*,*,*,*,*,*
4,1,18,43,*,*,"[2020, 2030, 2040, 2050]",NZ10_2099B_99NESSNXCT-IMG,['Activity'],*,['Commerce'],*,['Water Heating'],*,*,*,*,*,*,*,*
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,3,16,130,*,['RESBIW'],"[2020, 2030, 2040, 2050]",NZ10_2050A_09EESSNXCT-IMG,['CO2'],*,['Residential'],*,*,*,*,*,*,*,*,*,*
385,3,16,133,*,*,"[2020, 2030, 2040, 2050]",NZ10_2050A_09EESSNXCT-IMG,"['N2O', 'CH4']",*,['Residential'],*,*,*,*,*,*,*,*,*,*
386,1,16,19,*,*,"[2020, 2030, 2040, 2050]",NZ10_2099B_99NESSNXCT-IMG,*,*,['Population'],*,*,*,*,*,*,*,*,*,*
387,2,16,19,*,*,"[2020, 2030, 2040, 2050]",NZ10_2050A_08EESSNXCT_IMG,*,*,['Population'],*,*,*,*,*,*,*,*,*,*


In [9]:
# Units was selected out of combined_df; work on an independent copy so adding a
# column does not act on a slice of that frame (SettingWithCopyWarning).
Units = Units.copy()

# Build "<WorkbookID>_<SheetID>_<RowID>" and drop the three source columns.
# The guard makes the cell safe to re-run: once the columns are gone there is
# nothing left to do.
id_columns = ['WorkbookID', 'SheetID', 'RowID']
if all(name in Units.columns for name in id_columns):
    Units['UniqueID'] = Units['WorkbookID'].astype(str) + '_' + Units['SheetID'].astype(str) + '_' + Units['RowID'].astype(str)
    Units = Units.drop(columns=id_columns)

In [10]:
# Display the UniqueID column built for the Units table.
Units['UniqueID']

0       1_18_38
1       1_18_40
2       1_18_41
3       1_18_42
4       1_18_43
         ...   
384    3_16_130
385    3_16_133
386     1_16_19
387     2_16_19
388     3_16_19
Name: UniqueID, Length: 389, dtype: object

In [11]:
# Descriptive columns of merged_df whose distinct values are listed below
columns = [
    'Process', 'Commodity', 'Year', 'Scenario', 'Indicator',
    'Sector', 'Subsector', 'Subsubsector', 'TechDescription',
    'IPCC_Category_L1', 'IPCC_Category_L2', 'IPCC_Category_L3',
    'IPCC_Category_L4', 'Description', 'Short Description',
    'Commodity_Name',
]

# Print every column's distinct values, followed by a separator line
for col in columns:
    distinct_values = merged_df[col].unique()
    print(
        col,
        " unique values: \n",
        distinct_values,
        "\n \n",
        '--------------------------------########################--------------------',
        '\n \n',
    )

#ORG_NZ10_8

Process  unique values: 
 ['SATIMMarginals' 'I2A1' 'I2A3' 'I2B2' 'I2B5' 'I2B6' 'I2B8' 'I2C1' 'I2C2'
 'I2C3' 'I2C5' 'I2C6' 'I2D1' 'I2D2' 'I2F1' 'I2F2' 'I2F3' 'I2F4' 'A3A1a'
 'A3A1c' 'A3A1d' 'A3A1f' 'A3A1g' 'A3A1h' 'A3A2a' 'A3A2c' 'A3A2d' 'A3A2f'
 'A3A2g' 'A3A2h' 'A3A2i' 'L3B1a' 'L3B1b' 'L3B2a' 'L3B2b' 'L3B3a' 'L3B3b'
 'L3B4' 'L3B5a' 'L3B5b' 'L3B6b' 'A3C1a' 'A3C1b' 'A3C1c' 'A3C1d' 'A3C1e'
 'A3C2' 'A3C3' 'A3C4' 'A3C5' 'A3C6' 'L3D1' 'W4A' 'W4B' 'W4C2' 'W4D1'
 'AHCOA-E' 'AHELC-E' 'AHOHF-E' 'AHOKE-E' 'AIELC-E' 'AIODS-E' 'AOELC-E'
 'APELC-E' 'ATELC-E' 'ATODS-E' 'CECELCCEN-E' 'CECELCCEN-N' 'CECELCCHIL-E'
 'CECELCHP-E' 'CECELCROOM-E' 'CEGELCHAL-E' 'CEGELCHPM-E' 'CEGELCHPS-E'
 'CEGELCINC-E' 'CEGELCLED-N' 'CEGELCMHL-E' 'CEHCOA-E' 'CEHELC-E'
 'CEHELC-N' 'CEHGAS-E' 'CEHGAS-N' 'CEKCOA-E' 'CEKELC-E' 'CEKOLP-E'
 'CELELCCFL-E' 'CELELCFLU-E' 'CELELCHAL-E' 'CELELCHID-E' 'CELELCINC-E'
 'CELELCLED-N' 'CEOELC-E' 'CEOODS-E' 'CEOOHF-E' 'CERELC-E' 'CERELC-N'
 'CETELC-E' 'CEWCOA-E' 'CEWELC-E' 'CEWELCHP-N' 'CEWG

In [12]:
# filter_data() reads the module-level name `report_df`, so point it at the
# merged data before running the filters.
report_df = merged_df

# One filtered DataFrame per filter row
filtered_dfs = [filter_data(row) for _, row in filter_df.iterrows()]

# Concatenate everything into a single frame. pd.concat raises on an empty list,
# so an empty filter table yields an empty frame with the expected columns.
# ignore_index=True already produces a fresh 0..n-1 index.
if filtered_dfs:
    final_df = pd.concat(filtered_dfs, ignore_index=True)
else:
    final_df = pd.DataFrame(columns=[*report_df.columns, 'UniqueID'])

# Export 
# final_df.to_csv('input/results_filter.csv', index = False)

Filter ["Commodity is in ['COMOLP', 'COMOKE', 'COMOHF', 'COMODS', 'COMOGS']", 'Year is in [2020, 2030, 2040, 2050]', 'Scenario == NZ10_2099B_99NESSNXCT-IMG', "Indicator is in ['FlowIn']", "Sector is in ['Commerce']", "Subsubsector is in ['Space Heating']"] resulted in empty dataframe.
Filter ["Commodity is in ['COMOLP', 'COMOKE', 'COMOHF', 'COMODS', 'COMOGS']", 'Year is in [2020, 2030, 2040, 2050]', 'Scenario == NZ10_2099B_99NESSNXCT-IMG', "Indicator is in ['FlowIn']", "Sector is in ['Commerce']", "Subsubsector is in ['Cooling']"] resulted in empty dataframe.
Filter ["Commodity is in ['COMGAS']", 'Year is in [2020, 2030, 2040, 2050]', 'Scenario == NZ10_2099B_99NESSNXCT-IMG', "Indicator is in ['FlowIn']", "Sector is in ['Commerce']", "Subsubsector is in ['Cooling']"] resulted in empty dataframe.
Filter ["Commodity is in ['COMCOA']", 'Year is in [2020, 2030, 2040, 2050]', 'Scenario == NZ10_2099B_99NESSNXCT-IMG', "Indicator is in ['FlowIn']", "Sector is in ['Commerce']", "Subsubsector is 

In [13]:
# Display the concatenated filter results (each row carries its UniqueID).
final_df

Unnamed: 0,Process,Commodity,Year,Scenario,Indicator,Sector,Subsector,Subsubsector,TechDescription,IPCC_Category_L1,IPCC_Category_L2,IPCC_Category_L3,IPCC_Category_L4,Description,Short Description,Commodity_Name,SATIMGE,UniqueID
0,CECELCCEN-E,ACTGRP,2020,NZ10_2099B_99NESSNXCT-IMG,Activity,Commerce,ComExisting,Cooling,Commercial - Cooling - Electricity - Central -...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4a Commercial/Institutional,Y,0,0,84.296274,1_18_38
1,CECELCCEN-E,ACTGRP,2030,NZ10_2099B_99NESSNXCT-IMG,Activity,Commerce,ComExisting,Cooling,Commercial - Cooling - Electricity - Central -...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4a Commercial/Institutional,Y,0,0,66.127038,1_18_38
2,CECELCCEN-E,ACTGRP,2040,NZ10_2099B_99NESSNXCT-IMG,Activity,Commerce,ComExisting,Cooling,Commercial - Cooling - Electricity - Central -...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4a Commercial/Institutional,Y,0,0,24.237112,1_18_38
3,CECELCCEN-E,ACTGRP,2050,NZ10_2099B_99NESSNXCT-IMG,Activity,Commerce,ComExisting,Cooling,Commercial - Cooling - Electricity - Central -...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4a Commercial/Institutional,Y,0,0,0.672616,1_18_38
4,CECELCCEN-N,ACTGRP,2030,NZ10_2099B_99NESSNXCT-IMG,Activity,Commerce,ComExisting,Cooling,Commercial - Cooling - Electricity - Central -...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4a Commercial/Institutional,Y,0,0,23.578447,1_18_38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8861,RMEWOLPKETLPG-E,RESOLP,2030,NZ10_2050A_09EESSNXCT-IMG,N2O,Residential,MiddleIncome,Water Heating,Residential - Water Heating - Middle Income El...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4b Residential,Residential - Oil LPG ...,LPG,LPG,0.073736,3_16_133
8862,RMEWOLPKETLPG-E,RESOLP,2040,NZ10_2050A_09EESSNXCT-IMG,CH4,Residential,MiddleIncome,Water Heating,Residential - Water Heating - Middle Income El...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4b Residential,Residential - Oil LPG ...,LPG,LPG,0.140180,3_16_133
8863,RMEWOLPKETLPG-E,RESOLP,2040,NZ10_2050A_09EESSNXCT-IMG,N2O,Residential,MiddleIncome,Water Heating,Residential - Water Heating - Middle Income El...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4b Residential,Residential - Oil LPG ...,LPG,LPG,0.044224,3_16_133
8864,RMEWOLPKETLPG-E,RESOLP,2050,NZ10_2050A_09EESSNXCT-IMG,CH4,Residential,MiddleIncome,Water Heating,Residential - Water Heating - Middle Income El...,1 Energy,1A Fuel Combustion Activities,1A4 Other Sectors,1A4b Residential,Residential - Oil LPG ...,LPG,LPG,0.005343,3_16_133


In [14]:
# Initialize an empty list
cumulativeIDs = []

# Loop through N values
for N in [1, 2, 3]:
    # Loop through M values
    for M in range(109, 124):  # This will loop from 127 to 143 inclusive
        # Construct the string using the pattern and append to the list
        cumulativeIDs.append(f"{N}_24_{M}") # 24 = power tab. 
        
# Print the list
print(cumulativeIDs)


['1_24_109', '1_24_110', '1_24_111', '1_24_112', '1_24_113', '1_24_114', '1_24_115', '1_24_116', '1_24_117', '1_24_118', '1_24_119', '1_24_120', '1_24_121', '1_24_122', '1_24_123', '2_24_109', '2_24_110', '2_24_111', '2_24_112', '2_24_113', '2_24_114', '2_24_115', '2_24_116', '2_24_117', '2_24_118', '2_24_119', '2_24_120', '2_24_121', '2_24_122', '2_24_123', '3_24_109', '3_24_110', '3_24_111', '3_24_112', '3_24_113', '3_24_114', '3_24_115', '3_24_116', '3_24_117', '3_24_118', '3_24_119', '3_24_120', '3_24_121', '3_24_122', '3_24_123']


In [15]:
# Workbooks, in WorkbookID order (WorkbookID 1 -> first entry)
file_names = ['input/WB1.xlsx', 'input/WB2.xlsx', 'input/WB3.xlsx']

# Worksheet column that receives the value of a given year
year_to_column = {2020: 4, 2030: 5, 2040: 6, 2050: 7, 2060: 8}

# Total SATIMGE per (UniqueID, Year)
aggregated = (
    final_df
    .groupby(['UniqueID', 'Year'])['SATIMGE']
    .sum()
    .reset_index()
)

# Year window used for the ten-year buckets below
start_year = 2020  # adjust accordingly
end_year = 2060    # adjust accordingly

# Rows belonging to the IDs listed in cumulativeIDs
special_rows = aggregated.loc[aggregated['UniqueID'].isin(cumulativeIDs)]

# For each of those IDs, sum SATIMGE over [year, year + 10) for every ten-year step
cumulative_sums = []

for unique_id in cumulativeIDs:
    subset = special_rows.loc[special_rows['UniqueID'] == unique_id]
    for year in range(start_year, end_year, 10):
        in_window = subset['Year'].ge(year) & subset['Year'].lt(year + 10)
        cumulative_sums.append((unique_id, year, subset.loc[in_window, 'SATIMGE'].sum()))

# One row per (UniqueID, ten-year step)
cumulative_df = pd.DataFrame(cumulative_sums, columns=['UniqueID', 'StartYear', 'CumulativeSATIMGE'])


In [16]:
# Display up to the first 200 rows of the ten-year sums.
cumulative_df.head(200)

Unnamed: 0,UniqueID,StartYear,CumulativeSATIMGE
0,1_24_109,2020,0.0
1,1_24_109,2030,0.0
2,1_24_109,2040,0.0
3,1_24_109,2050,0.0
4,1_24_110,2020,0.0
...,...,...,...
175,3_24_122,2050,0.0
176,3_24_123,2020,0.0
177,3_24_123,2030,0.0
178,3_24_123,2040,0.0


In [17]:
# Attach the Units columns to every aggregated row by UniqueID; rows without a
# match in Units are kept (left join).
aggregated = aggregated.merge(Units, on='UniqueID', how='left')

In [18]:
# Multipliers applied to SATIMGE by convert_value, keyed by the row's 'TargetUnit'.
# A unit that is not listed here leaves the value unchanged (see convert_value).
# NOTE(review): the per-unit remarks below are the author's; the physical
# constants are not derivable from this notebook - confirm before changing them.
conversion_factors = {
    'Mt': 0.001, # model emissions are in kt, so kt -> Mt
    'USD': 1/12.94, # to 2015 USD
    'kt': 1, # all emissions are in kt
    'GW': 1, 
    'TWh': 1/3.6, # PJ to TWh
    'EJ': 0.001, # PJ to EJ
    '*': 1,
    '': 1,
    'million inhab': 0.001,
    'gas_Nm3': 25641025,# Nm3 of natural gas per PJ: converts PJ of natural gas to Nm3
    'crude_Mt':1/42, # converts PJ of crude oil to Mt of crude oil (presumably 42 PJ per Mt - TODO confirm)
    'biomass_Mt':1/16, # Mt of biomass per PJ, based on 16 MJ/kg: converts PJ of biomass to Mt
    'H2_Nm':78740157, # Nm3 per PJ: converts PJ of hydrogen to Nm3
    'Mio' : 0.001,
    'pkm' : 0.000001, # bpkm is the default unit in the model
    'Gvkm' : 1, # bvkm is the default unit in the model

    
}

# Convert every aggregated value row by row (axis=1) with convert_value.
aggregated['SATIMGE'] = aggregated.apply(convert_value, axis=1)

In [19]:
# Debug check: shows the type of conversion_factors (a dict).
type(conversion_factors)

dict

In [20]:
# Ten-year buckets starting at start_year and stopping before end_year
decades = list(range(start_year, end_year, 10))  # Adjust as per your requirement

# UniqueID -> {decade start year -> SATIMGE summed over [start, start + 10)}.
# Note: this rebinds `cumulative_sums` as a dict.
cumulative_sums = {}

for unique_id, group in aggregated.groupby('UniqueID'):
    if unique_id in cumulativeIDs:
        decade_sums = {}

        # Dedicated loop names: reusing `start_year` / `end_year` here would
        # overwrite the configured window, so re-running this cell would build
        # `decades` from the last bucket instead of the full range.
        for decade_start in decades:
            decade_end = decade_start + 10
            decade_data = group[(group['Year'] >= decade_start) & (group['Year'] < decade_end)]
            decade_sums[decade_start] = decade_data['SATIMGE'].sum()

        cumulative_sums[unique_id] = decade_sums

# Print out some of the cumulative_sums for debugging
for key, value in list(cumulative_sums.items())[:5]:
    print(key, value)


In [21]:
# Keep only rows whose UniqueID is NOT one of the cumulativeIDs (~ negates the
# mask) and whose Year is one of the listed reporting years.
aggregated = aggregated.loc[
    ~aggregated['UniqueID'].isin(cumulativeIDs)
    & aggregated['Year'].isin([2020, 2030, 2040, 2050, 2060, 2070])
]

In [22]:
# Display the UniqueID column of the Units table.
Units['UniqueID']

0       1_18_38
1       1_18_40
2       1_18_41
3       1_18_42
4       1_18_43
         ...   
384    3_16_130
385    3_16_133
386     1_16_19
387     2_16_19
388     3_16_19
Name: UniqueID, Length: 389, dtype: object

In [23]:
# Worksheet column that receives the value of a given decade (cumulative rows)
decade_to_column = {
    2020: 4,
    2030: 5,
    2040: 6,
    2050: 7,
    2060: 8,
    2070: 9,
    # Adjust as needed
}

BASE_COLUMN = 4  # Adjust as needed

# Group by the plain column name so the group key is the UniqueID string itself
# on every pandas version (a one-element list yields 1-tuples only on pandas >= 2).
grouped = aggregated.groupby('UniqueID')
per_year_groups = {unique_id_string: group for unique_id_string, group in grouped}

# Rows listed in cumulativeIDs may no longer be present in `aggregated`; their
# values live in the `cumulative_sums` dict, so those IDs are added explicitly.
# Otherwise the cumulative branch below can never run.
cumulative_by_id = cumulative_sums if isinstance(cumulative_sums, dict) else {}
ids_to_write = sorted(set(per_year_groups) | set(cumulative_by_id))

# Each workbook is loaded once and saved once, instead of once per UniqueID.
# A value of None marks a workbook file that could not be found.
open_books = {}

try:
    for unique_id_string in ids_to_write:
        workbook_id, sheet_id, row_id = map(int, unique_id_string.split('_'))
        print(f"processing workbook {workbook_id}, sheet {sheet_id}, row {row_id}")

        # Use the workbook_id to select the correct workbook filename
        if 0 < workbook_id <= len(file_names):
            file_name = file_names[workbook_id - 1]  # Adjust for 0-based indexing
        else:
            raise ValueError(f"Workbook ID {workbook_id} is out of range.")

        if file_name not in open_books:
            try:
                open_books[file_name] = load_workbook(file_name)
            except FileNotFoundError:
                print(f"File {file_name} not found. Skipping to the next file.")
                open_books[file_name] = None
        book = open_books[file_name]
        if book is None:
            continue

        # SheetID is the 1-based position of the sheet within the workbook
        sheet_names = book.sheetnames
        if 0 <= sheet_id - 1 < len(sheet_names):
            sheet = book[sheet_names[sheet_id - 1]]
        else:
            raise ValueError(f"Sheet ID {sheet_id} is out of range for workbook {file_name}.")

        if unique_id_string in cumulativeIDs and unique_id_string in cumulative_by_id:
            # Cumulative rows: one value per decade
            print(f'Entering cumulative logic for {unique_id_string}')
            for decade_start, sum_val in cumulative_by_id[unique_id_string].items():
                column_index = decade_to_column[decade_start]
                sheet.cell(row=row_id, column=column_index, value=sum_val)
        elif unique_id_string in per_year_groups:
            # Regular rows: one value per reporting year
            for _, row in per_year_groups[unique_id_string].iterrows():
                if row['Year'] in year_to_column:
                    column_index = year_to_column[row['Year']]
                    sheet.cell(row=row_id, column=column_index, value=row['SATIMGE'])
                else:
                    print(f"Warning: Year {row['Year']} not in year_to_column for unique_id_string {unique_id_string}")
finally:
    # Persist whatever was written, also when a later ID raised, which matches
    # the previous save-after-every-ID behaviour.
    for file_name, book in open_books.items():
        if book is not None:
            book.save(file_name)


print("Done")


processing workbook 1, sheet 16, row 102
processing workbook 1, sheet 16, row 103
processing workbook 1, sheet 16, row 104
processing workbook 1, sheet 16, row 106
processing workbook 1, sheet 16, row 110
processing workbook 1, sheet 16, row 113
processing workbook 1, sheet 16, row 115
processing workbook 1, sheet 16, row 117
processing workbook 1, sheet 16, row 118
processing workbook 1, sheet 16, row 119
processing workbook 1, sheet 16, row 122
processing workbook 1, sheet 16, row 123
processing workbook 1, sheet 16, row 124
processing workbook 1, sheet 16, row 133
processing workbook 1, sheet 16, row 38
processing workbook 1, sheet 16, row 40
processing workbook 1, sheet 16, row 42
processing workbook 1, sheet 16, row 43
processing workbook 1, sheet 16, row 44
processing workbook 1, sheet 16, row 45
processing workbook 1, sheet 16, row 47
processing workbook 1, sheet 16, row 48
processing workbook 1, sheet 16, row 49
processing workbook 1, sheet 16, row 51
processing workbook 1, she

In [24]:
# Ad-hoc inspection: all rows of the merged data for commodity 'IFACHA'
x = merged_df

#x[(x['Year'] == 2020)&(x['Subsector'] == 'BioRef')&(x['Indicator']=='FlowOut')]['Short Description'].unique()

x.loc[x['Commodity'] == 'IFACHA']
#x['Commodity'].unique()

Unnamed: 0,Process,Commodity,Year,Scenario,Indicator,Sector,Subsector,Subsubsector,TechDescription,IPCC_Category_L1,IPCC_Category_L2,IPCC_Category_L3,IPCC_Category_L4,Description,Short Description,Commodity_Name,SATIMGE
5523,SATIMMarginals,IFACHA,2025,NZ10_2050A_09EESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,146.974296
5524,SATIMMarginals,IFACHA,2025,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,162.673390
5525,SATIMMarginals,IFACHA,2026,NZ10_2050A_09EESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,174.221608
5526,SATIMMarginals,IFACHA,2026,NZ10_2050A_08EESSNXCT_IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,168.968492
5527,SATIMMarginals,IFACHA,2026,NZ10_2099B_99NESSNXCT-IMG,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,100.058780
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494304,XINDBIO,IFACHA,2054,NZ10_2050A_08EESSNXCT_IMG,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,65.645141
494305,XINDBIO,IFACHA,2054,NZ10_2099B_99NESSNXCT-IMG,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,56.679070
494306,XINDBIO,IFACHA,2055,NZ10_2050A_09EESSNXCT-IMG,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,65.535333
494307,XINDBIO,IFACHA,2055,NZ10_2050A_08EESSNXCT_IMG,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,65.323263
