In [22]:
import pandas as pd
import numpy as np
from openpyxl import load_workbook
import ast
import os

In [23]:
# Filtering function for report_df
def correct_list_string(s):
    """Normalise a loosely written list string so ``ast.literal_eval`` can parse it.

    Every ``?`` character is removed first. If the remaining text starts with
    ``[`` and ends with ``]``, its contents are split on commas and each item
    is re-emitted as a properly quoted Python string literal.

    Parameters
    ----------
    s : str
        Raw cell value, e.g. ``"[Power, 'Transport']"``.

    Returns
    -------
    str
        A list literal such as ``"['Power', 'Transport']"``, or ``s`` with the
        ``?`` characters removed when it does not look like a list.

    Notes
    -----
    * Empty items are dropped, so ``"[]"`` stays an empty list and a trailing
      comma does not add an empty-string entry.
    * Surrounding single *or* double quotes are stripped from each item.
    * Items are quoted with ``repr`` so an embedded apostrophe still yields a
      parseable literal.
    * Items are split on every comma; a value that itself contains a comma is
      not supported.
    """
    # Drop every '?' before inspecting the string.
    s = s.replace('?', '')

    # Anything that does not look like a list is returned as-is (minus '?').
    if not (s.startswith('[') and s.endswith(']')):
        return s

    corrected_items = []
    for item in s[1:-1].split(','):
        # Remove surrounding whitespace, then any wrapping quote characters.
        item_stripped = item.strip().strip("'\"")
        if not item_stripped:
            # Produced by "[]" or a stray/trailing comma - not a real entry.
            continue
        # repr() quotes the item and escapes embedded quotes/backslashes.
        corrected_items.append(repr(item_stripped))

    # Reconstruct the list string
    return '[' + ', '.join(corrected_items) + ']'

def filter_data(row):
    """Return the rows of the module-level ``report_df`` matching one filter row.

    Parameters
    ----------
    row : pandas.Series
        One filter definition. ``WorkbookID``, ``SheetID`` and ``RowID``
        identify the target cell row and are joined into the ``UniqueID``.
        Every other entry whose label is also a column of ``report_df`` is a
        criterion:

        * ``"*"`` (trailing whitespace ignored) - wildcard, no restriction;
        * a string starting with ``[`` - parsed into a list, matched with
          ``isin``; for columns other than ``Year`` the string is first passed
          through ``correct_list_string``;
        * anything else - matched by equality.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows with an added ``UniqueID`` column. When
        the row defines no criteria, a copy of the whole ``report_df`` is
        returned, also tagged with ``UniqueID``.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` if a list string is malformed.
    """
    id_columns = ('WorkbookID', 'SheetID', 'RowID')
    conditions = []
    debug_info = []  # Human-readable form of each condition, for the empty-result message

    # Create the unique ID from the filter row's three identifier columns
    unique_id = f"{row['WorkbookID']}_{row['SheetID']}_{row['RowID']}"

    for col, value in row.items():
        # Identifier columns and columns unknown to report_df are not criteria
        if col in id_columns or col not in report_df.columns:
            continue

        # Trailing spaces are common in hand-edited spreadsheets; strip them
        # *before* the wildcard test so "* " is also treated as a wildcard.
        if isinstance(value, str):
            value = value.rstrip()
            if value == "*":
                continue

        # Convert the string representation of a list into an actual list
        if isinstance(value, str) and value.startswith("["):
            if col != 'Year':
                # Non-Year lists may hold unquoted text, so repair the quoting first.
                # Year lists are parsed exactly as written.
                value = correct_list_string(value)
            value = ast.literal_eval(value)

        if isinstance(value, list):
            conditions.append(report_df[col].isin(value))
            debug_info.append(f"{col} is in {value}")
        else:
            conditions.append(report_df[col] == value)
            debug_info.append(f"{col} == {value}")

    if not conditions:
        # No restriction at all: every report row matches. Return a tagged
        # copy so the result has the same shape as any other filter result
        # and the shared report_df is never handed out for mutation.
        result = report_df.copy()
        result['UniqueID'] = unique_id
        return result

    # Combine all conditions with an AND operator
    combined_condition = conditions[0]
    for cond in conditions[1:]:
        combined_condition = combined_condition & cond

    result = report_df[combined_condition].copy()  # Never modify the original dataframe
    result['UniqueID'] = unique_id
    if result.empty:
        # Show which conditions produced no match
        print(f"Filter {debug_info} resulted in empty dataframe.")

    return result

# Convert SATIMGE values based on Unit column
def convert_value(row):
    """Scale one row's ``SATIMGE`` value into the unit named by ``TargetUnit``.

    The multiplier is looked up in the module-level ``conversion_factors``
    mapping. When the unit has no entry (or its factor is falsy) the value is
    returned untouched; otherwise the scaled value is rounded to 6 decimals.
    """
    multiplier = conversion_factors.get(row['TargetUnit'])
    if not multiplier:
        # Unknown unit: leave the value as it is
        return row['SATIMGE']
    scaled = row['SATIMGE'] * multiplier
    return np.round(scaled, 6)


In [24]:
# Input / output locations (add/replace file paths here, or filenames if in the same directory)
path_Report = "input/REPORT_00.csv"
path_SetsAndMaps = "input/SetsAndMaps.xlsm"
path_Output = "input/merged_data.csv"

# Load the report and the two mapping sheets
report_df = pd.read_csv(path_Report, low_memory=False)
mapPRC_df = pd.read_excel(path_SetsAndMaps, sheet_name="mapPRC")
mapCOM_df = pd.read_excel(path_SetsAndMaps, sheet_name="mapCOM")

# 'Eps' entries are replaced by 0 so the whole column can be cast to float
report_df['SATIMGE'] = report_df['SATIMGE'].replace('Eps', 0).astype(float)

# Attach process and commodity attributes (left joins keep every report row),
# then renumber the index
merged_df = (
    report_df
    .merge(mapPRC_df, on='Process', how='left')
    .merge(mapCOM_df, on='Commodity', how='left')
    .reset_index(drop=True)
)

# merged_df.to_csv(path_Output, index=False)


In [25]:
# ADJUST THE EMISSIONS TO CO2EQ:
# Each SATIMGE value is multiplied by a per-indicator factor; indicators that
# are not listed below keep their value (factor 1).
# NOTE: this cell is not idempotent - re-running it without reloading
# merged_df applies the factors a second time.

# Emission factors per indicator.
# NOTE(review): these look like 100-year global warming potentials - confirm the source.
emf_df = pd.DataFrame({'Indicator': ['CH4', 'N2O', 'C2F6', 'CF4'],
                       'E_factor': [28, 265, 11100, 6630]})

# Attach the factor to every row based on the 'Indicator' column
merged_df = merged_df.merge(emf_df, on='Indicator', how='left')

# Indicators without a factor get 1. Assign the result back instead of calling
# fillna(inplace=True) on the column selection: that chained in-place form
# does not update merged_df under pandas Copy-on-Write and would leave NaN
# factors (and therefore NaN results).
merged_df['E_factor'] = merged_df['E_factor'].fillna(1)

# Multiply the values from 'SATIMGE' by the 'E_factor' column
merged_df['Result'] = merged_df['SATIMGE'] * merged_df['E_factor']

# Replace the original SATIMGE column with the adjusted values and drop the helper column
merged_df = (
    merged_df
    .drop(columns=['SATIMGE', 'E_factor'])
    .rename(columns={'Result': 'SATIMGE'})
    .reset_index(drop=True)
)

print(merged_df.head(30))

           Process Commodity  Year           Scenario  Indicator Sector  \
0   SATIMMarginals   CO2CAPT  2040  netzero_9_imagine  Marginals    All   
1   SATIMMarginals   CO2CAPT  2041  netzero_9_imagine  Marginals    All   
2   SATIMMarginals   CO2CAPT  2042  netzero_9_imagine  Marginals    All   
3   SATIMMarginals   CO2CAPT  2043  netzero_9_imagine  Marginals    All   
4   SATIMMarginals   CO2CAPT  2044  netzero_9_imagine  Marginals    All   
5   SATIMMarginals   CO2CAPT  2045  netzero_9_imagine  Marginals    All   
6   SATIMMarginals   CO2CAPT  2046  netzero_9_imagine  Marginals    All   
7   SATIMMarginals   CO2CAPT  2047  netzero_9_imagine  Marginals    All   
8   SATIMMarginals   CO2CAPT  2048  netzero_9_imagine  Marginals    All   
9   SATIMMarginals   CO2CAPT  2049  netzero_9_imagine  Marginals    All   
10  SATIMMarginals   CO2CAPT  2050  netzero_9_imagine  Marginals    All   
11  SATIMMarginals   CO2CAPT  2051  netzero_9_imagine  Marginals    All   
12  SATIMMarginals   CO2C

# Load the filter definitions from a single workbook.
#s_df = pd.read_excel('input/FilterScripts/s_filter.xlsx')
j_df = pd.read_excel('input/FilterScripts/LightIndustry_filters.xlsx')

#j_df = j_df.iloc[start_index:end_index]

# Keep only fully specified filter rows (any row containing a NaN is dropped)
j_df = j_df.dropna()
#print(j_df.head(10))
#s_df = s_df.dropna()

# Identifier columns must be integers so the UniqueID reads "1_24_21" rather
# than "1.0_24.0_21.0". (fillna(0) is only a safeguard: NaN rows were already
# dropped above.)
columns_to_convert = ['WorkbookID', 'SheetID', 'RowID']

if not j_df.empty:
    j_df[columns_to_convert] = j_df[columns_to_convert].fillna(0).astype(int)


#if not s_df.empty:
#    s_df[columns_to_convert] = s_df[columns_to_convert].fillna(0).astype(int)

# Concatenate filter dfs
#filter_df = pd.concat([s_df, j_df], ignore_index=True)
filter_df = j_df
# Take an explicit copy: Units gets new columns later, and writing to a plain
# column slice of filter_df triggers SettingWithCopyWarning.
Units = filter_df[['WorkbookID', 'SheetID', 'RowID', 'TargetUnit']].copy()
filter_df = filter_df.drop(columns=['TargetUnit'])
filter_df = filter_df.reset_index(drop=True)

filter_df.head(10)

In [52]:

# Folder that holds all filter workbooks. Built with os.path.join so the path
# works on every operating system (the previous hard-coded backslashes only
# worked on Windows).
filter_folder = os.path.join('input', 'FilterScripts')
curr_wdr = os.getcwd()
filters_path = os.path.join(curr_wdr, filter_folder)

# Collect one DataFrame per filter workbook
dataframes = []

# Sorted so the row order of the combined table does not depend on the
# arbitrary order os.listdir returns.
for filename in sorted(os.listdir(filters_path)):
    # Only .xlsx workbooks; skip the "~$name.xlsx" lock files Excel creates
    # while a workbook is open, which cannot be read.
    if not filename.endswith('.xlsx') or filename.startswith('~$'):
        continue
    print(filename)
    file_path = os.path.join(filters_path, filename)
    df = pd.read_excel(file_path)
    dataframes.append(df)

if not dataframes:
    # pd.concat([]) fails with an opaque "No objects to concatenate" error
    raise FileNotFoundError(f"No .xlsx filter files found in {filters_path}")

# Concatenate all DataFrames into one
combined_df = pd.concat(dataframes, ignore_index=True)

# Keep only fully specified filter rows (any row containing a NaN is dropped)
combined_df = combined_df.dropna()

# Identifier columns must be integers so the UniqueID reads "1_24_21"
columns_to_convert = ['WorkbookID', 'SheetID', 'RowID']

if not combined_df.empty:
    combined_df[columns_to_convert] = combined_df[columns_to_convert].fillna(0).astype(int)

# Split off the 'TargetUnit' column. Explicit copy because Units is modified
# later and must not be a view/slice of combined_df.
Units = combined_df[['WorkbookID', 'SheetID', 'RowID', 'TargetUnit']].copy()
combined_df = combined_df.drop(columns=['TargetUnit'])

# Reset the index
combined_df = combined_df.reset_index(drop=True)

# 'combined_df' now contains the filters from all the Excel files in the
# folder, so it becomes the filter table. It must NOT be assigned to
# merged_df: merged_df holds the merged report data that is later used as
# report_df, and overwriting it would make every filter run against the
# filter table itself.
filter_df = combined_df


LightIndustry_filters.xlsx


In [394]:
# Replace the three identifier columns of Units with a single
# "<WorkbookID>_<SheetID>_<RowID>" key. A new frame is built instead of
# mutating Units in place, which avoids SettingWithCopyWarning when Units is a
# column slice of the filter table.
Units = (
    Units
    .assign(
        UniqueID=Units['WorkbookID'].astype(str)
        + '_' + Units['SheetID'].astype(str)
        + '_' + Units['RowID'].astype(str)
    )
    .drop(columns=['WorkbookID', 'SheetID', 'RowID'])
)

In [395]:
# Descriptive columns of merged_df whose distinct values are listed below
columns = [
    'Process', 'Commodity', 'Year', 'Scenario', 'Indicator',
    'Sector', 'Subsector', 'Subsubsector', 'TechDescription',
    'IPCC_Category_L1', 'IPCC_Category_L2', 'IPCC_Category_L3',
    'IPCC_Category_L4', 'Description', 'Short Description',
    'Commodity_Name',
]

# Print the distinct values of each column, followed by a separator line
for col in columns:
    distinct_values = merged_df[col].unique()
    print(
        col,
        " unique values: \n",
        distinct_values,
        "\n \n",
        '--------------------------------########################--------------------',
        '\n \n',
    )

#ORG_NZ10_8
#testing an edit for git

Process  unique values: 
 ['SATIMMarginals' 'I2A1' 'I2A3' 'I2B2' 'I2B5' 'I2B6' 'I2B8' 'I2C1' 'I2C2'
 'I2C3' 'I2C5' 'I2C6' 'I2D1' 'I2D2' 'I2F1' 'I2F2' 'I2F3' 'I2F4' 'A3A1a'
 'A3A1c' 'A3A1d' 'A3A1f' 'A3A1g' 'A3A1h' 'A3A2a' 'A3A2c' 'A3A2d' 'A3A2f'
 'A3A2g' 'A3A2h' 'A3A2i' 'L3B1a' 'L3B1b' 'L3B2a' 'L3B2b' 'L3B3a' 'L3B3b'
 'L3B4' 'L3B5a' 'L3B5b' 'L3B6b' 'A3C1a' 'A3C1b' 'A3C1c' 'A3C1d' 'A3C1e'
 'A3C2' 'A3C3' 'A3C4' 'A3C5' 'A3C6' 'L3D1' 'W4A' 'W4B' 'W4C2' 'W4D1'
 'AHCOA-E' 'AHELC-E' 'AHOHF-E' 'AHOKE-E' 'AIELC-E' 'AIODS-E' 'AOELC-E'
 'APELC-E' 'ATELC-E' 'ATODS-E' 'CECELCCEN-E' 'CECELCCEN-N' 'CECELCCHIL-E'
 'CECELCHP-E' 'CECELCROOM-E' 'CEGELCHAL-E' 'CEGELCHPM-E' 'CEGELCHPS-E'
 'CEGELCINC-E' 'CEGELCLED-N' 'CEGELCMHL-E' 'CEHCOA-E' 'CEHELC-E'
 'CEHELC-N' 'CEHGAS-E' 'CEHGAS-N' 'CEKCOA-E' 'CEKELC-E' 'CEKOLP-E'
 'CELELCCFL-E' 'CELELCFLU-E' 'CELELCHAL-E' 'CELELCHID-E' 'CELELCINC-E'
 'CELELCLED-N' 'CEOELC-E' 'CEOODS-E' 'CEOOHF-E' 'CERELC-E' 'CERELC-N'
 'CETELC-E' 'CEWCOA-E' 'CEWELC-E' 'CEWELCHP-N' 'CEWG

In [397]:
# filter_data reads the module-level report_df, so point it at the merged data
report_df = merged_df

# One filtered DataFrame per row of filter_df
filtered_dfs = [filter_data(filter_row) for _, filter_row in filter_df.iterrows()]

# Stack all filter results into a single dataframe
final_df = pd.concat(filtered_dfs, ignore_index=True)

# Export
# final_df.to_csv('input/results_filter.csv', index = False)

final_df = final_df.reset_index(drop=True)

Filter ['Year is in [2020, 2030, 2040, 2050]', "Scenario is in ['netzero_9_imagine']", "Indicator is in ['Capacity']", "Sector is in ['Power']", 'Subsector == ETrans', 'Subsubsector == ETRANS'] resulted in empty dataframe.
Filter ['Year is in [2020, 2030, 2040, 2050]', "Scenario is in ['netzero_9_imagine']", "Indicator is in ['FlowOut']", "Sector is in ['Power']", 'Subsector == ECoal_CCS', "Short Description is in ['Electricity']"] resulted in empty dataframe.
Filter ['Year is in [2020, 2030, 2040, 2050]', "Scenario is in ['netzero_9_imagine']", "Indicator is in ['FlowOut']", "Sector is in ['Power']", 'Subsector == EHydrogen', "Short Description is in ['Electricity']"] resulted in empty dataframe.
Filter ['Year is in [2020, 2030, 2040, 2050]', "Scenario is in ['netzero_9_imagine']", "Indicator is in ['CO2CAPT']", "Sector is in ['Power']", 'Subsector == ECoal_CCS'] resulted in empty dataframe.
Filter ['Year is in [2020, 2030, 2040, 2050]', "Scenario is in ['netzero_9_imagine']", "Indica

In [398]:
# Display the concatenated filter results (each matching report row carries its UniqueID)
final_df

Unnamed: 0,Process,Commodity,Year,Scenario,Indicator,Sector,Subsector,Subsubsector,TechDescription,IPCC_Category_L1,IPCC_Category_L2,IPCC_Category_L3,IPCC_Category_L4,Description,Short Description,Commodity_Name,SATIMGE,UniqueID
0,ETRANSDUM,ACTGRP,2020,netzero_9_imagine,GVA,Power,ETrans,ETrans,dummy tech tracking investment costs,1 Energy,0,0,0,Y,0,0,135.410000,1_24_21
1,ETRANSDUM,ACTGRP,2030,netzero_9_imagine,GVA,Power,ETrans,ETrans,dummy tech tracking investment costs,1 Energy,0,0,0,Y,0,0,158.820450,1_24_21
2,ETRANSDUM,ACTGRP,2040,netzero_9_imagine,GVA,Power,ETrans,ETrans,dummy tech tracking investment costs,1 Energy,0,0,0,Y,0,0,228.784187,1_24_21
3,ETRANSDUM,ACTGRP,2050,netzero_9_imagine,GVA,Power,ETrans,ETrans,dummy tech tracking investment costs,1 Energy,0,0,0,Y,0,0,328.345708,1_24_21
4,TPPRCARELC-N,TRAELC,2030,netzero_9_imagine,FlowIn,Transport,PassPriv,CarElectric,Transport Passenger Car Priv.Veh. Electricity-...,1 Energy,1A Fuel Combustion Activities,1A3 Transport,1A3b Transport - Road,Transport Electricity ...,Electricity,0,2.259462,1_24_27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1434,UCTLCLEIN-E,UPSCLE,2030,netzero_9_imagine,CH4,Refineries,CTL,CTL,Refinery CTL Existing ...,1 Energy,1B Fugitive Emissions from Fuels,1B3 Other Emissions,1B3 Other Emissions - Sasol,Supply Sector Coal low grade ...,Coal,Coal,2623.006800,1_26_417
1435,UCTLCLEIN-E,UPSCLE,2020,netzero_9_imagine,FlowInMt,Refineries,CTL,CTL,Refinery CTL Existing ...,1 Energy,1B Fugitive Emissions from Fuels,1B3 Other Emissions,1B3 Other Emissions - Sasol,Supply Sector Coal low grade ...,Coal,Coal,24.939985,1_26_424
1436,UCTLCLEIN-E,UPSCLE,2030,netzero_9_imagine,FlowInMt,Refineries,CTL,CTL,Refinery CTL Existing ...,1 Energy,1B Fugitive Emissions from Fuels,1B3 Other Emissions,1B3 Other Emissions - Sasol,Supply Sector Coal low grade ...,Coal,Coal,24.939985,1_26_424
1437,UHETCLE-E,UPSCLE,2020,netzero_9_imagine,FlowInMt,Refineries,CTL-Boiler,CTL,Refinery CTL Boiler Existing ...,1 Energy,1A Fuel Combustion Activities,1A1 Energy Industries,1A1c Manufacture of solid fuels and other ener...,Supply Sector Coal low grade ...,Coal,Coal,10.474150,1_26_424


In [399]:
# IDs of the form "<N>_24_<M>" for N in 1..3 and M in 127..143 (inclusive),
# ordered by N first and then by M
cumulativeIDs = [
    f"{N}_24_{M}"
    for N in (1, 2, 3)
    for M in range(127, 144)
]
# Print the list
print(cumulativeIDs)


['1_24_127', '1_24_128', '1_24_129', '1_24_130', '1_24_131', '1_24_132', '1_24_133', '1_24_134', '1_24_135', '1_24_136', '1_24_137', '1_24_138', '1_24_139', '1_24_140', '1_24_141', '1_24_142', '1_24_143', '2_24_127', '2_24_128', '2_24_129', '2_24_130', '2_24_131', '2_24_132', '2_24_133', '2_24_134', '2_24_135', '2_24_136', '2_24_137', '2_24_138', '2_24_139', '2_24_140', '2_24_141', '2_24_142', '2_24_143', '3_24_127', '3_24_128', '3_24_129', '3_24_130', '3_24_131', '3_24_132', '3_24_133', '3_24_134', '3_24_135', '3_24_136', '3_24_137', '3_24_138', '3_24_139', '3_24_140', '3_24_141', '3_24_142', '3_24_143']


In [400]:
# Target workbooks, indexed by WorkbookID - 1
file_names = ['input/WB1.xlsx'] #['input/WB1.xlsx', 'input/WB2.xlsx', 'input/WB3.xlsx']

# Worksheet column that receives the value of each reporting year
year_to_column = {2020: 4, 2030: 5, 2040: 6, 2050: 7, 2060: 8, 2070: 9}

# Total SATIMGE per (UniqueID, Year)
satimge_totals = final_df.groupby(['UniqueID', 'Year'])['SATIMGE'].sum()
aggregated = satimge_totals.reset_index()

# Year range covered by the ten-year windows below
start_year = 2020  # adjust accordingly
end_year = 2070    # adjust accordingly

# Rows belonging to the IDs listed in cumulativeIDs
special_rows = aggregated[aggregated['UniqueID'].isin(cumulativeIDs)]

# For every such ID, sum SATIMGE over each window [start, start + 10)
cumulative_sums = []

for unique_id in cumulativeIDs:
    id_rows = special_rows.loc[special_rows['UniqueID'] == unique_id]
    cumulative_sums.extend(
        (
            unique_id,
            window_start,
            id_rows.loc[
                (id_rows['Year'] >= window_start) & (id_rows['Year'] < window_start + 10),
                'SATIMGE',
            ].sum(),
        )
        for window_start in range(start_year, end_year, 10)
    )

# One row per (UniqueID, window start)
cumulative_df = pd.DataFrame(cumulative_sums, columns=['UniqueID', 'StartYear', 'CumulativeSATIMGE'])


In [401]:
# Preview the per-window cumulative sums (at most the first 200 rows)
cumulative_df.head(200)

Unnamed: 0,UniqueID,StartYear,CumulativeSATIMGE
0,1_24_127,2020,0.0
1,1_24_127,2030,0.0
2,1_24_127,2040,0.0
3,1_24_127,2050,0.0
4,1_24_127,2060,0.0
...,...,...,...
195,3_24_132,2020,0.0
196,3_24_132,2030,0.0
197,3_24_132,2040,0.0
198,3_24_132,2050,0.0


In [402]:
# Attach each UniqueID's TargetUnit; the left join keeps every aggregated row
aggregated = aggregated.merge(Units, how='left', on='UniqueID')

In [403]:
# Multipliers applied by convert_value, keyed by the row's TargetUnit.
# A unit that is missing from this mapping leaves the value unchanged.
conversion_factors = {
    'Mt': 0.001,           # units in model for emissions are already kt
    'USD': 1/12.94,        # to 2015 USD
    'kt': 1,               # all emissions are in kt
    'GW': 1,
    'TWh': 1/3.6,          # PJ to TWh
    'EJ': 0.001,           # PJ to EJ
    '*': 1,
    '': 1,
    'million inhab': 0.001,
    'gas_Nm3': 25641025,   # Nm3 of NGas per PJ: convert PJ of NGas to Nm3 of gas
    'crude_Mt': 1/42,      # convert PJ of crude oil to Mt of crude oil
    'biomass_Mt': 1/16,    # based on 16MJ/kg: convert PJ of biomass to Mt of biomass
    'H2_Nm': 78740157,     # Nm3 per PJ: convert PJ of hydrogen to Nm3
}

# Convert every row's SATIMGE into its target unit
converted_values = aggregated.apply(convert_value, axis=1)
aggregated = aggregated.assign(SATIMGE=converted_values)

In [404]:
# Debug check: show the type of conversion_factors
type(conversion_factors)

dict

In [405]:
# Every year from 2020 through 2070 inclusive
years_between_2020_and_2070 = list(range(2020, 2071))
print(years_between_2020_and_2070)

[2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070]


In [406]:
# Start years of the ten-year windows (2020, 2030, ..., 2060)
decades = list(range(2020, 2070, 10))  # Adjust as per your requirement

# {UniqueID: {window start: summed SATIMGE}} for the IDs in cumulativeIDs only
cumulative_sums = {}

for unique_id, group in aggregated.groupby('UniqueID'):
    if unique_id not in cumulativeIDs:
        continue
    cumulative_sums[unique_id] = {
        decade: group.loc[
            (group['Year'] >= decade) & (group['Year'] < decade + 10),
            'SATIMGE',
        ].sum()
        for decade in decades
    }

# Show the first few entries for debugging
for key in list(cumulative_sums)[:5]:
    print(key, cumulative_sums[key])


In [407]:
# Every year from 2020 through 2070 inclusive
list_of_years = [*range(2020, 2071)]
print(list_of_years)


[2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070]


In [408]:
# Keep only rows that are not cumulative IDs and that fall on a reporting year
is_cumulative_id = aggregated['UniqueID'].isin(cumulativeIDs)
is_reporting_year = aggregated['Year'].isin([2020, 2030, 2040, 2050, 2060, 2070])
aggregated = aggregated.loc[~is_cumulative_id & is_reporting_year]

In [409]:
# Inspect the per-UniqueID window sums
cumulative_sums

{}

In [410]:
# Worksheet column that receives the cumulative sum of each ten-year window
decade_to_column = {
    2020: 4,
    2030: 5,
    2040: 6,
    2050: 7,
    2060: 8,
    2070: 9,
    # Adjust as needed
}

BASE_COLUMN = 4  # Adjust as needed

# Group on a scalar key so each group label is a plain string regardless of
# the pandas version (a one-element list key yields 1-tuples on newer
# versions only).
grouped = aggregated.groupby('UniqueID')

# IDs to write: every group still present in `aggregated`, plus the IDs that
# only exist in cumulative_sums. When cumulative IDs have been removed from
# `aggregated` beforehand, iterating over the groups alone would never reach
# the cumulative branch and their sums would silently not be written.
ids_to_write = list(grouped.groups)
ids_to_write += [uid for uid in cumulative_sums if uid not in grouped.groups]

# Each workbook is loaded once and saved once at the end, instead of being
# reloaded and rewritten for every UniqueID. Consequently nothing is saved
# if an error is raised part-way through.
open_books = {}

for unique_id_string in ids_to_write:
    workbook_id, sheet_id, row_id = map(int, unique_id_string.split('_'))
    print(f"processing workbook {workbook_id}, sheet {sheet_id}, row {row_id}")

    # Use the workbook_id to select the correct workbook filename
    if 0 < workbook_id <= len(file_names):
        file_name = file_names[workbook_id - 1]  # Adjust for 0-based indexing
    else:
        raise ValueError(f"Workbook ID {workbook_id} is out of range.")

    book = open_books.get(file_name)
    if book is None:
        try:
            book = load_workbook(file_name)
        except FileNotFoundError:
            print(f"File {file_name} not found. Skipping to the next file.")
            continue
        open_books[file_name] = book

    # sheet_id is the 1-based position of the sheet inside the workbook
    sheet_names = book.sheetnames
    if 0 <= sheet_id - 1 < len(sheet_names):
        sheet = book[sheet_names[sheet_id - 1]]
    else:
        raise ValueError(f"Sheet ID {sheet_id} is out of range for workbook {file_name}.")

    if unique_id_string in cumulativeIDs and unique_id_string in cumulative_sums:
        # Cumulative rows: one value per ten-year window
        print(f'Entering cumulative logic for {unique_id_string}')
        for decade_start, sum_val in cumulative_sums[unique_id_string].items():
            column_index = decade_to_column[decade_start]
            sheet.cell(row=row_id, column=column_index, value=sum_val)
    else:
        # Regular rows: one value per reporting year
        group = grouped.get_group(unique_id_string)
        for _, row in group.iterrows():
            if row['Year'] in year_to_column:
                column_index = year_to_column[row['Year']]
                sheet.cell(row=row_id, column=column_index, value=row['SATIMGE'])
            else:
                print(f"Warning: Year {row['Year']} not in year_to_column for unique_id_string {unique_id_string}")

# Persist every workbook that was modified
for file_name, book in open_books.items():
    book.save(file_name)


print("Done")


processing workbook 1, sheet 24, row 100


processing workbook 1, sheet 24, row 101
processing workbook 1, sheet 24, row 102
processing workbook 1, sheet 24, row 103
processing workbook 1, sheet 24, row 106
processing workbook 1, sheet 24, row 146
processing workbook 1, sheet 24, row 147
processing workbook 1, sheet 24, row 149
processing workbook 1, sheet 24, row 151
processing workbook 1, sheet 24, row 152
processing workbook 1, sheet 24, row 157
processing workbook 1, sheet 24, row 21
processing workbook 1, sheet 24, row 27
processing workbook 1, sheet 24, row 28
processing workbook 1, sheet 24, row 29
processing workbook 1, sheet 24, row 30
processing workbook 1, sheet 24, row 31
processing workbook 1, sheet 24, row 32
processing workbook 1, sheet 24, row 33
processing workbook 1, sheet 24, row 35
processing workbook 1, sheet 24, row 41
processing workbook 1, sheet 24, row 43
processing workbook 1, sheet 24, row 44
processing workbook 1, sheet 24, row 45
processing workbook 1, sheet 24, row 47
processing workbook 1, sheet 2

: 

In [388]:
# Row window bounds; the only reference to them in this notebook is a commented-out iloc slice
start_index, end_index = 200, 400

In [387]:
# Ad-hoc inspection of merged_df
x = merged_df

#x[(x['Year'] == 2020)&(x['Subsector'] == 'BioRef')&(x['Indicator']=='FlowOut')]['Short Description'].unique()

# Show every row whose Commodity is 'IFACHA'
is_ifacha = x['Commodity'] == 'IFACHA'
x[is_ifacha]
#x['Commodity'].unique()

Unnamed: 0,Process,Commodity,Year,Scenario,Indicator,Sector,Subsector,Subsubsector,TechDescription,IPCC_Category_L1,IPCC_Category_L2,IPCC_Category_L3,IPCC_Category_L4,Description,Short Description,Commodity_Name,SATIMGE
1857,SATIMMarginals,IFACHA,2026,netzero_9_imagine,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,135.629743
1858,SATIMMarginals,IFACHA,2027,netzero_9_imagine,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,161.971195
1859,SATIMMarginals,IFACHA,2028,netzero_9_imagine,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,162.238246
1860,SATIMMarginals,IFACHA,2029,netzero_9_imagine,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,168.497666
1861,SATIMMarginals,IFACHA,2030,netzero_9_imagine,Marginals,All,All,All,SATIMMarginals Annual,,,,,Industry - Biochar,BioChar,BioChar,168.544272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181092,XINDBIO,IFACHA,2054,netzero_9_imagine,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,94.615717
181093,XINDBIO,IFACHA,2055,netzero_9_imagine,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,95.183996
181094,XINDBIO,IFACHA,2058,netzero_9_imagine,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,96.888833
181095,XINDBIO,IFACHA,2063,netzero_9_imagine,FlowOut,,,Other,Industry Biochar ...,,,,,Industry - Biochar,BioChar,BioChar,99.703502
