## Calculate GHG emissions at each petrochemical facility.

In [None]:
# Import packages
import warnings
from process_iteration_functions import *

# Switch off pandas' chained-assignment check (SettingWithCopyWarning); the
# notebook assigns columns on filtered frames in several places.
pd.options.mode.chained_assignment = None

# Silence known, noisy warnings.
# ``message`` is a regular expression matched (case-insensitively) against the
# START of the warning *text*. The category name is not part of that text, so
# it must be given through ``category`` -- a pattern beginning with
# "RuntimeWarning: " or "FutureWarning: " would never match anything.
warnings.filterwarnings("ignore", message="DataFrame is highly fragmented.*")
warnings.filterwarnings("ignore", message="Mean of empty slice", category=RuntimeWarning)
warnings.filterwarnings("ignore", message="The behavior of DataFrame", category=FutureWarning)
warnings.filterwarnings("ignore", message="The provided callable", category=FutureWarning)

In [None]:
# Data file paths.
# NOTE: REPLACE the '_demo' files with the calculated files when conducting a real run.
# All paths are plain strings ending in '/' where they denote folders, because
# later cells build file names by string concatenation.
input_path = '../data/'

# Folder that receives every file written by the calculation loop.
# makedirs(exist_ok=True) creates missing parent folders as well and does not
# fail when the folder already exists.
output_path = input_path+'test_output/'
os.makedirs(output_path, exist_ok=True)

# IHS process recipes and direct-emission inputs
ihs_materials_path = input_path+'processed/ihsMaterials_w_uncertainties_demo.csv'
direct_energyUse_conversion_path = input_path+'extra_inputs/direct_utility_conversion_factors.csv'
direct_process_path = input_path+'extra_inputs/Direct process emissions.xlsx'

# Emission / conversion factor sources
ecoinvent_file = input_path+'extracted/EI_3_10_APOS_EFs_in_IHS_demo.csv'
carbonMinds_file = input_path+'processed/conversionFactors_carbonMinds_grouped_demo.csv'
ifa_file = input_path+'extracted/conversionFactors_from_IFA_w_uncertainties_demo.csv'

# IHS material -> emission-factor match list (read twice later: 'ei' and 'cm' columns)
match_list_path = input_path+'processed/ihs_to_ei_matches_formatted_demo.csv'

# Product -> product group / product type lookup
product_group_path = input_path+'extra_inputs/product_groups.csv'

# Facility-level production and the ICIS -> IHS process matches
production_file = input_path+'processed/icisFacilityProduction_dedoubled_demo.csv'
icis_ihs_match_file = input_path+'extra_inputs/all_icis_to_ihs_matches_24.csv'

# Fertiliser (IFA) inputs
ammonia_processes_file = input_path+'extra_inputs/ammonia_processes_used.csv'
fert_production_file = input_path+'extracted/IFA_country_production_demo.csv'

# Ethylene feedstock inputs
ethylene_feedstocks_file = input_path+'extracted/icisEthyleneFeedstocks_1978-2050_demo.csv'
ethylene_feedstock_types_file = input_path+'extra_inputs/feedstock_type.csv'

In [None]:
# IHS process recipes (first CSV column becomes the index)
ihs_materials = pd.read_csv(ihs_materials_path, index_col=0)

# Conversion factors for direct energy use
direct_utl_conv = pd.read_csv(direct_energyUse_conversion_path, index_col=0)

# Direct process emission factors: skip the first two sheet rows, drop the
# first remaining data row, keep only rows with a 'Process' value and order
# them by process name with a fresh 0..n-1 index.
direct_process_sheet = pd.read_excel(direct_process_path, skiprows=2)
direct_process_efs = direct_process_sheet[1:].dropna(subset=['Process'])
direct_process_efs = direct_process_efs.sort_values('Process').reset_index(drop=True)
product_process_match = pd.read_csv(input_path+'extra_inputs/product_to_directProcess_matches.csv')

# Emission / conversion factor tables
ei_emissions = pd.read_csv(ecoinvent_file, index_col=0)
cm_emissions = pd.read_csv(carbonMinds_file, index_col=0)
ifa_factors = pd.read_csv(ifa_file, index_col=0)

# Match lists: the same file is read twice, once per factor column
match_list_ei, match_list_cm = (
    pd.read_csv(match_list_path, index_col=False, usecols=['IHS', factor_column])
    for factor_column in ('ei', 'cm')
)

# Chemical type list, keyed on 'Product' like the recipes
product_group = pd.read_csv(product_group_path, index_col=0)
product_group = product_group.rename(columns={'PRODUCT':'Product'})

# Translate IHS product names to their ICIS equivalents
ihs_prod_names = pd.read_csv(input_path+'extra_inputs/ihs_product_names.csv')
name_convs = {
    ihs_name: icis_name
    for ihs_name, icis_name in zip(ihs_prod_names['IHS_NAME'], ihs_prod_names['ICIS_NAME'])
}
ihs_materials['Product'] = ihs_materials['Product'].replace(name_convs)

# Attach the product_group columns to every recipe row (unmatched products are kept)
ihs_materials = pd.merge(ihs_materials, product_group, on='Product', how='left')

# Calculate direct process conversions and rename their products with the
# same IHS -> ICIS name mapping that was applied to ihs_materials
direct_process_convs = calc_direct_process_convs(direct_process_efs, product_process_match, ei_emissions)
direct_process_convs['Product'] = direct_process_convs['Product'].replace(name_convs)

# Lookup of group/type per input material: product groups plus the raw
# material types, with columns renamed so they can be merged on 'Source/Object'
raw_material_types = (
    pd.read_csv(input_path+'extra_inputs/raw_material_types.csv', index_col=0)
    .reset_index(drop=True)
    .rename(columns={'PRODUCT':'Product'})
)
input_types = pd.concat((product_group, raw_material_types)).rename(
    columns={'Product group':'Input_group', 'Product type':'Input_type', 'Product':'Source/Object'})

# Replace Type for raw materials based on matching product type in input types
ihs_materials = ihs_materials.merge(input_types, on='Source/Object', how='left')
# Vectorised replacement: rows whose Type is 'Raw Material' AND that found an
# Input_group take that group as their Type; all other rows keep their Type.
# (A boolean mask avoids a Python-level call per row and also works when the
# frame has no rows.)
reclassify = (ihs_materials['Type'] == 'Raw Material') & ihs_materials['Input_group'].notna()
ihs_materials['Type'] = ihs_materials['Type'].mask(reclassify, ihs_materials['Input_group'])
ihs_materials.drop(columns=['Input_group', 'Input_type'], inplace=True)

# Fertiliser inputs
ammonia_processes = pd.read_csv(ammonia_processes_file, index_col=0)
ifa_production = import_ifa(fert_production_file)

# Facility production and ICIS -> IHS process matches
icis_ihs_matches = pd.read_csv(icis_ihs_match_file, index_col=0)
facility_production = pd.read_csv(production_file, index_col=0)

# Ethylene feedstocks (the CSV has a two-row column header)
ethylene_feedstocks = pd.read_csv(ethylene_feedstocks_file, index_col=0, header=[0,1])
ethylene_feedstock_vals = get_ethylene_feedstock_vals(facility_production, ethylene_feedstocks)

ethylene_feedstock_types = pd.read_csv(ethylene_feedstock_types_file)

# Add on bio processes
# Scenario switches, both off by default. Everything below only runs when
# bio_forecast is True; both flags are read again by the calculation loop.
bio_forecast, weighting = False, False

if bio_forecast:
    extra_processes = pd.read_csv(input_path+'extra_inputs/benzene_toluene_process.csv')

    # Point every benzene/toluene match at the first process named in the
    # extra-process file, then de-duplicate the resulting rows
    is_bt = icis_ihs_matches['PRODUCT'].isin(['BENZENE', 'TOLUENE'])
    bt_process = extra_processes['Target/Process'].unique()[0]
    bt_matches = icis_ihs_matches[is_bt].assign(ihs_match=bt_process).drop_duplicates()
    icis_ihs_matches = pd.concat((icis_ihs_matches[~is_bt], bt_matches))

    bio_conv_factors = pd.read_csv(input_path+'extra_inputs/bio_EFs.csv', index_col=0)
    bio_processes = pd.read_csv(input_path+'extra_inputs/bio_processes.csv')

    # Replace primary chemical processes with bio routes: keep the listed bio
    # processes, plus every recipe whose product has no bio process at all
    is_bio_route = ihs_materials['Target/Process'].isin(bio_processes['Target/Process'])
    has_bio_route = ihs_materials['Product'].isin(bio_processes['Product'].unique())
    ihs_materials = pd.concat((ihs_materials[is_bio_route], ihs_materials[~has_bio_route]))

    # Replace feedstock emissions factors with bio factors (rows are
    # overwritten positionally with the values of the matching bio rows)
    for source in bio_conv_factors['Source']:
        ind = ei_emissions.index[ei_emissions['Source']==source]
        bio_rows = bio_conv_factors[bio_conv_factors['Source']==source]
        ei_emissions.loc[ind] = bio_rows.values

In [None]:
# Fraction of ammonia production assigned to the 'Coal' route, per country.
# Countries not listed here get a fraction of 0 (see fillna below).
coal_use = {'China': 0.829268293,
            'USA': 0.034883721}

# Start from all ammonia rows, labelled 'SR', with coal and gas fractions
is_ammonia = ifa_production['PRODUCT']=='AMMONIA'
ammonia_weight = ifa_production[is_ammonia].assign(ROUTE='SR')
ammonia_weight['coal_conv'] = ammonia_weight['COUNTRY/TERRITORY'].map(coal_use).fillna(0)
ammonia_weight['gas_conv'] = 1-ammonia_weight['coal_conv']

# Coal rows exist only where the coal fraction is positive; the gas frame
# keeps every ammonia row
ammonia_coal = ammonia_weight[ammonia_weight['coal_conv']>0]
ammonia_gas = ammonia_weight.copy()

# Scale each yearly value and its '_sigma' column by the route's fraction
for year in range(1978, 2051):
    for year_col in (str(year), str(year)+'_sigma'):
        ammonia_coal[year_col] = ammonia_coal[year_col] * ammonia_coal['coal_conv']
        ammonia_gas[year_col] = ammonia_gas[year_col] * ammonia_gas['gas_conv']
ammonia_coal['ROUTE'] = 'Coal'

# Swap the original ammonia rows for the route-split rows
ammonia_weight = pd.concat((ammonia_coal, ammonia_gas)).drop(columns=['coal_conv', 'gas_conv'])
ifa_production = pd.concat((ifa_production[~is_ammonia], ammonia_weight))

In [None]:
# Calculation loop
# Each product type is processed several times in a row (PC1..PC4, IC1..IC3,
# DS1..DS2). After every pass except the last one of a product type, the
# emission-factor tables and match lists are replaced by the output of
# update_ef_dfs, so the next pass starts from the updated factors.
groups = ['Primary chemicals']*4 + ['Intermediates']*3 + ['Downstream']*2
group_output_names = ['PC1', 'PC2', 'PC3', 'PC4', 'IC1', 'IC2', 'IC3', 'DS1', 'DS2']
# Names of the last pass of each product type
final_passes = ('PC4', 'IC3', 'DS2')

for current_group, current_group_name in zip(groups, group_output_names):
    # True on the last pass of a product type: calculate_facility_emissions is
    # then called with dbs=None/names=None and the EF tables are not updated.
    output_iteration = current_group_name in final_passes

    print(current_group_name)
    # Fetch chemicals at this stage of production
    current_chemical_names = product_group[product_group['Product type']==current_group]['Product']
    current_chemicals = ihs_materials[ihs_materials['Product type']==current_group]

    if current_chemicals.empty:
        print('No '+current_group+' included in current data.')
        continue

    # Calculate material emissions for each process
    material_emissions = get_upstream_emissions(current_chemicals, ei_emissions, cm_emissions, match_list_ei, match_list_cm)
    material_emissions['Type'] = material_emissions['Type'].replace({'Utilities':'Indirect Utilities'})
    material_emissions = get_direct_energy_emissions(material_emissions, direct_utl_conv, ei_emissions)
    material_emissions = get_direct_process_emissions(material_emissions, direct_process_convs)

    if bio_forecast:
        # Load in bio processes and split benzene and toluene routes
        material_emissions = pd.concat((material_emissions[material_emissions['Product']!='BENZENE AND TOLUENE'], extra_processes))
    print('Materials done.')

    # Allocate emissions from all materials to each co-product from processes.
    # The emission value columns are taken by position from ei_emissions.
    emission_val_cols = list(ei_emissions.columns[3:16])
    mass_allocation = allocate_emissions(material_emissions.copy(), emission_val_cols)
    ## Add here if doing for energy and economic allocation
    combined_factors = calculate_implied_emissions_factors(mass_allocation, material_emissions, emission_val_cols, suffixes=[''])
    combined_factors = add_ifa_conv_factors(combined_factors, ifa_factors)

    # Only keep conversion factors needed for this round: products of the
    # current type that have an IHS match; remaining gaps are filled with 0
    in_current_group = combined_factors['Product'].isin(current_chemical_names)
    cf_subset = combined_factors[in_current_group].dropna(subset=['ihs_match'])
    cf_subset = cf_subset.fillna(0)

    cf_subset.to_csv(output_path+'processConversionFactors_'+current_group_name+'_.csv')
    print('EFs done')

    ## Assign processes to facilities
    # Filter out outlying possible processes
    poss_processes = icis_ihs_matches[icis_ihs_matches['PRODUCT'].isin(current_chemical_names)].merge(
        cf_subset, left_on=['ihs_match'], right_on=['ihs_match'], how='left')

    current_facilities = facility_production[facility_production['PRODUCT'].isin(current_chemical_names)]

    facility_conversion = merge_matching_processes(current_facilities, poss_processes)

    current_ifa = ifa_production[ifa_production['PRODUCT'].isin(current_chemical_names)]

    facility_conversion = add_ifa_production(facility_conversion, current_ifa, cf_subset, ammonia_processes)

    ## Calculate facility emissions
    ### -> Misses products with no IHS match
    facility_conversion_orig = facility_conversion.dropna(subset=['ihs_match']).merge(cf_subset, on=['ihs_match'], how='left')

    if output_iteration:
        dbs, names = None, None
    else:
        dbs, names = ['ihs_cradle-to-out-gate '], ['IHS CtOG']

    facility_emissions = calculate_facility_emissions(facility_conversion_orig, dbs=dbs, names=names, emission_val_cols=emission_val_cols, output_path=output_path)

    aggregated_emissions = aggregate_facility_emissions(facility_emissions)

    # Ethylene feedstock weighting is only applied to primary chemicals, and
    # only when weighting is switched on and the bio scenario is off
    if current_group == 'Primary chemicals' and bio_forecast is False and weighting is True:
        ethylene_conv_factors = calculate_ethylene_feedstock_emissions(cf_subset, ethylene_feedstock_types)
        ethylene_facility_factors = attribute_weighted_ethylene_to_facilities(ethylene_feedstock_vals, ethylene_conv_factors)

        emissions_weighted = merge_weighted_ethylene(aggregated_emissions, ethylene_facility_factors, dbs=dbs, names=names)
    else:
        emissions_weighted = aggregated_emissions.copy()

    emissions_weighted.to_parquet(output_path+'facilityEmissions_'+current_group_name+'_.parquet')

    ## Calculate EFs for next iteration
    iteration_ef_updates = get_updated_efs(facility_production, current_ifa, emissions_weighted, current_chemical_names, cf_subset)
    iteration_ef_updates.to_csv(output_path+'average_efs_'+current_group_name+'_.csv')

    if not output_iteration:
        ei_emissions, match_list_ei, cm_emissions, match_list_cm = update_ef_dfs(ei_emissions, match_list_ei, cm_emissions, match_list_cm, iteration_ef_updates, current_chemical_names)


print('Processing finished')

In [None]:
## Check facilities emissions file if desired
# The string below is a placeholder, not a real path: replace it with the path
# of a parquet file written by the run before executing this cell.
test_file = pd.read_parquet('INSERT FINAL OUTPUT PARQUET FILEPATH')

In [None]:
# Print test
# Take the first 10 rows of the loaded file for a quick visual check.
test_file.head(10)