# Breakdown of Calculation of per facility ICIS emisions given related IHS processes

Assumptions -> Conversion factor used for ICIS facility is mean of conversion factors for all corresponding IHS processes
-> For Ethylene, feedstock conversion factor is mean of conversion factors linked to feedstock

In [3]:
# Import packages
import re
import numpy as np
import pandas as pd
from tqdm import tqdm

# File paths
input_path = '../data/'
output_path = '../data/combined/'

production_file = input_path+'processed/icisFacilityProduction_w_uncertainties.csv'
conversion_factor_file = input_path+'combined/ihsProcessEmissions_decomposition_mass.csv'
matching_file = input_path+'extra_inputs/all_icis_to_ihs_matches.csv'
matching_on = ['PRODUCT', 'ROUTE', 'TECHNOLOGY', 'LICENSOR']

## Import data

In [2]:
# Import data
icis_ihs_matches = pd.read_csv(matching_file, index_col=0)
facility_production = pd.read_csv(production_file, index_col=0)

conv_factors = pd.read_csv(conversion_factor_file, index_col=0)
# conv_factors = conv_factors.dropna(subset=['ihs_match'])
# keep_cols = ['ihs_match', 'Product', 'Material', 'Material Type', 'Gas', 'value']
# conv_factors = pd.read_csv(conversion_factor_file, index_col=0).rename(columns={'Target/Process':'ihs_match'})[keep_cols]
# conv_factors = conv_factors[conv_factors.columns[['conv_factor' not in col for col in conv_factors.columns]]].dropna(subset=['ihs_match'])

In [45]:
## Add IFA production
fert_production_file = input_path+'extracted/IFA_production_w_uncertainties.csv'
ifa_production = pd.read_csv(fert_production_file)

name_conversions = {
    'NH3': 'AMMONIA',
    'AN': 'AMMONIUM NITRATE',
    'Ammonium nitrate (33.5-0-0) granulated': 'AMMONIUM NITRATE',
    'AS': 'AMMONIUM SULPHATE',
    'CAN': 'CALCIUM AMMONIUM NITRATE',
    'Calcium ammonium nitrate (27-0-0)': 'CALCIUM AMMONIUM NITRATE',
    'Urea (46-0-0)': 'UREA'
}

ifa_ihs_matches = {
    'AMMONIA':'AMMONIA',
    'AMMONIUM NITRATE': 'AMMONIUM NITRATE FERTILIZER',
    'AMMONIUM SULPHATE': 'HYDROXYLAMMONIUM SULFATE',
    'CALCIUM AMMONIUM NITRATE':'AMMONIUM NITRATE FERTILIZER',
    'UREA': 'UREA, AGRICULTURAL GRADE'
}

ifa_production['PRODUCT'] = ifa_production['PRODUCT'].replace(name_conversions)
ifa_production.rename(columns={'Region':'COUNTRY/TERRITORY'}, inplace=True)
ifa_production['Conv_name'] = ifa_production['PRODUCT'].replace(ifa_ihs_matches)

In [51]:
facility_conversion = facility_production.merge(icis_ihs_matches, on=matching_on, how='left')
ifa_conversion = ifa_production.merge(conv_factors[['Target/Process', 'Product']].rename(columns={'Target/Process':'ihs_match'}).drop_duplicates(), left_on='Conv_name', right_on='Product', how='left').drop(columns=['Conv_name', 'Product'])
facility_conversion = pd.concat((facility_conversion, ifa_conversion))

In [6]:
# Print out Products with no IHS match
#facility_conversion[facility_conversion['ihs_match'].isna()][['PRODUCT','ROUTE']].drop_duplicates()

In [56]:
### -> Misses products with no IHS match
facility_conversion = facility_conversion.dropna(subset=['ihs_match']).merge(conv_factors, left_on=['ihs_match'], right_on='Target/Process', how='left')

facility_conversion = facility_conversion.drop_duplicates().drop(columns=['Emission source'])

In [58]:
### -> Misses products with no IHS match
facility_conversion_pivoted = facility_conversion.pivot(index=list(facility_conversion.columns[:-4])+['Material', 'Material Type'], columns='Gas', values='value')
facility_conversion_pivoted.reset_index().to_csv(output_path+'facility_conversion_pivoted.csv')

  facility_conversion_pivoted.reset_index().to_csv(output_path+'facility_conversion_pivoted.csv')


In [59]:
facility_conversion = pd.read_csv(output_path+'facility_conversion_pivoted.csv', index_col=0)

  exec(code_obj, self.user_global_ns, self.user_ns)


## Calculate emissions by combining production with conversion factors

In [60]:
# Take average of CM and EI conversion factors

# emission_val_cols = ['CO2e_20a', 'CO2e_100a']#, 'Carbon monoxide', 'Chloroform', 'Dinitrogen monoxide', 'Ethane', 'Methane', 'Nitric oxide', 'Nitrogen fluoride', 'Perfluoropentane', 'Sulfur hexafluoride', 'Other']
# emission_val_cols_sigma = [col + '_sigma' for col in emission_val_cols]
#
# for column, col_sigma in zip(emission_val_cols, emission_val_cols_sigma):
#     facility_conversion['combined_' + column] = np.nanmean([facility_conversion['ei_' + column + '_cradle-to-gate'], facility_conversion['cm_' + column + '_cradle-to-gate']], axis=0)
#     facility_conversion['combined_' + col_sigma] = np.nanmean([facility_conversion['ei_' + column + '_cradle-to-gate_sigma'], facility_conversion['cm_' + column + '_cradle-to-gate_sigma']], axis=0)
#
# facility_conversion = facility_conversion[facility_conversion.columns[['ei' not in col and 'cm' not in col for col in facility_conversion.columns]]]
#
# facility_conversion.columns = [i.replace(', mass allocation ','_').replace('_factor','') for i in facility_conversion.columns]
#
# facility_conversion.rename(columns={'ihs_match':'PROCESS'}, inplace=True)
#
# facility_conversion.columns = [i.replace(',  allocation ','_').replace('_factor','') for i in facility_conversion.columns]

In [61]:
# Calculate facility emissions for
# dbs = ['combined_', 'ihs_cradle-to-out-gate ', 'Raw Material ', 'Indirect Utilities ', 'Direct Utilities ', 'Direct Process ', 'Electricity ']
# names = ['EI & CM', 'IHS CtOG', 'Raw Material', 'Indirect Utilities', 'Direct Utilities', 'Direct Process', 'Electricity']

emission_val_cols = ['CO2e_20a', 'CO2e_100a']
emission_val_cols_sigma = [col + '_sigma' for col in emission_val_cols]

# Create base dataframe to use
years = [str(i) for i in range(1978, 2051)]
years_sigma = [year+'_sigma' for year in years]
base_columns = ['PRODUCT', 'COUNTRY/TERRITORY', 'STATE', 'COMPANY', 'SITE', '#',
       'ROUTE', 'TECHNOLOGY', 'LICENSOR', 'START_YR', 'COMPLEX', 'LATITUDE', 'LONGITUDE', 'ihs_match', 'Material', 'Material Type'] + years + years_sigma
base_df = facility_conversion[base_columns]

facility_emissions = pd.DataFrame()
# for db, name in tqdm(zip(dbs, names)):
for gas in tqdm(emission_val_cols):
    df = base_df.copy()
    df[years] = df[years].multiply(facility_conversion[gas], axis='index')
    ## Incorrect error propagation here
    df[years_sigma] = df[years_sigma].multiply(facility_conversion[gas+'_sigma'], axis='index')
    df['Gas'] = gas
    facility_emissions = pd.concat((facility_emissions, df), axis = 0)

# # Save as parquet file for large size
# facility_emissions.to_parquet(output_path+'icisFacilityEmissions_allIhsProcesses_w_uncertainties_10-12.parquet')

100%|██████████| 2/2 [00:13<00:00,  6.88s/it]


In [62]:
# Save as parquet file for large size
facility_emissions['COMPLEX'] = facility_emissions['COMPLEX'].astype(str)
facility_emissions.to_parquet(output_path+'icisFacilityEmissionsDetailed_allIhsProcesses_w_uncertainties.parquet')

In [2]:
# endings = ['1-3', '4-6', '7-9', '10-12']
#
# mean_aggregated = pd.DataFrame()
#
# for end in endings:
#     facility_emissions = pd.read_parquet(output_path+'icisFacilityEmissions_allIhsProcesses_w_uncertainties_'+end+'.parquet')
#     aggregated_emissions = facility_emissions.groupby(list(facility_emissions.columns[:13])+['Gas','Type']).mean().reset_index()
#     mean_aggregated = pd.concat((mean_aggregated, aggregated_emissions))
#
# mean_aggregated.to_parquet(output_path+'icisFacilityEmissions_ihsMean_w_uncertainties_allgases.parquet')


In [2]:
facility_emissions = pd.read_parquet(output_path+'icisFacilityEmissionsDetailed_allIhsProcesses_w_uncertainties.parquet')

In [68]:
years = [str(i) for i in range(1978, 2051)]
years_sigma = [year+'_sigma' for year in years]
facility_emissions[years+years_sigma] = facility_emissions[years+years_sigma].fillna(0)

In [69]:
# Take average output when many IHS processes correspond to single facility to get one value for each facility
aggregated_emissions = facility_emissions.fillna('n.a.').groupby(list(facility_emissions.columns[:13])+['Material','Material Type','Gas']).mean()

In [72]:
aggregated_emissions

Unnamed: 0,PRODUCT,COUNTRY/TERRITORY,STATE,COMPANY,SITE,#,ROUTE,TECHNOLOGY,LICENSOR,START_YR,...,2041_sigma,2042_sigma,2043_sigma,2044_sigma,2045_sigma,2046_sigma,2047_sigma,2048_sigma,2049_sigma,2050_sigma
0,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450197,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,0.805235,0.863948,0.922320,0.980368,1.038109,1.095557,1.152728,1.209633,1.266286,1.322699
450198,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,3.425298,3.675052,3.923355,4.170280,4.415897,4.660271,4.903461,5.145525,5.386516,5.626483
450199,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,4.462032,4.787379,5.110835,5.432497,5.752455,6.070793,6.387590,6.702919,7.016850,7.329448
450200,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,3.217757,3.452379,3.685637,3.917600,4.148335,4.377902,4.606358,4.833755,5.060144,5.285571


In [75]:
facility_emissions

Unnamed: 0,PRODUCT,COUNTRY/TERRITORY,STATE,COMPANY,SITE,#,ROUTE,TECHNOLOGY,LICENSOR,START_YR,...,2042_sigma,2043_sigma,2044_sigma,2045_sigma,2046_sigma,2047_sigma,2048_sigma,2049_sigma,2050_sigma,Gas
0,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,CO2e_20a
1,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,CO2e_20a
2,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,CO2e_20a
3,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,CO2e_20a
4,"1,4-BUTANEDIOL",BELGIUM,n.a.,BASF,FELUY,1.0,MALEIC ANHYDRIDE,HYDRATION,KVAERNER PROCESS TECH,1997.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,CO2e_20a
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816492,VINYL ACETATE M.,UNITED STATES,n.a.,INEOS ACETYLS,~UNKNOWN USA,1.0,ETHYLENE,n.a.,n.a.,0.0,...,1.479591,1.474255,1.469100,1.464118,1.459301,1.454639,1.450126,1.445755,1.441519,CO2e_100a
816493,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,7.851979,8.382493,8.910065,9.434841,9.956960,10.476552,10.993737,11.508628,12.021333,CO2e_100a
816494,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,0.760172,0.811532,0.862608,0.913413,0.963961,1.014264,1.064334,1.114182,1.163818,CO2e_100a
816495,VINYL ACETATE M.,UNITED STATES,n.a.,~ADDITIONAL,~UNKNOWN USA,1.0,ETHYLENE,ACETOXYLATION,n.a.,2029.0,...,3.675052,3.923355,4.170280,4.415897,4.660271,4.903461,5.145525,5.386516,5.626483,CO2e_100a


In [70]:
aggregated_emissions = aggregated_emissions.reset_index()
aggregated_emissions[aggregated_emissions.columns[:17]] = aggregated_emissions[aggregated_emissions.columns[:17]].astype(str)

In [71]:
# Save aggregated emissions
aggregated_emissions.to_parquet(output_path+'icisFacilityEmissionsDetailed_ihsMean_w_uncertainties.parquet')

## Weighted average for Ethylene production

In [28]:
# Read in individual facilities
facility_production = pd.read_csv(production_file, index_col=0)
eth_prod = facility_production[facility_production['PRODUCT']=='ETHYLENE'].reset_index(drop=True)
conv_factors = pd.read_csv(conversion_factor_file, index_col=0)
eth_conv = conv_factors[conv_factors['Product']=='ETHYLENE'].reset_index(drop=True).rename(columns={'Target/Process':'ihs_match'})

# Ethylene feedstocks
feedstocks = pd.read_csv(input_path+'extracted/icisEthyleneFeedstocks_1978-2050.csv', index_col=0, header=[0,1])
feedstock_types = pd.read_csv(input_path+'extra_inputs/feedstock_type.csv')

In [29]:
# Get emissions for each feedstock
years = list(map(str, list(range(1978, 2051))))

feedstock_vals = feedstocks.copy()
for year in years:
    df = feedstock_vals[year]
    df['CAPACITY'] = df['CAPACITY'].apply(lambda x: re.sub("[^0-9.]", "0", str(x))).astype(float)
    df[df.columns[1:]] = df[df.columns[1:]].multiply(df['CAPACITY']/100, axis='index')
    feedstock_vals[year] = df

feedstock_emissions = eth_conv.merge(feedstock_types, on='ihs_match', how='left').groupby('Feedstock').mean()

feedstock_emissions.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['CAPACITY'] = df['CAPACITY'].apply(lambda x: re.sub("[^0-9.]", "0", str(x))).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0_level_0,Research Year,Value,Value_sigma,"Mass, kg","Mass, kg_sigma",Mass ratio,Mass ratio_sigma,value
Feedstock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ETHANE,2005.814815,1.0,0.0,1.0,0.0,0.671107,0.009557,0.21167
HEAVYCON,2017.0,1.0,0.0,1.0,0.0,0.263622,0.009706,0.158931
LPG,1994.363636,1.0,0.0,1.0,0.0,0.341998,0.010197,0.45178
MID-DIST,2011.111111,1.0,0.0,1.0,0.0,0.431727,0.012227,0.647867
NAPHTHA,2011.0,1.0,0.0,1.0,0.0,0.27905,0.00977,0.18324


In [30]:
# Apply emissions to each facility
blank = feedstock_vals[feedstock_vals.columns[:7]]
blank.columns = list(blank.columns.droplevel(1))
conversions = feedstock_emissions.columns[['allocation' in name for name in feedstock_emissions.columns]]

for conversion in conversions:
    fs_ems = feedstock_emissions[conversion]
    each_conv = pd.DataFrame()
    for year in years:
        df = feedstock_vals[year]
        for fs in df.columns[1:]:
            df[fs] = df[fs]*fs_ems.loc[fs]
        yearly = blank.copy()
        yearly['Year'] = year
        yearly[conversion] = np.sum(df[df.columns[1:]].values, axis=1)
        each_conv = pd.concat((each_conv,yearly), axis=0)
    if conversion != conversions[0]:
        ethylene_ems = ethylene_ems.merge(each_conv, on=list(each_conv.columns[:8]), how='left')
    else: ethylene_ems = each_conv

ethylene_ems.head()

NameError: name 'ethylene_ems' is not defined

In [7]:
# Convert output to facility_emissions format
ethylene_conv = ethylene_ems.copy()
ethylene_conv.columns = [i.replace(',  allocation ','_').replace('_factor','') for i in ethylene_conv.columns]

dbs = ['ihs_cradle-to-out-gate ', 'Raw Material ', 'Indirect Utilities ', 'Direct Utilities ', 'Direct Process ', 'Electricity ']
names = ['IHS CtOG', 'Raw Material', 'Indirect Utilities', 'Direct Utilities', 'Direct Process', 'Electricity']
emission_val_cols = ['CO2e_20a', 'CO2e_100a', 'Carbon dioxide', 'Carbon monoxide', 'Chloroform', 'Dinitrogen monoxide', 'Ethane', 'Methane', 'Nitric oxide', 'Nitrogen fluoride', 'Perfluoropentane', 'Sulfur hexafluoride', 'Other']

base_cols = list(ethylene_conv.columns[:7])

ethylene_pivoted = pd.DataFrame()
ethylene_piv_sigma = pd.DataFrame()

for db, name in zip(dbs, names):
    for gas in emission_val_cols:
        df = ethylene_conv[base_cols+['Year', db+gas]]
        df = df.pivot(index = base_cols, columns = 'Year', values = db+gas)
        df.index.name = None
        df['Gas'] = gas
        df['Type'] = name
        ethylene_pivoted = pd.concat((ethylene_pivoted, df), axis = 0)

        df_sigma = ethylene_conv[base_cols+['Year', db+gas+'_sigma']]
        df_sigma = df_sigma.pivot(index = base_cols, columns = 'Year', values = db+gas+'_sigma')
        df_sigma.index.name = None
        df_sigma['Gas'] = gas
        df_sigma['Type'] = name
        ethylene_piv_sigma = pd.concat((ethylene_piv_sigma, df_sigma), axis = 0)

ethylene_weighted = ethylene_pivoted.merge(ethylene_piv_sigma, on=base_cols+['Gas', 'Type'], how='left', suffixes=('','_sigma')).reset_index()

ethylene_weighted.columns.name = None
ethylene_weighted = ethylene_weighted.fillna(0)

In [9]:
# Update ethylene values in facility emissions database
facility_emissions = pd.read_parquet(output_path+'icisFacilityEmissions_ihsMean_w_uncertainties_allgases.parquet')

emissions_merged = facility_emissions.merge(ethylene_weighted, on=['COUNTRY/TERRITORY', 'STATE', 'COMPANY', 'SITE', '#', 'Gas', 'Type'], how='left', suffixes=('_old','')).drop(columns='START_YR').rename(columns={'START_YR_old':'START_YR'})

# years = [str(i) for i in range(1978, 2051)]
# years_sigma = [year+'_sigma' for year in years]

for year, uncert in zip(years, years_sigma):
    emissions_merged[year] = emissions_merged[year].fillna(emissions_merged[year+'_old'])
    emissions_merged[uncert] = emissions_merged[uncert].fillna(emissions_merged[uncert+'_old'])

facility_emissions_update = emissions_merged.drop(columns=list(emissions_merged.columns[['_old' in i for i in emissions_merged.columns]]) + ['START_MO'])

In [10]:
facility_emissions_update.to_parquet(output_path+'icisFacilityEmissions_ihsWeighted_w_uncertainties_allgases.parquet')

In [36]:
## Utility functions
def uncertainty_propagation(calc:str, x:float, dx:float, y:float=1, dy:float=0, z:float=1, propagation_type:str='simple') -> float:
    if calc == 'mult':
        xdiv = np.divide(dx, x, out=np.zeros_like(dx), where=x!=0)
        ydiv = np.divide(dy, y, out=np.zeros_like(dy), where=y!=0)
        if propagation_type == 'simple':
            return (xdiv + ydiv)*z
        elif propagation_type == 'stdev':
            return np.sqrt(pow(xdiv,2) + pow(ydiv,2))*z
        else: Exception('Specified propagation_type not recognised.')

    elif calc == 'add':
        if propagation_type == 'simple':
            return abs(dx)+abs(dy)
        elif propagation_type == 'stdev':
            return np.sqrt(pow(dx,2) + pow(dy,2))
        else: Exception('Specified propagation_type not recognised.')
    else: Exception('Please specify calc of propagation')#%%

# All possible facility emissions given different processes for making same product

In [5]:
facility_match = facility_conversion[list(facility_conversion.columns[:list(facility_conversion.columns).index('PROCESS')+2])]

In [6]:
# sort_col = 'ihs_cradle-to-out-gate CO2e_20a,  allocation factor'
# min_conv_factors = conv_factors.sort_values(['Product', sort_col]).groupby('Product').head(1).reset_index(drop=True)

In [9]:
facility_min_type = facility_match.drop(columns='PROCESS').merge(conv_factors, on='Product', how='left').drop_duplicates()

In [10]:
emission_val_cols = ['CO2e_20a', 'CO2e_100a']#, 'Carbon dioxide', 'Carbon monoxide', 'Chloroform', 'Dinitrogen monoxide', 'Ethane', 'Methane', 'Nitric oxide', 'Nitrogen fluoride', 'Perfluoropentane', 'Sulfur hexafluoride', 'Other']
emission_val_cols_sigma = [col + '_sigma' for col in emission_val_cols]

for column, col_sigma in zip(emission_val_cols, emission_val_cols_sigma):
    facility_min_type['combined_' + column] = np.nanmean([facility_min_type['ei_' + column + '_cradle-to-gate'], facility_min_type['cm_' + column + '_cradle-to-gate']], axis=0)
    facility_min_type['combined_' + col_sigma] = np.nanmean([facility_min_type['ei_' + column + '_cradle-to-gate_sigma'], facility_min_type['cm_' + column + '_cradle-to-gate_sigma']], axis=0)

facility_min_type = facility_min_type[facility_min_type.columns[['ei' not in col and 'cm' not in col for col in facility_min_type.columns]]]

facility_min_type.columns = [i.replace(',  allocation factor','').replace(',  allocation sigma','_sigma') for i in facility_min_type.columns]

facility_min_type.rename(columns={'ihs_match':'PROCESS'}, inplace=True)

  facility_min_type['combined_' + column] = np.nanmean([facility_min_type['ei_' + column + '_cradle-to-gate'], facility_min_type['cm_' + column + '_cradle-to-gate']], axis=0)
  facility_min_type['combined_' + col_sigma] = np.nanmean([facility_min_type['ei_' + column + '_cradle-to-gate_sigma'], facility_min_type['cm_' + column + '_cradle-to-gate_sigma']], axis=0)


In [11]:
# Calculate facility emissions for
dbs = ['combined_', 'Raw Material ', 'Indirect Utilities ', 'Direct Utilities ', 'Direct Process ', 'Electricity ']
names = ['EI & CM', 'Raw Material', 'Indirect Utilities', 'Direct Utilities', 'Direct Process', 'Electricity']

# Create base dataframe to use
years = [str(i) for i in range(1978, 2051)]
years_sigma = [year+'_sigma' for year in years]
base_columns = ['PRODUCT', 'COUNTRY/TERRITORY', 'STATE', 'COMPANY', 'SITE', '#',
       'ROUTE', 'TECHNOLOGY', 'LICENSOR', 'START_YR', 'COMPLEX', 'LATITUDE', 'LONGITUDE', 'PROCESS'] + years + years_sigma
base_df = facility_min_type[base_columns]

facility_mins = pd.DataFrame()
for db, name in tqdm(zip(dbs, names)):
    for gas in tqdm(emission_val_cols):
        df = base_df.copy()
        df[years] = df[years].multiply(facility_min_type[db+gas], axis='index')
        ## Incorrect error propagation here
        df[years_sigma] = df[years_sigma].multiply(facility_min_type[db+gas+'_sigma'], axis='index')
        df['Gas'] = gas
        df['Type'] = name
        facility_mins = pd.concat((facility_mins, df), axis = 0)

0it [00:00, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:01<00:01,  1.03s/it][A
100%|██████████| 2/2 [00:01<00:00,  1.04it/s][A
1it [00:01,  1.93s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:00<00:00,  1.02it/s][A
100%|██████████| 2/2 [00:02<00:00,  1.05s/it][A
2it [00:04,  2.03s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:01<00:01,  1.52s/it][A
100%|██████████| 2/2 [00:03<00:00,  1.62s/it][A
3it [00:07,  2.58s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:01<00:01,  1.68s/it][A
100%|██████████| 2/2 [00:03<00:00,  1.63s/it][A
4it [00:10,  2.85s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:01<00:01,  1.76s/it][A
100%|██████████| 2/2 [00:03<00:00,  1.82s/it][A
5it [00:14,  3.13s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:03<00:03,  3.26s/it][A
100%|██████████| 2/2 [00:06<00:00,  3.08s/it][A
6it [00:20,  3.39s/it]


In [12]:
# Save as parquet file for large size
facility_mins.to_parquet(output_path+'icisFacilityEmissions_ihsAllPossible_w_uncertainties.parquet')