## Obtain process impacts by combining process outputs with impact factors

Inputs: - Process Excel file downloaded from OpenLCA
        - Impact factor conversion xml files downloaded from OpenLCA

Output: - CSV file of the impacts for each gas for all processes in the input file

In [3]:
# Import packages
import os
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
pd.options.mode.chained_assignment = None

In [2]:
# Variable definition - INSERT YOUR PATHS TO THE DATA HERE
# Folder holding the two input files below; an empty string means the current
# working directory.
data_path = ""
# os.path.join adds the path separator when it is missing, so data_path works
# with or without a trailing slash.
lca_path = os.path.join(data_path, 'APOS Cumulative LCIA v3.10_raw.xlsx')
lci_path = os.path.join(data_path, 'APOS Cumulative LCI v3.10.csv')

# Folder the result CSV files are written to. Keep the trailing slash: the
# cells that write and re-read the results append the file name directly.
output_path = '/data/extracted/'

In [3]:
# Load the 'LCIA' sheet of the workbook
lca_raw = pd.read_excel(lca_path, sheet_name='LCIA')

# Keep the four descriptor columns and the three 'IPCC 2021 no LT' score columns
lca = lca_raw.loc[:, [
    'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4',
    'IPCC 2021 no LT.2', 'IPCC 2021 no LT.3', 'IPCC 2021 no LT.4',
]]
# The first three rows are header rows, not data
lca = lca.iloc[3:]
# Everything after the four descriptor columns is cast to float
score_cols = lca.columns[4:]
lca[score_cols] = lca[score_cols].astype(float)
# Replace the spreadsheet headers with readable names
lca.columns = [
    'generalComment', 'location', 'name', 'unit',
    'CO2e_100a', 'CO2e_20a', 'CO2e_500a',
]

In [4]:
# Load the LCI table and collect the columns that belong to each gas of interest
lci_raw = pd.read_csv(lci_path, low_memory=False)
gases = [
    'Carbon dioxide', 'Carbon monoxide', 'Chloroform', 'Dinitrogen monoxide',
    'Ethane', 'Methane', 'Nitric oxide', 'Nitrogen fluoride',
    'Perfluoropentane', 'Sulfur hexafluoride',
]

# First entry: the four descriptor columns. Then one entry per gas holding
# every raw column whose header contains the gas name (case-sensitive).
columns = [['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4']]
for gas in gases:
    columns.append([col for col in lci_raw.columns if gas in col])

# Flatten the nested list, keeping its order
selected = []
for sublist in columns:
    selected.extend(sublist)

lci = lci_raw[selected]
lci.columns = ['generalComment', 'location', 'name', 'unit'] + selected[4:]
# The first three rows are dropped before the numeric cast
lci = lci.iloc[3:]
flow_cols = lci.columns[4:]
lci[flow_cols] = lci[flow_cols].astype(float)

# Collapse the matching columns of each gas into one row-wise total
for gas in gases:
    matching = [col for col in lci.columns if gas in col]
    lci[gas] = lci[matching].sum(axis=1)

# Keep only the descriptors and the per-gas totals
lci = lci.loc[:, ['generalComment', 'location', 'name', 'unit'] + gases]

In [5]:
# Join impact scores and per-gas totals on the four shared descriptor columns
merged = pd.merge(
    lca,
    lci,
    how='inner',
    on=['generalComment', 'location', 'name', 'unit'],
)

In [7]:
# Preview the rows whose generalComment is 'market for propylene'
merged.loc[merged['generalComment'] == 'market for propylene']

Unnamed: 0,generalComment,location,name,unit,CO2e_100a,CO2e_20a,CO2e_500a,Carbon dioxide,Carbon monoxide,Chloroform,Dinitrogen monoxide,Ethane,Methane,Nitric oxide,Nitrogen fluoride,Perfluoropentane,Sulfur hexafluoride
13163,market for propylene,RoW,propylene,kg,2.968906,3.989367,2.58149,2.449159,0.00283,1.154611e-08,3.4e-05,8.5e-05,0.019339,5.95309e-09,1.22453e-13,0.0,1.437745e-07
13164,market for propylene,RER,propylene,kg,1.947748,2.803915,1.623447,1.516054,0.001805,1.166096e-08,2.1e-05,6.9e-05,0.016255,4.22069e-09,1.03216e-13,0.0,5.657968e-08
13165,market for propylene,ZA,propylene,kg,12.2648,13.637741,11.672402,11.465777,0.00635,4.017742e-08,0.000548,7.2e-05,0.026051,1.39823e-07,1.07344e-13,0.0,2.427626e-07


In [69]:
# Rebuild the joined table: impact scores plus per-gas totals, matched on
# the four descriptor columns
descriptor_cols = ['generalComment', 'location', 'name', 'unit']
merged = pd.merge(lca, lci, how='inner', on=descriptor_cols)

# For each [generalComment, name, unit] group keep only the row(s) with location GLO if any exist; otherwise average the numeric columns over the group's rows and set location to AVG
def process_group(group):
    """Reduce one [generalComment, name, unit] group to its representative row(s).

    Parameters:
        group: DataFrame holding the rows of a single group. It must contain
            the columns 'generalComment', 'location', 'name' and 'unit'.

    Returns:
        The rows of ``group`` whose location is 'GLO' when there is at least
        one. Otherwise a one-row DataFrame with the same columns as ``group``:
        numeric columns hold the group mean, 'location' is 'AVG', and the
        remaining descriptor columns are taken from the group's first row.
    """
    is_global = group['location'] == 'GLO'
    if is_global.any():
        return group[is_global]

    # No global entry: average the numeric columns across the locations
    averaged = group.mean(numeric_only=True)
    averaged['location'] = 'AVG'
    # The descriptors are not numeric, so they are copied from the first row
    for descriptor in ('generalComment', 'name', 'unit'):
        averaged[descriptor] = group[descriptor].iloc[0]
    return pd.DataFrame([averaged], columns=group.columns)

# Group by 'generalComment', 'name', 'unit' and reduce every group with process_group.
# The groups are iterated explicitly instead of going through GroupBy.apply:
# process_group reads the grouping columns from each group, and GroupBy.apply
# passing those columns to the function is deprecated since pandas 2.2.
# Iterating a GroupBy always yields the complete rows of each group.
grouping_cols = ['generalComment', 'name', 'unit']
reduced_groups = [process_group(group) for _, group in merged.groupby(grouping_cols)]
if reduced_groups:
    single_loc = pd.concat(reduced_groups, ignore_index=True)
else:
    # pd.concat rejects an empty list; keep an empty frame with the same columns
    single_loc = merged.iloc[0:0].copy()

In [73]:
# Add EcoInvent uncertainty: every value column gets a '<column>_sigma'
# companion equal to a fixed fraction of the value
ecoinvent_uncertainty = 0.1

# Value columns are everything after the four descriptor columns of merged
value_columns = merged.columns[4:]
for gas in value_columns:
    sigma_name = gas + '_sigma'
    single_loc[sigma_name] = single_loc[gas] * ecoinvent_uncertainty

In [77]:
# Write the single-location emission factors (with sigma columns), without the row index
single_loc_csv = output_path + 'EI_3_10_APOS_EFs.csv'
single_loc.to_csv(single_loc_csv, index=False)

In [157]:
# Reload the emission factors and rename the descriptor columns to Source / Product
cut_down = pd.read_csv(output_path+'EI_3_10_APOS_EFs.csv').rename(
    columns={'generalComment': 'Source', 'name': 'Product'}
)
# Keep only the sources listed in the 'ei' column of the matches table
matches = pd.read_csv('/data/processed/ihs_to_ei_matches_formatted.csv', index_col=0)
matched_sources = list(matches['ei'].unique())
is_matched = cut_down['Source'].isin(matched_sources)
cut_down = cut_down[is_matched]

In [158]:
# For each source keep only the rows whose Product string occurs inside the Source string. If no row qualifies, keep the first row
def process_group(group):
    """Pick the row(s) of one Source group whose Product appears in the Source text.

    Parameters:
        group: DataFrame with the rows of a single Source; it must contain the
            string columns 'Product' and 'Source'.

    Returns:
        The rows where the Product string is a substring of the Source string.
        When no row qualifies, a one-row frame with the first row. An empty
        group is returned unchanged.
    """
    if len(group) == 0:
        return group

    # Row-wise substring check: is the product name contained in the source name?
    product_in_source = [
        product in source
        for product, source in zip(group['Product'], group['Source'])
    ]
    if any(product_in_source):
        return group[product_in_source]
    return group.iloc[:1]

# Reduce every Source to the row(s) selected by process_group.
# The groups are iterated explicitly instead of going through GroupBy.apply:
# process_group reads the 'Source' grouping column from each group, and
# GroupBy.apply passing grouping columns to the function is deprecated since
# pandas 2.2. Iterating a GroupBy always yields the complete rows of each group.
selected_rows = [process_group(group) for _, group in cut_down.groupby('Source')]
if selected_rows:
    output = pd.concat(selected_rows, ignore_index=True)
else:
    # pd.concat rejects an empty list; keep an empty frame with the same columns
    output = cut_down.iloc[0:0].copy()

In [159]:
# Work on a copy so that output stays untouched
converted = output.copy()
# FAO numbers - https://www.fao.org/3/T0269E/t0269e0c.htm
unit_convs = {'coke': 28.4, 'natural gas, liquefied': 1/0.735}
# Products without an entry in unit_convs get a factor of 1 (no change)
converted['conversion'] = converted['Product'].map(unit_convs).fillna(1)
# NOTE(review): the factor is chosen by product name only, whatever the row's
# original unit is - confirm that every matching row needs converting.

# Scale each value column and its sigma companion by the row's factor
for gas in ['CO2e_20a', 'CO2e_100a', 'CO2e_500a'] + gases:
    for column in (gas, gas+'_sigma'):
        converted[column] = converted[column]*converted['conversion']

# All rows are labelled as per-kg after the conversion
converted['unit'] = 'kg'

In [160]:
# Keep the first row per Source, drop the helper columns and write the result
# (the row index is written as the first CSV column)
ihs_efs = converted.drop_duplicates(subset=['Source'])
ihs_efs = ihs_efs.drop(columns=['conversion', 'unit'])
ihs_efs.to_csv(output_path+'EI_3_10_APOS_EFs_in_IHS.csv')