## Obtain process impacts by combining process outputs with impact factors

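Inputs: - Process inventory CSV file downloaded from OpenLCA
        - Impact factor conversion XML files downloaded from OpenLCA

Outputs: - CSV file of the CO2e impact factor of each gas (20- and 100-year)
         - CSV file of the total CO2e impact (20- and 100-year) of every process in the input file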

In [1]:
# Import packages
import os
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
pd.options.mode.chained_assignment = None

In [2]:
# Variable definition
# Root folder of the local data; every input path below is built from it
data_path = "C:/Users/lukec/OneDrive - University of Cambridge/PhD/Data/EcoInvent/"
lcaDataPath = f"{data_path}full_LCI/Consequential_v3_9_csv.csv"
impactMethods_20 = f"{data_path}impact_methods/EI_3_8_IPCC2013_GWP20a.xml"
impactMethods_100 = f"{data_path}impact_methods/EI_3_8_IPCC2013_GWP100a.xml"

# Prefix prepended to the name of every csv file written by this notebook
output_path = '../data/extracted/'

In [3]:
# Define functions
def to_listlist(inlist: list):
    """Return `inlist` as a list of lists, wrapping it when it is a flat list.

    Inputs:
    inlist - either a flat list (e.g. ['a', 'b']) or a list whose first element is a list
    Outputs:
    `inlist` itself when its first element is a list (or list subclass),
    otherwise a new one-element list containing `inlist`. An empty list is
    treated as a flat list and returned as [[]].
    """
    # len() rather than truthiness so array-like inputs keep working and None still raises TypeError
    if len(inlist) > 0 and isinstance(inlist[0], list):
        return inlist
    return [inlist]

def read_xml_attributes(filepath:str,branches:list,attributes:list,df=False):
    """Create a dataframe of attributes read from elements of an xml file.

    Each path in `branches` is followed one name at a time. For every name but
    the last, the walk moves into the first child whose tag contains that name
    (substring match); if no child matches, the walk stays where it is. For the
    last name of a path, every child whose tag contains it is read. One row is
    added to the result for each element read at the end of the final path;
    values read along earlier paths are carried into those rows.

    Inputs:
    filepath - path to xml file
    branches - list of successive branch choices, or a list of such lists (one per path)
    attributes - attributes to be read from the chosen branch, one list per path;
                 a falsy entry (e.g. '') reads the element's text instead of an attribute
    df - optional dataframe to append the rows to; when left as False an empty
         dataframe with one column per requested attribute is created
    Outputs:
    df - pandas dataframe of attributes for each end branch
    Raises:
    KeyError - when a matched element lacks one of the requested attributes
    Requirements: pandas as pd, xml.etree.ElementTree as ET
    """
    branches, attributes = to_listlist(branches), to_listlist(attributes)
    if df is False:
        df = pd.DataFrame([], columns=[name for group in attributes for name in group])

    # NOTE(review): the walk position is not reset to the root between paths, so each
    # later path continues from wherever the previous one stopped - confirm this is intended.
    tree_loc = ET.parse(filepath).getroot()
    attr_values = {}
    rows = []
    for pathnum, path in enumerate(branches):
        is_last_path = pathnum == len(branches) - 1
        for depth, branch in enumerate(path):
            matches = [child for child in tree_loc if branch in child.tag]
            if not matches:
                continue
            # Position in the path (not object identity) decides whether this is the
            # final name, so a tag name that repeats within a path is handled correctly.
            if depth < len(path) - 1:
                tree_loc = matches[0]
                continue
            for node in matches:
                attr_values.update(
                    (name, node.attrib[name] if name else node.text)
                    for name in attributes[pathnum]
                )
                if is_last_path:
                    rows.append(dict(attr_values))

    # Append all collected rows in one go instead of concatenating once per element
    if rows:
        df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)
    return df

In [4]:
# Fetch and display impact factors
# Read name/category/subCategory/meanValue of every exchange in each method file and
# rename the value column to CO2e_20a / CO2e_100a respectively
impacts_20 = read_xml_attributes(
    impactMethods_20,
    ['dataset','flowData','exchange'],
    ['name','category','subCategory','meanValue'],
).rename(columns={'meanValue':'CO2e_20a'})
impacts_100 = read_xml_attributes(
    impactMethods_100,
    ['dataset','flowData','exchange'],
    ['name','category','subCategory','meanValue'],
).rename(columns={'meanValue':'CO2e_100a'})

# Join the two tables on every column except the last (value) column
shared_columns = impacts_20.columns[:-1].tolist()
impacts = impacts_20.merge(impacts_100, on=shared_columns)
impacts = impacts.sort_values('name').reset_index(drop=True)

impacts.to_csv(output_path+'EI_3_8_IPCC2013_CO2e.csv')
impacts

Unnamed: 0,name,category,subCategory,CO2e_20a,CO2e_100a
0,"Carbon dioxide, fossil",Emission to air,low population density,1.0,1.0
1,"Carbon dioxide, fossil",Emission to air,unspecified,1.0,1.0
2,"Carbon dioxide, fossil",Emission to air,"low population density, long-term",1.0,1.0
3,"Carbon dioxide, fossil",Emission to air,lower stratosphere + upper troposphere,1.0,1.0
4,"Carbon dioxide, fossil",Emission to air,high population density,1.0,1.0
...,...,...,...,...,...
206,Sulfur hexafluoride,Emission to air,high population density,17499.90776451,23506.81999316
207,Sulfur hexafluoride,Emission to air,unspecified,17499.90776451,23506.81999316
208,Sulfur hexafluoride,Emission to air,"low population density, long-term",17499.90776451,23506.81999316
209,Sulfur hexafluoride,Emission to air,low population density,17499.90776451,23506.81999316


In [5]:
# Calculate emissions from LCA data inventories
# Load impact factors
impacts = pd.read_csv(output_path+'EI_3_8_IPCC2013_CO2e.csv', index_col=0)

# Load process outputs
processes_raw = pd.read_csv(lcaDataPath, header=[0,1,2], low_memory=False)

# Relabel the first five top-level column headers with the values found in the
# first data row, then drop that row
first_row = processes_raw.iloc[0]
top_level_labels = processes_raw.columns.get_level_values(0)[:5]
label_map = dict(zip(top_level_labels, first_row.values[:5]))
processes = processes_raw.rename(columns=label_map).iloc[1:]
# Use the first six columns as the index and convert the remaining columns to float
processes = processes.set_index(list(processes.columns[:6])).astype(float)

In [6]:
# Display the indexed process table
processes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,"1,2-Dichlorobenzene","1,2-Dichlorobenzene","1,3-Dioxolan-2-one","1,4-Butanediol","1,4-Butanediol","1,4-Butanediol",1-Pentanol,1-Pentanol,1-Pentene,1-Pentene,...,o-Nitrotoluene,o-Xylene,o-Xylene,o-Xylene,t-Butyl methyl ether,t-Butyl methyl ether,t-Butyl methyl ether,t-Butylamine,t-Butylamine,tau-Fluvalinate
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,air,water,water,air,air,water,air,water,air,water,...,air,air,air,water,air,water,water,air,water,soil
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,urban air close to ground,surface water,unspecified,non-urban air or from high stacks,urban air close to ground,surface water,urban air close to ground,surface water,urban air close to ground,surface water,...,urban air close to ground,unspecified,urban air close to ground,unspecified,urban air close to ground,surface water,unspecified,urban air close to ground,surface water,agricultural
"(Activity UUID_Product UUID, Unnamed: 0_level_1, Unnamed: 0_level_2)","(Activity Name, Unnamed: 1_level_1, Unnamed: 1_level_2)","(Geography, Unnamed: 2_level_1, Unnamed: 2_level_2)","(Reference Product Name, Unnamed: 3_level_1, Unnamed: 3_level_2)","(Reference Product Unit, Unnamed: 4_level_1, Unnamed: 4_level_2)","(Exchange Name, Compartment, Subcompartment)",Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3
c527b1db-ee62-5322-b806-679f4a317e3a_807906d0-f3cb-4a7c-a528-ae497a61bf12,"1,1-difluoroethane production, HFC-152a",RoW,"1,1-difluoroethane, HFC-152a",kg,1,-2.974310e-10,2.392790e-07,0.0,0.0,-3.039840e-11,-6.991650e-11,-3.796080e-11,-9.110720e-11,5.729020e-11,-6.884830e-11,...,-8.394500e-11,2.503020e-08,9.043820e-10,3.479560e-16,8.383710e-09,2.771370e-13,1.765240e-10,-6.570570e-11,-1.576930e-10,0.0
4be0d9cf-b5f5-5074-b618-3d9af1b9e239_807906d0-f3cb-4a7c-a528-ae497a61bf12,"1,1-difluoroethane production, HFC-152a",US,"1,1-difluoroethane, HFC-152a",kg,1,-3.830500e-10,2.331140e-07,0.0,0.0,-4.606570e-11,-1.059510e-10,-4.880260e-11,-1.171280e-10,6.069650e-11,-8.851170e-11,...,-1.079130e-10,2.849260e-08,1.025890e-09,-3.719410e-16,8.546970e-09,2.754010e-13,1.612560e-10,-8.465160e-11,-2.031630e-10,0.0
8847e7d2-bf94-51a3-afdc-57835fc0e2ce_41bae23f-237d-4ba6-9b1d-73d5f4baee55,"1,1-dimethylcyclopentane to generic market for solvent, organic",GLO,"1,1-dimethylcyclopentane",kg,1,7.309770e-11,1.884620e-08,0.0,0.0,2.067760e-11,4.755860e-11,9.167310e-12,2.200180e-11,2.567950e-11,1.662650e-11,...,2.033090e-11,1.231470e-08,1.984630e-10,3.426100e-15,2.305430e-09,7.244840e-14,2.952320e-10,1.618520e-11,3.884430e-11,0.0
a6564925-eec5-5594-9b01-77ec9251561a_95aedd09-379e-5ee5-b2af-d220968c898b,1-methoxy-2-propanol production,GLO,1-methoxy-2-propanol,kg,1,6.362820e-12,1.406750e-07,0.0,0.0,2.607020e-11,5.996140e-11,4.962110e-13,1.190920e-12,7.246990e-11,8.999610e-13,...,1.240760e-12,2.264290e-08,7.599170e-10,5.212180e-15,1.928660e-08,6.528700e-13,3.680970e-10,1.683100e-12,4.039450e-12,0.0
314fd77e-fb24-5104-a130-cc6a45583835_e24e6570-ecf4-5e34-8d59-a9066687bf17,1-methylcyclopropene production,GLO,1-methylcyclopropene,kg,1,3.094560e-12,2.289740e-07,0.0,0.0,3.959650e-11,9.107190e-11,-7.375600e-14,-1.770180e-13,1.050880e-10,-1.337700e-13,...,6.796070e-14,4.131430e-08,1.110900e-09,1.327600e-14,2.337960e-08,7.709380e-13,8.687540e-10,1.084200e-12,2.602130e-12,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
dc5e84bc-51e8-5681-854d-2a1c4440ec59_08663c56-b6b9-4a91-b2e3-7478da4f27ba,"zirconium oxide to generic market for electrolyte, for solid oxide fuel cell",GLO,"electrolyte, for solid oxide fuel cell",kg,1,1.805930e-10,2.972250e-07,0.0,0.0,1.002480e-10,2.305710e-10,2.160260e-11,5.184700e-11,1.722600e-10,3.917990e-11,...,4.734140e-11,1.414550e-08,1.555490e-09,-3.496730e-14,-4.868060e-06,-1.312930e-10,-2.669110e-10,4.038620e-11,9.692650e-11,0.0
a41d149b-f51d-52e4-b34a-39a133344781_11985fce-629a-40c9-b2bc-03b9f707bced,"zirconium sponge production, from zirconium tetrachloride",FR,"zirconium sponge, nuclear-grade",kg,1,-1.225910e-09,5.506090e-07,0.0,0.0,-7.068700e-11,-1.625820e-10,-1.655730e-10,-3.973810e-10,1.073390e-10,-3.002940e-10,...,-3.601470e-10,6.876580e-08,2.330290e-09,-7.946510e-15,-7.097800e-06,-1.879610e-10,4.617910e-09,-2.681390e-10,-6.435290e-10,0.0
0606da77-7e3a-5566-a49f-0733ec65e9ea_11985fce-629a-40c9-b2bc-03b9f707bced,"zirconium sponge production, from zirconium tetrachloride",RoW,"zirconium sponge, nuclear-grade",kg,1,3.852520e-10,8.006540e-07,0.0,0.0,2.501910e-10,5.754380e-10,3.827200e-11,9.185410e-11,3.032070e-10,6.941270e-11,...,9.108220e-11,1.546320e-07,2.776240e-09,-5.487860e-15,-7.155260e-06,-1.913320e-10,5.636610e-09,8.854000e-11,2.124960e-10,0.0
f8cc5fc3-0212-5318-9746-f0cfd8005f6b_11985fce-629a-40c9-b2bc-03b9f707bced,"zirconium sponge production, from zirconium tetrachloride",US,"zirconium sponge, nuclear-grade",kg,1,-1.325830e-09,5.379240e-07,0.0,0.0,-8.686470e-11,-1.997910e-10,-1.782560e-10,-4.278200e-10,1.305100e-10,-3.232970e-10,...,-3.876510e-10,1.376280e-07,2.678720e-09,-1.003790e-14,-7.157960e-06,-1.914950e-10,5.293380e-09,-2.899270e-10,-6.958200e-10,0.0


In [126]:
# One row per column of the raw table, holding that column's three header levels
elements = processes_raw.T.reset_index().loc[:, ['level_0', 'level_1', 'level_2']]
# Keep the columns whose first and third header levels equal an impact factor's name and subCategory
# NOTE(review): the second header level (level_1) is not compared with the factor's
# 'category' column - confirm that matching on name and subCategory alone is intended
matched = elements.merge(impacts, how='inner', left_on=['level_0', 'level_2'], right_on=['name', 'subCategory'])
conversions = matched.set_index(['name', 'level_1', 'subCategory'])

In [185]:
## Calculate CO2e emissions

def calc_emissions(processes:pd.DataFrame, conversions:pd.DataFrame, conversion_col:str='CO2e_100a') -> np.ndarray:
    """Calculate the CO2e emissions given process outputs and conversion factors.

    Inputs:
    processes - process outputs; the columns named by the index of `conversions` are used
    conversions - conversion factors, indexed by the labels of the `processes` columns they apply to
    conversion_col - column of `conversions` holding the factor to apply
    Outputs:
    1-D numpy array with one value per row of `processes`: the sum over the
    selected columns of value * factor. NaN products are skipped in the sum.
    """
    # Selecting with the index of `conversions` puts the columns in the same order as
    # the factors, so broadcasting pairs the i-th selected column with the i-th factor.
    # Working on arrays also leaves the caller's `processes` frame untouched.
    amounts = processes[conversions.index].to_numpy()
    factors = conversions[conversion_col].to_numpy()
    return np.nansum(amounts * factors, axis=1)

# Start from the process index alone (no data columns), turned into ordinary columns
output_df = processes[[]].reset_index()
# Keep only the top header level as the column names
output_df.columns = output_df.columns.droplevel([1, 2])
output_df = output_df.rename(columns={'Exchange Name': 'Reference Value'})
# One total per process for each conversion-factor column
for factor_col in ('CO2e_20a', 'CO2e_100a'):
    output_df[factor_col] = calc_emissions(processes, conversions, factor_col)

In [187]:
# Save output to file
# Written without the row index
output_df.to_csv(f"{output_path}conversionFactors_from_ecoinvent_IPCC2013.csv", index=False)