# Convert IHS materials data to consistent units and add uncertainties

In [2]:
import numpy as np
import pandas as pd

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook;
# later cells assign columns on frames obtained by filtering other frames.
pd.options.mode.chained_assignment = None

In [3]:
# Locations of the raw inputs and of the processed outputs
input_path = '../data/'
output_path = '../data/processed/'

# Input files, both resolved relative to input_path
ihs_materials_path = f'{input_path}extracted/ihsMaterials_Germany.csv'
unit_conversion_path = f'{input_path}extra_inputs/unit_conversions.xlsx'

In [4]:
# Read in materials, keeping only the columns of interest for the chosen region
ihs_region = 'Germany'
use_columns = [
    'Code', 'Data Version', 'Source', 'Source type', 'Target', 'Research Year',
    'Geography', 'Product', 'Value', 'Value unit', 'Capacity unit',
]
column_renames = {
    'Source': 'Source/Object',
    'Source type': 'Type',
    'Target': 'Target/Process',
    'Geography': 'Country/Reg',
}

ihs_raw = pd.read_csv(ihs_materials_path)
in_region = ihs_raw['Geography'] == ihs_region
materials = ihs_raw.loc[in_region, use_columns].rename(columns=column_renames)

# Raw materials and by-products share the 'Recipe' measurement type; other types keep their label
materials['MeasType'] = materials['Type'].replace({'Raw Material':'Recipe', 'By-Product':'Recipe'})
materials['Provenance'] = 'IHS PEP'

# Enforce negative values as by-products (rows with a missing value keep their original type)
materials['Type'] = materials['Type'].mask(materials['Value'] < 0, 'By-Product')

materials.head()

Unnamed: 0,Code,Data Version,Source/Object,Type,Target/Process,Research Year,Country/Reg,Product,Value,Value unit,Capacity unit,MeasType,Provenance
0,2M-584,2021 Q3,INITIATOR,Raw Material,(METH)ACRYLIC RESIN PELLETS BY A CONTINUOUS BU...,1991.0,Germany,POLYMETHYLMETHACRYLATE,,,MM KG/yr,Recipe,IHS PEP
1,2M-584,2021 Q3,METHYL ACRYLATE,Raw Material,(METH)ACRYLIC RESIN PELLETS BY A CONTINUOUS BU...,1991.0,Germany,POLYMETHYLMETHACRYLATE,0.122,TONNE/TONNE,MM KG/yr,Recipe,IHS PEP
2,2M-584,2021 Q3,METHYL METHACRYLATE,Raw Material,(METH)ACRYLIC RESIN PELLETS BY A CONTINUOUS BU...,1991.0,Germany,POLYMETHYLMETHACRYLATE,0.898,TONNE/TONNE,MM KG/yr,Recipe,IHS PEP
3,2M-584,2021 Q3,COOLING WATER,Utilities,(METH)ACRYLIC RESIN PELLETS BY A CONTINUOUS BU...,1991.0,Germany,POLYMETHYLMETHACRYLATE,19.110967,M3/TONNE,MM KG/yr,Utilities,IHS PEP
4,2M-584,2021 Q3,ELECTRICITY,Utilities,(METH)ACRYLIC RESIN PELLETS BY A CONTINUOUS BU...,1991.0,Germany,POLYMETHYLMETHACRYLATE,653.339914,KWH/TONNE,MM KG/yr,Utilities,IHS PEP


In [10]:
# Distinct product names present in the filtered materials table
product_names = materials['Product']
product_names.unique()

array(['POLYMETHYLMETHACRYLATE', 'HCFC-141B', '1,12-DODECANEDIAMINE',
       '1,12-DODECANEDIOIC ACID', 'DINCH', '1,3-BUTADIENE', 'BUTADIENE',
       '1,3-PROPANEDIOL', '1,4-BUTANEDIOL AND TETRAHYDROFURAN',
       '1,4-BUTANEDIOL', '1,4-CYCLOHEXANEDIMETHANOL',
       '1-PHENYLETHYLAMINE®', '2246-ANTIOXIDANT',
       '2,4-DIMETHYL BENZALDEHYDE', '2,6-DI-TERT-BUTYLPHENOL',
       '2,6-DIETHYLANILINE', '2,6-DIMETHYLPHENOL', '2-ETHYLHEXANOL',
       '2-ETHYLHEXYL ACRYLATE', '2-ETHYLHEXYL DIPHENYL PHOSPHATE',
       '2-HYDROXY-4-METHYLTHIOBUTANOIC ACID', '2-H-4-O-BENZOPHENONE',
       '2-MERCAPTOBENZOTHIAZOLE', '2-PICOLINE', '3-HYDROXYPROPIONIC ACID',
       "4,4'-DICHLORODIPHENYL SULFONE", "4,4'-DIHYDROXYDIPHENYL",
       "4,4'-METHYLENEDIANILINE", '4-CHLORO-4-HYDROXY-BENZOPHENONE',
       '4-METHYLPENTENE-1', '4-MP-1/1-DECENE COPOLYMER',
       '6-HYDROXY-2-NAPHTHOIC ACID', 'ABS RESIN', 'ACESULFAME-K',
       'ACETAL HOMOPOLYMER', 'ACETALDEHYDE', 'ACETAMINOPHEN',
       'ACETIC ACID', 'AC

In [14]:
# Define unit conversion function

def convert_units(mat_df:pd.DataFrame, unit_conversion:pd.DataFrame, unit_name:str, unit_column:str, value_column:str, unit_name_in_col:str, target_unit:str, inv_density:bool=False, standard_conversion:int=1) -> pd.DataFrame:
    """Convert the rows of ``mat_df`` expressed in one unit to ``target_unit``.

    Rows whose ``unit_column`` equals ``unit_name_in_col`` get their ``'Value'``
    rescaled and their ``unit_column`` overwritten with ``target_unit``; all other
    rows are passed through unchanged. ``mat_df`` itself is not modified.

    Parameters
    ----------
    mat_df : table to convert; must contain ``'Value'``, ``'Code'``, ``'Type'``,
        ``unit_column`` and ``value_column``.
    unit_conversion : lookup table with columns ``'Unit'``, ``'Name'`` and ``'density'``.
        Only rows with ``'Unit' == unit_name`` are used.
    unit_name : unit key used to select rows of ``unit_conversion``.
    unit_column : column of ``mat_df`` holding the unit labels.
    value_column : column of ``mat_df`` holding the text matched against the
        ``'Name'`` entries. A row matches the first ``'Name'`` (in table order)
        that its text starts with.
    unit_name_in_col : unit label, as written in ``unit_column``, of the rows to convert.
    target_unit : unit label written to the converted rows.
    inv_density : if True the scaled value is divided by the matched ``'density'``,
        otherwise it is multiplied by it.
    standard_conversion : divisor applied to ``'Value'`` before the density factor.

    Returns
    -------
    pd.DataFrame with the untouched and converted rows, sorted by ``['Code', 'Type']``.

    Raises
    ------
    Exception
        If a row to convert has no matching ``'Name'`` for ``unit_name`` (including
        the case where ``unit_conversion`` has no entries for ``unit_name`` at all).
    """
    # Get appropriate conversions for unit
    unit_filts = unit_conversion[unit_conversion['Unit']==unit_name]
    unit_dict = dict(zip(unit_filts['Name'], unit_filts['density']))

    # Explicit copy: the assignments below must never write through to mat_df
    needs_conversion = mat_df[unit_column] == unit_name_in_col
    unit_mats = mat_df[needs_conversion].copy()

    # Cycle through dataframe to get conversion factor for each row
    conv_factor = np.zeros(len(unit_mats))
    for i, target in enumerate(unit_mats[value_column]):
        for name_prefix, factor in unit_dict.items():
            if isinstance(target, str) and target.startswith(name_prefix):
                conv_factor[i] = factor
                break
        else:
            # Reached when nothing matched, including when unit_dict is empty;
            # a factor left at 0 would otherwise silently zero (or inf) the value.
            raise Exception(unit_name+' for '+str(target)+' unit conversions missing')

    # Convert values
    scaled_values = unit_mats['Value']/standard_conversion
    if inv_density: # If converting from value in units to weight
        unit_mats['Value'] = scaled_values/conv_factor
    else: # If converting proportion of other materials
        unit_mats['Value'] = scaled_values*conv_factor
    unit_mats[unit_column] = target_unit

    # Concatenate converted values with rest of original dataframe
    return pd.concat((mat_df[~needs_conversion], unit_mats)).sort_values(['Code','Type'])

In [15]:
## Capacity unit conversions -> So all values are unit/tonne
# BTL is for 2 types of plastic bottles - ignored for now
# M3 is only for water, therefore 1000 KG equivalent; capacity is in MM3 so 1/1000 for kg
# NM3 is for gases per MNM3 - converted to get the tonne equivalent

# BTL filt: drop processes whose capacity is quoted in bottles
bottle_capacity = materials['Capacity unit'] == 'MM BTL/yr'
materials_filt = materials[~bottle_capacity]

# NM3 filt: rescale values of processes whose capacity is quoted in NM3
unit_conversion = pd.read_excel(unit_conversion_path)
materials_filt = convert_units(
    mat_df=materials_filt,
    unit_conversion=unit_conversion,
    unit_name='NM3',
    unit_column='Capacity unit',
    value_column='Target/Process',
    unit_name_in_col='MM NM3/yr',
    target_unit='MM KG/yr',
    inv_density=True,
    standard_conversion=1,
)

# M3 filt: per-row factor by capacity unit; astype(float) fails if any other unit label is left
capacity_scale = materials_filt['Capacity unit'].replace({'MM KG/yr':1, 'MM M3/yr':1/1000}).astype(float)
materials_filt['Value'] = capacity_scale*materials_filt['Value'].astype(float)
materials_filt['Capacity unit'] = 'MM KG/yr'

In [16]:
## Value unit conversions -> So all values are in kg/kg

# Filter out EA and BOAT
excluded_units = ['EA', 'EA/TONNE', 'BOAT']
materials_values = materials_filt[~materials_filt['Value unit'].isin(excluded_units)]

# NM3 and MMCAL conversions, applied in this order: (conversion-table unit, label in 'Value unit')
lookup_conversions = [
    ('NM3', 'NM3'),
    ('NM3', 'NM3/TONNE'),
    ('MMCAL', 'MMCAL'),
    ('MMCAL', 'MMCAL/TONNE'),
]
for table_unit, unit_label in lookup_conversions:
    materials_values = convert_units(materials_values, unit_conversion, table_unit, 'Value unit', 'Source/Object', unit_label, 'kg/kg', standard_conversion=1000)

# Remaining standard conversions: unit label -> (multiplier, resulting unit label)
standard_units = {
    'TONNE/TONNE': (1, 'kg/kg'),
    'TONNE': (1, 'kg/kg'),
    'KG/TONNE': (1E-3, 'kg/kg'),
    'KG': (1E-3, 'kg/kg'),
    'G/TONNE': (1E-6, 'kg/kg'),
    'G': (1E-6, 'kg/kg'),
    'M3/TONNE': (1, 'kg/kg'),
    'M3': (1, 'kg/kg'),
    'KWH/TONNE': (1E-3, 'kWh/kg'),
    'KWH': (1E-3, 'kWh/kg'),
    'kg/kg': (1, 'kg/kg'),
}
conversion_ratios = {unit: spec[0] for unit, spec in standard_units.items()}
conversion_names = {unit: spec[1] for unit, spec in standard_units.items()}

# Scale the values first, while 'Value unit' still holds the original labels
materials_values['Value'] = materials_values['Value unit'].replace(conversion_ratios)*materials_values['Value']
materials_values['Value unit'] = materials_values['Value unit'].replace(conversion_names)

materials_values

Unnamed: 0,Code,Data Version,Source/Object,Type,Target/Process,Research Year,Country/Reg,Product,Value,Value unit,Capacity unit,MeasType,Provenance
637,2M-1,2021 Q3,ACRYLONITRILE,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,0.250790,kg/kg,MM KG/yr,Recipe,IHS PEP
638,2M-1,2021 Q3,BUTADIENE,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,0.196470,kg/kg,MM KG/yr,Recipe,IHS PEP
639,2M-1,2021 Q3,MISC CHEMICALS,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,,,MM KG/yr,Recipe,IHS PEP
640,2M-1,2021 Q3,STYRENE,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,0.574110,kg/kg,MM KG/yr,Recipe,IHS PEP
641,2M-1,2021 Q3,COOLING WATER,Utilities,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,30.460712,kg/kg,MM KG/yr,Utilities,IHS PEP
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19895,2M-999,2022 Q1,TRIMETHYLHYDROQUINONE,Raw Material,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),0.350000,kg/kg,MM KG/yr,Recipe,IHS PEP
19896,2M-999,2022 Q1,COOLING WATER,Utilities,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),353.816608,kg/kg,MM KG/yr,Utilities,IHS PEP
19897,2M-999,2022 Q1,ELECTRICITY,Utilities,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),0.663150,kWh/kg,MM KG/yr,Utilities,IHS PEP
19898,2M-999,2022 Q1,PROCESS WATER,Utilities,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),5.082349,kg/kg,MM KG/yr,Utilities,IHS PEP


In [17]:
# Add uncertainty: sigma is a fixed fraction of each value's magnitude
uncertainty_factor = 0.05
value_magnitude = materials_values['Value'].abs()
materials_values['Value_sigma'] = value_magnitude*uncertainty_factor

In [18]:
# Display the converted table, which now includes the Value_sigma column
materials_values

Unnamed: 0,Code,Data Version,Source/Object,Type,Target/Process,Research Year,Country/Reg,Product,Value,Value unit,Capacity unit,MeasType,Provenance,Value_sigma
637,2M-1,2021 Q3,ACRYLONITRILE,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,0.250790,kg/kg,MM KG/yr,Recipe,IHS PEP,0.012540
638,2M-1,2021 Q3,BUTADIENE,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,0.196470,kg/kg,MM KG/yr,Recipe,IHS PEP,0.009824
639,2M-1,2021 Q3,MISC CHEMICALS,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,,,MM KG/yr,Recipe,IHS PEP,
640,2M-1,2021 Q3,STYRENE,Raw Material,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,0.574110,kg/kg,MM KG/yr,Recipe,IHS PEP,0.028706
641,2M-1,2021 Q3,COOLING WATER,Utilities,ABS RESIN BY EMULSION/MASS POLYMERIZATION,1980.0,Germany,ABS RESIN,30.460712,kg/kg,MM KG/yr,Utilities,IHS PEP,1.523036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19895,2M-999,2022 Q1,TRIMETHYLHYDROQUINONE,Raw Material,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),0.350000,kg/kg,MM KG/yr,Recipe,IHS PEP,0.017500
19896,2M-999,2022 Q1,COOLING WATER,Utilities,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),353.816608,kg/kg,MM KG/yr,Utilities,IHS PEP,17.690830
19897,2M-999,2022 Q1,ELECTRICITY,Utilities,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),0.663150,kWh/kg,MM KG/yr,Utilities,IHS PEP,0.033158
19898,2M-999,2022 Q1,PROCESS WATER,Utilities,VITAMIN-E (ALPHA-TOCOPHEROL) FROM CITRAL VIA I...,2002.0,Germany,VITAMIN-E (ALPHA-TOCOPHEROL),5.082349,kg/kg,MM KG/yr,Utilities,IHS PEP,0.254117


In [19]:
# Output process recipes with consistent units and uncertainties
# (index is renumbered from 0 before writing)
materials_out = materials_values.reset_index(drop=True)
materials_out.to_csv(output_path+'ihsMaterials_w_uncertainties.csv')