### Import Libraries

In [62]:
import pandas as pd
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import janitor
from janitor import clean_names

### Import Data

In [63]:
# ------- define paths -------
# Location of the raw data folder, relative to the notebook's working directory
relative_path_raw = Path('../data/raw_data/')

# Resolve to an absolute path and print it so the data location can be checked.
# (Both names must be the `*_raw` variables defined in this cell; referring to
# `relative_path` / `absolute_path` raises NameError on a fresh kernel.)
absolute_path_raw = relative_path_raw.resolve()
print(absolute_path_raw)

# declare file names
filename_fuel_factors = "Fuel_Emission_Factors.xlsx"
filename_assess_recc = "assess_recc_ppi_emissions.csv"

# ------- import data -------

# import Fuel Emissions Factors database (only the 'Fuel_Emission_Factors_Clean' sheet is read)
fuel_emissions_df = pd.read_excel(absolute_path_raw/filename_fuel_factors, sheet_name='Fuel_Emission_Factors_Clean')

# import assess_recc_ppi_emissions database
# NOTE(review): the recorded output shows a warning pointing at this line, presumably a
# mixed-type DtypeWarning. Consider passing an explicit `dtype=` once the schema is confirmed.
fuel_assess_recc = pd.read_csv(absolute_path_raw/filename_assess_recc)

/Users/nmoraes/Capstone/industrialenergy_datainterface/data/raw_data


  fuel_assess_recc = pd.read_csv(absolute_path_raw/filename_assess_recc)


### Tidy Dataframe 

In [65]:
# Preview the first 3 rows of the fuel emission factors table loaded above
fuel_emissions_df.head(3)

Unnamed: 0,fuel_type,iac_fuel_code,fuel_units,Fuel mapping per Eric Masanet's table,CO2,AP - 42 source (link calculations),SO2,NOx,emission_factor_units,fuel_description
0,Natural Gas,E2,MMBtu,Natural Gas (pipeline weighted average),53.06,Natural gas combustion,0.000267,0.078934,kg/MMBtu,Natural gas is a gas consisting primarily of m...
1,L.P.G,E3,MMBtu,LPG (energy use),61.71,Liquified petroleum gas combustion,0.00672,0.066085,kg/MMBtu,LPG is a mixture of hydrocarbon gases used as ...
2,#1 Fuel Oil,E4,MMBtu,Kerosene,75.2,Fuel Oil Combustion,0.078212,0.055079,kg/MMBtu,Fuel Oil No. 1 is similar to kerosene and is t...


In [66]:
# Display the column labels of fuel_emissions_df (shown as the cell's output)
fuel_emissions_df.columns

Index(['fuel_type', 'iac_fuel_code', 'fuel_units',
       'Fuel mapping per Eric Masanet's table', 'CO2',
       'AP - 42 source (link calculations)', 'SO2', 'NOx',
       'emission_factor_units', 'fuel_description'],
      dtype='object')

In [68]:
# Keep only the fuel identifiers, units, the three emission factors and the
# description; the fuel-mapping and AP-42 source columns are left out.
relevant_columns = [
    'fuel_type', 'iac_fuel_code', 'fuel_units',
    'CO2', 'SO2', 'NOx',
    'emission_factor_units', 'fuel_description',
]
clean_fuel_emissions_df = fuel_emissions_df[relevant_columns]

In [69]:
# Show the first 2 rows of the column subset
clean_fuel_emissions_df.head(2)

Unnamed: 0,fuel_type,iac_fuel_code,fuel_units,CO2,SO2,NOx,emission_factor_units,fuel_description
0,Natural Gas,E2,MMBtu,53.06,0.000267,0.078934,kg/MMBtu,Natural gas is a gas consisting primarily of m...
1,L.P.G,E3,MMBtu,61.71,0.00672,0.066085,kg/MMBtu,LPG is a mixture of hydrocarbon gases used as ...


In [70]:
# transform columns to match assess_recc table
# Start from an independent copy so clean_fuel_emissions_df keeps its original column names
rename_emissions_df = clean_fuel_emissions_df.copy()

In [71]:
# Rename the fuel-table columns to the labels used by the assess_recc table.
# `rename` returns a new frame, which is assigned back to the same name instead of
# mutating in place with inplace=True; the resulting columns are identical.
# Labels missing from the frame are ignored, so re-running this cell is harmless.
rename_emissions_df = rename_emissions_df.rename(
    columns={
        'fuel_type': 'energy_source',
        'iac_fuel_code': 'sourccode',
        'fuel_units': 'units',
        'emission_factor_units': 'unit',
    }
)

In [72]:
# View rename_emissions_df: first 2 rows, to check the renamed column labels
rename_emissions_df.head(2)

Unnamed: 0,energy_source,sourccode,units,CO2,SO2,NOx,unit,fuel_description
0,Natural Gas,E2,MMBtu,53.06,0.000267,0.078934,kg/MMBtu,Natural gas is a gas consisting primarily of m...
1,L.P.G,E3,MMBtu,61.71,0.00672,0.066085,kg/MMBtu,LPG is a mixture of hydrocarbon gases used as ...


In [75]:
# Reshape wide -> long: the CO2, SO2 and NOx columns are stacked into rows, giving
# one output row per input row and pollutant. The pollutant name goes to
# 'emission_type' and its value to 'emission_factor'.
descriptor_columns = ['energy_source', 'sourccode', 'units', 'unit', 'fuel_description']
pollutant_columns = ['CO2', 'SO2', 'NOx']

melted_df = rename_emissions_df.melt(
    id_vars=descriptor_columns,
    value_vars=pollutant_columns,
    var_name='emission_type',
    value_name='emission_factor',
)

In [76]:
# Inspect the first 15 rows of the long-format (melted) emission factors table
melted_df.head(15)

Unnamed: 0,energy_source,sourccode,units,unit,fuel_description,emission_type,emission_factor
0,Natural Gas,E2,MMBtu,kg/MMBtu,Natural gas is a gas consisting primarily of m...,CO2,53.06
1,L.P.G,E3,MMBtu,kg/MMBtu,LPG is a mixture of hydrocarbon gases used as ...,CO2,61.71
2,#1 Fuel Oil,E4,MMBtu,kg/MMBtu,Fuel Oil No. 1 is similar to kerosene and is t...,CO2,75.2
3,#2 Fuel Oil,E5,MMBtu,kg/MMBtu,"Fuel Oil No. 2 is diesel fuel, which is common...",CO2,73.96
4,#4 Fuel Oil,E6,MMBtu,kg/MMBtu,Fuel Oil No. 4 is a blend of distillate and re...,CO2,74.753333
5,#6 Fuel Oil,E7,MMBtu,kg/MMBtu,Fuel Oil No. 6 is called residual fuel oil or ...,CO2,75.1
6,Coal,E8,MMBtu,kg/MMBtu,Coal is a fossil fuel which is composed primar...,CO2,94.67
7,Wood,E9,MMBtu,kg/MMBtu,Burning of wood is currently the largest use o...,CO2,93.8
8,Paper,E10,MMBtu,kg/MMBtu,Paper is a thin material mainly used for writi...,CO2,93.8
9,Other Gas,E11,MMBtu,kg/MMBtu,Other Gas includes the use of gaseous fuels as...,CO2,66.72


### Integrate Assess_Recc Table

In [77]:
# View all columns: remove the limit on how many columns pandas renders.
# This is a global pandas display option, so it affects every later DataFrame display.
pd.set_option('display.max_columns', None)

In [78]:
# View assess_recc: first 5 rows of the table read from assess_recc_ppi_emissions.csv
fuel_assess_recc.head(5)

Unnamed: 0,state,year,type_of_producer,energy_source,generation_megawatthours,units,emission_type,amount,emission_factor,unit,id,superid,center,ar_number,appcode,arc2,impstatus,impcost,ref_year_impcost,source_rank,sourccode,conserved,conserved_emissions,u,sourconsv,saved,rebate,incremntal,ic_capital,ic_other,payback,bptool,sic,naics,sales,employees,plant_area,products,produnits,prodlevel,prodhours,numars,ec_plant_cost,ec_plant_usage,ec_emissions,u.1,ed_plant_cost,ed_plant_usage,ef_plant_cost,e2_plant_cost,e2_plant_usage,e3_plant_cost,e3_plant_usage,e4_plant_cost,e4_plant_usage,e5_plant_cost,e5_plant_usage,e6_plant_cost,e6_plant_usage,e7_plant_cost,e7_plant_usage,e8_plant_cost,e8_plant_usage,e9_plant_cost,e9_plant_usage,e10_plant_cost,e10_plant_usage,e11_plant_cost,e11_plant_usage,e12_plant_cost,e12_plant_usage,w0_plant_cost,w0_plant_usage,w1_plant_cost,w1_plant_usage,w2_plant_cost,w2_plant_usage,w3_plant_cost,w3_plant_usage,w4_plant_cost,w4_plant_usage,w5_plant_cost,w5_plant_usage,w6_plant_cost,w6_plant_usage
0,TX,1990,Total Electric Power Industry,Total,281559635.0,MWh,CO2,213056168.0,0.7567,kg/kWh,AM0076,AM007601,AM,1,,2.3212,I,64000.0,64000.0,PSOURCCODE,EC,,,kg,,23000.0,N,N,,,2.782609,,3479.0,,10000000.0,80.0,,PIPECOATING,6.0,9038.0,2400.0,5.0,269266.0,3456917.0,2615849.0,kg,,,,29923.0,8238.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,TX,1990,Total Electric Power Industry,Total,281559635.0,MWh,SO2,568924.0,0.002021,kg/kWh,AM0076,AM007601,AM,1,,2.3212,I,64000.0,64000.0,PSOURCCODE,EC,,,kg,,23000.0,N,N,,,2.782609,,3479.0,,10000000.0,80.0,,PIPECOATING,6.0,9038.0,2400.0,5.0,269266.0,3456917.0,6985.103,kg,,,,29923.0,8238.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,TX,1990,Total Electric Power Industry,Total,281559635.0,MWh,NOx,662759.0,0.002354,kg/kWh,AM0076,AM007601,AM,1,,2.3212,I,64000.0,64000.0,PSOURCCODE,EC,,,kg,,23000.0,N,N,,,2.782609,,3479.0,,10000000.0,80.0,,PIPECOATING,6.0,9038.0,2400.0,5.0,269266.0,3456917.0,8137.185,kg,,,,29923.0,8238.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,TX,1990,Total Electric Power Industry,Total,281559635.0,MWh,CO2,213056168.0,0.7567,kg/kWh,AM0076,AM007601,AM,1,,2.3212,I,64000.0,64000.0,SSOURCCODE,,,,kg,,,N,N,,,2.782609,,3479.0,,10000000.0,80.0,,PIPECOATING,6.0,9038.0,2400.0,5.0,269266.0,3456917.0,2615849.0,kg,,,,29923.0,8238.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,TX,1990,Total Electric Power Industry,Total,281559635.0,MWh,SO2,568924.0,0.002021,kg/kWh,AM0076,AM007601,AM,1,,2.3212,I,64000.0,64000.0,SSOURCCODE,,,,kg,,,N,N,,,2.782609,,3479.0,,10000000.0,80.0,,PIPECOATING,6.0,9038.0,2400.0,5.0,269266.0,3456917.0,6985.103,kg,,,,29923.0,8238.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [79]:
# Display the column labels of fuel_assess_recc
fuel_assess_recc.columns

Index(['state', 'year', 'type_of_producer', 'energy_source',
       'generation_megawatthours', 'units', 'emission_type', 'amount',
       'emission_factor', 'unit', 'id', 'superid', 'center', 'ar_number',
       'appcode', 'arc2', 'impstatus', 'impcost', 'ref_year_impcost',
       'source_rank', 'sourccode', 'conserved', 'conserved_emissions', 'u',
       'sourconsv', 'saved', 'rebate', 'incremntal', 'ic_capital', 'ic_other',
       'payback', 'bptool', 'sic', 'naics', 'sales', 'employees', 'plant_area',
       'products', 'produnits', 'prodlevel', 'prodhours', 'numars',
       'ec_plant_cost', 'ec_plant_usage', 'ec_emissions', 'u.1',
       'ed_plant_cost', 'ed_plant_usage', 'ef_plant_cost', 'e2_plant_cost',
       'e2_plant_usage', 'e3_plant_cost', 'e3_plant_usage', 'e4_plant_cost',
       'e4_plant_usage', 'e5_plant_cost', 'e5_plant_usage', 'e6_plant_cost',
       'e6_plant_usage', 'e7_plant_cost', 'e7_plant_usage', 'e8_plant_cost',
       'e8_plant_usage', 'e9_plant_cost', 'e9_pla

In [81]:
# Combine the assess_recc table with the long-format fuel emission factors.
# Every label in `on` must exist in BOTH frames. The factor column is called
# 'emission_factor' in each of them; a misspelled key such as 'emission_fact'
# makes pd.merge raise KeyError.
# how='outer' keeps all rows from both dataframes; rows without a match on the
# other side get NaN in the columns that only the other frame provides.
# NOTE(review): 'emission_factor' is a float column, so rows match only on exactly
# equal values. Confirm that joining on it is intended.
fuel_assess_recc_merge = pd.merge(
    fuel_assess_recc,
    melted_df,
    on=['energy_source', 'sourccode', 'units', 'unit', 'emission_type', 'emission_factor'],
    how='outer',
)

In [102]:
# Preview the first rows of the merged table (head() defaults to 5 rows)
fuel_assess_recc_merge.head()

Unnamed: 0,state,year,type_of_producer,energy_source,generation_megawatthours,units,emission_type,amount,emission_factor,unit,id,superid,center,ar_number,appcode,arc2,impstatus,impcost,ref_year_impcost,source_rank,sourccode,conserved,conserved_emissions,u,sourconsv,saved,rebate,incremntal,ic_capital,ic_other,payback,bptool,sic,naics,sales,employees,plant_area,products,produnits,prodlevel,prodhours,numars,ec_plant_cost,ec_plant_usage,ec_emissions,u.1,ed_plant_cost,ed_plant_usage,ef_plant_cost,e2_plant_cost,e2_plant_usage,e3_plant_cost,e3_plant_usage,e4_plant_cost,e4_plant_usage,e5_plant_cost,e5_plant_usage,e6_plant_cost,e6_plant_usage,e7_plant_cost,e7_plant_usage,e8_plant_cost,e8_plant_usage,e9_plant_cost,e9_plant_usage,e10_plant_cost,e10_plant_usage,e11_plant_cost,e11_plant_usage,e12_plant_cost,e12_plant_usage,w0_plant_cost,w0_plant_usage,w1_plant_cost,w1_plant_usage,w2_plant_cost,w2_plant_usage,w3_plant_cost,w3_plant_usage,w4_plant_cost,w4_plant_usage,w5_plant_cost,w5_plant_usage,w6_plant_cost,w6_plant_usage,fuel_description
0,,,,#1 Fuel Oil,,MMBtu,CO2,,75.2,kg/MMBtu,,,,,,,,,,,E4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fuel Oil No. 1 is similar to kerosene and is t...
1,,,,#1 Fuel Oil,,MMBtu,NOx,,0.055079,kg/MMBtu,,,,,,,,,,,E4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fuel Oil No. 1 is similar to kerosene and is t...
2,,,,#1 Fuel Oil,,MMBtu,SO2,,0.078212,kg/MMBtu,,,,,,,,,,,E4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fuel Oil No. 1 is similar to kerosene and is t...
3,,,,#2 Fuel Oil,,MMBtu,CO2,,73.96,kg/MMBtu,,,,,,,,,,,E5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"Fuel Oil No. 2 is diesel fuel, which is common..."
4,,,,#2 Fuel Oil,,MMBtu,NOx,,0.055079,kg/MMBtu,,,,,,,,,,,E5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"Fuel Oil No. 2 is diesel fuel, which is common..."


In [96]:
# Keep only the merged rows whose 'state' equals 'TX'.
# NOTE(review): this displays the whole filtered frame (pandas truncates the rendering);
# consider appending .head() if only a preview is needed.
fuel_assess_recc_merge[fuel_assess_recc_merge['state'] == 'TX']

Unnamed: 0,state,year,type_of_producer,energy_source,generation_megawatthours,units,emission_type,amount,emission_factor,unit,id,superid,center,ar_number,appcode,arc2,impstatus,impcost,ref_year_impcost,source_rank,sourccode,conserved,conserved_emissions,u,sourconsv,saved,rebate,incremntal,ic_capital,ic_other,payback,bptool,sic,naics,sales,employees,plant_area,products,produnits,prodlevel,prodhours,numars,ec_plant_cost,ec_plant_usage,ec_emissions,u.1,ed_plant_cost,ed_plant_usage,ef_plant_cost,e2_plant_cost,e2_plant_usage,e3_plant_cost,e3_plant_usage,e4_plant_cost,e4_plant_usage,e5_plant_cost,e5_plant_usage,e6_plant_cost,e6_plant_usage,e7_plant_cost,e7_plant_usage,e8_plant_cost,e8_plant_usage,e9_plant_cost,e9_plant_usage,e10_plant_cost,e10_plant_usage,e11_plant_cost,e11_plant_usage,e12_plant_cost,e12_plant_usage,w0_plant_cost,w0_plant_usage,w1_plant_cost,w1_plant_usage,w2_plant_cost,w2_plant_usage,w3_plant_cost,w3_plant_usage,w4_plant_cost,w4_plant_usage,w5_plant_cost,w5_plant_usage,w6_plant_cost,w6_plant_usage,fuel_description
154,TX,2002.0,Total Electric Power Industry,Total,385628542.0,MWh,CO2,258398088.0,0.670070,kg/kWh,AM0423,AM042303,AM,3.0,2.0,2.4323,I,25000.0,25000.0,PSOURCCODE,E11,495000.0,,kg,,1311800.0,N,N,,25000.0,0.013816,,2911.0,324110.0,1.200000e+09,325.0,,"Refined petroleum products, sulfur",4.0,42000000.0,8760.0,14.0,2512297.0,41194817.0,2.760341e+07,kg,,,,43870282.0,10982620.0,,,,,,,,,,,,,,,,,,,,,,,440329.0,890152000.0,,,,,,,,,,,
155,TX,2002.0,Total Electric Power Industry,Total,385628542.0,MWh,CO2,258398088.0,0.670070,kg/kWh,AM0423,AM042307,AM,7.0,2.0,4.6320,N,249000.0,,PSOURCCODE,E11,50400.0,,kg,,133600.0,N,N,47000.0,202000.0,1.870774,,2911.0,324110.0,1.200000e+09,325.0,,"Refined petroleum products, sulfur",4.0,42000000.0,8760.0,14.0,2512297.0,41194817.0,2.760341e+07,kg,,,,43870282.0,10982620.0,,,,,,,,,,,,,,,,,,,,,,,440329.0,890152000.0,,,,,,,,,,,
159,TX,1993.0,Total Electric Power Industry,Total,299688715.0,MWh,CO2,227087867.0,0.757746,kg/kWh,AM0173,AM017304,AM,4.0,2.0,2.2135,I,600.0,600.0,PSOURCCODE,E11,1520.0,,kg,,3670.0,N,N,,,0.163488,,2819.0,,1.800000e+07,98.0,,hydrogen peroxide and butyllithium,2.0,82000000.0,8568.0,6.0,1339944.0,34665006.0,2.626726e+07,kg,,,,,,,,,,,,,,,,,,,,,,,,827587.0,272410.0,,,,,,,,,,,,,,,
270,TX,2002.0,Total Electric Power Industry,Total,385628542.0,MWh,NOx,332612.0,0.000863,kg/kWh,AM0423,AM042303,AM,3.0,2.0,2.4323,I,25000.0,25000.0,PSOURCCODE,E11,495000.0,,kg,,1311800.0,N,N,,25000.0,0.013816,,2911.0,324110.0,1.200000e+09,325.0,,"Refined petroleum products, sulfur",4.0,42000000.0,8760.0,14.0,2512297.0,41194817.0,3.553132e+04,kg,,,,43870282.0,10982620.0,,,,,,,,,,,,,,,,,,,,,,,440329.0,890152000.0,,,,,,,,,,,
271,TX,2002.0,Total Electric Power Industry,Total,385628542.0,MWh,NOx,332612.0,0.000863,kg/kWh,AM0423,AM042307,AM,7.0,2.0,4.6320,N,249000.0,,PSOURCCODE,E11,50400.0,,kg,,133600.0,N,N,47000.0,202000.0,1.870774,,2911.0,324110.0,1.200000e+09,325.0,,"Refined petroleum products, sulfur",4.0,42000000.0,8760.0,14.0,2512297.0,41194817.0,3.553132e+04,kg,,,,43870282.0,10982620.0,,,,,,,,,,,,,,,,,,,,,,,440329.0,890152000.0,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1569087,TX,1997.0,Total Electric Power Industry,Total,336319510.0,MWh,SO2,777211.0,0.002311,kg/kWh,KG0100,KG010003,KG,3.0,4.0,3.5315,K,,,TSOURCCODE,,,,kg,,,N,N,,,,,2051.0,,2.000000e+07,290.0,107127.0,"Sliced bread, hotdog and hamburger buns and co...",2.0,40000000.0,8760.0,4.0,338229.0,11826788.0,2.733088e+04,kg,262684.0,19402.0,1560.0,412357.0,87570.0,,,,,,,,,,,,,,,,,,,,,,,,,570.0,110.0,,,141174.0,983630.0,,,,,
1569088,TX,1997.0,Total Electric Power Industry,Total,336319510.0,MWh,SO2,777211.0,0.002311,kg/kWh,KG0100,KG010003,KG,3.0,4.0,3.5315,K,,,QSOURCCODE,,,,kg,,,N,N,,,,,2051.0,,2.000000e+07,290.0,107127.0,"Sliced bread, hotdog and hamburger buns and co...",2.0,40000000.0,8760.0,4.0,338229.0,11826788.0,2.733088e+04,kg,262684.0,19402.0,1560.0,412357.0,87570.0,,,,,,,,,,,,,,,,,,,,,,,,,570.0,110.0,,,141174.0,983630.0,,,,,
1569089,TX,1997.0,Total Electric Power Industry,Total,336319510.0,MWh,SO2,777211.0,0.002311,kg/kWh,KG0100,KG010004,KG,4.0,4.0,3.5315,K,,,SSOURCCODE,,,,kg,,,N,N,,,,,2051.0,,2.000000e+07,290.0,107127.0,"Sliced bread, hotdog and hamburger buns and co...",2.0,40000000.0,8760.0,4.0,338229.0,11826788.0,2.733088e+04,kg,262684.0,19402.0,1560.0,412357.0,87570.0,,,,,,,,,,,,,,,,,,,,,,,,,570.0,110.0,,,141174.0,983630.0,,,,,
1569090,TX,1997.0,Total Electric Power Industry,Total,336319510.0,MWh,SO2,777211.0,0.002311,kg/kWh,KG0100,KG010004,KG,4.0,4.0,3.5315,K,,,TSOURCCODE,,,,kg,,,N,N,,,,,2051.0,,2.000000e+07,290.0,107127.0,"Sliced bread, hotdog and hamburger buns and co...",2.0,40000000.0,8760.0,4.0,338229.0,11826788.0,2.733088e+04,kg,262684.0,19402.0,1560.0,412357.0,87570.0,,,,,,,,,,,,,,,,,,,,,,,,,570.0,110.0,,,141174.0,983630.0,,,,,
