In [None]:
# Import libraries
import pandas as pd
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import janitor
from janitor import clean_names


In [None]:
# Load the intermediate datasets used throughout this notebook

# ------- locate the data directory -------
# The folder is written relative to the working directory and then resolved
# to an absolute path so every read below uses the same base location.
relative_path = Path('../../data/intermediate_data/')
absolute_path = relative_path.resolve()


# ------- read the input tables -------
# IAC assess dataset
assess_df = pd.read_csv(absolute_path / 'iac_assess_tidy.csv')

# emissions dataset
ec_emissions_df = pd.read_csv(absolute_path / 'emissions_tidy.csv')

# electricity generation dataset
ec_generation_df = pd.read_csv(absolute_path / 'generation.csv')

# integrated recc dataset with adjusted impcost
recc_integrated_ppi_df = pd.read_csv(absolute_path / 'recc_integrated_ppi.csv')

# fuel emission factors (worksheet 'Sheet1' of the Excel workbook)
fuel_emission_factors_df = pd.read_excel(
    absolute_path / 'emission_factors_tidy.xlsx',
    sheet_name='Sheet1',
)

In [None]:
# Display the emissions table as read from emissions_tidy.csv (before any filtering)
ec_emissions_df

In [None]:
# Display the fuel emission factors read from emission_factors_tidy.xlsx
fuel_emission_factors_df

In [None]:
# Remove records that have no value in any of the four power-source columns.
# how='all' drops a row only when every listed column is missing; a row with
# at least one of them filled in is kept.
recc_integrated_ppi_df = recc_integrated_ppi_df.dropna(
    how='all',
    subset=['sourccode', 'conserved', 'sourconsv', 'saved'],
)

In [None]:
# Spot-check superid 'AM043901' to verify the rows with blank values
# are no longer in the dataframe
recc_integrated_ppi_df.loc[recc_integrated_ppi_df['superid'].eq('AM043901')]

In [None]:
# Integrate assess_df into recc_integrated_ppi_df (per the original note, this
# is where the sector and state attributes come from).
# Left join: every recc row is kept. The key pair is (sourccode, id) on the
# recc side and (source_code, id) on the assess side.
integrated_ppi_df = (
    recc_integrated_ppi_df
    .merge(
        assess_df,
        how='left',
        left_on=['sourccode', 'id'],
        right_on=['source_code', 'id'],
    )
    # discard rows that are exact duplicates across all columns
    .drop_duplicates()
)

In [None]:
# Check which distinct power source codes are present after merging in assess_df
integrated_ppi_df['sourccode'].unique()


In [None]:
# Spot-check the merged rows for superid 'WV061012'
integrated_ppi_df.loc[integrated_ppi_df['superid'].eq('WV061012')]

In [None]:
# Spot-check the merged rows for superid 'AM057403'
integrated_ppi_df.loc[integrated_ppi_df['superid'].eq('AM057403')]

#### Merge Fuel Emission Factors into the integrated recc table

In [None]:
# Attach fuel emission factors to the integrated recc records.
# Only the join key and the three factor columns are taken from the factors
# table; the left join keeps recc rows whose sourccode has no factor entry.
integrated_df = integrated_ppi_df.merge(
    fuel_emission_factors_df[
        ['sourccode', 'emission_type', 'emission_factor', 'emission_factor_units']
    ],
    how='left',
    on='sourccode',
)

In [None]:
# Spot-check two superids after the fuel emission factors were merged in
integrated_df.loc[integrated_df['superid'].isin(['WV061012', 'AM057403'])]

#### Calculate fuel emissions avoided

In [None]:
# Fuel emissions avoided: row-wise product of the emission factor and the
# conserved quantity. Rows without an emission factor yet come out as NaN.
integrated_df['emissions_avoided'] = integrated_df['emission_factor'].mul(
    integrated_df['conserved']
)

In [None]:
# List the columns of the integrated table after adding emissions_avoided
integrated_df.columns

In [None]:
# Spot-check the same two superids now that emissions_avoided has been computed
integrated_df.loc[integrated_df['superid'].isin(['WV061012', 'AM057403'])]

#### Merge electricity emission factors into the integrated recc table

In [None]:
# Keep only the industry-wide totals in both electricity tables.

# Emissions: total electric power industry, all energy sources (units = metric ton)
emissions_total_mask = (
    ec_emissions_df['producer_type'].eq('Total Electric Power Industry')
    & ec_emissions_df['energy_source'].eq('All Sources')
)
ec_emissions_df = ec_emissions_df[emissions_total_mask]

# Generation: total electric power industry, 'Total' energy source
generation_total_mask = (
    ec_generation_df['type_of_producer'].eq('Total Electric Power Industry')
    & ec_generation_df['energy_source'].eq('Total')
)
ec_generation_df = ec_generation_df[generation_total_mask]

In [None]:
# Display the emissions table after filtering to the industry-wide, all-sources rows
ec_emissions_df

In [None]:
# Electricity emission factors = total emissions / total electricity generated.
#
# The join keys are not spelled out, so pandas joins on every column name the
# two frames share.
# NOTE(review): presumably that is ['year', 'state'] -- confirm against the
# generation table's columns and consider passing `on=` explicitly.
#
# Units: assuming `amount` is in metric tons (TODO confirm) and generation is
# in MWh, t/MWh is numerically equal to kg/kWh, hence the unit label below.
ec_emission_factors_df = (
    ec_generation_df
    .merge(ec_emissions_df[['year', 'state', 'emission_type', 'amount']])
    .assign(
        emission_factor=lambda df: df['amount'] / df['generation_megawatthours_'],
        # label the factor's units and tag every row as electricity ('EC')
        emission_factor_units='kg/kWh',
        sourccode='EC',
    )
)

In [None]:
# Display the computed electricity emission factors
ec_emission_factors_df

In [None]:
# List the integrated table's columns before merging in the electricity emission factors
integrated_df.columns

In [None]:
# Combine the electricity emission factors with the integrated recc table.
#
# Left join: `fy` on the left is matched against `year` on the right, together
# with `state` and `sourccode`. Columns that exist on both sides come back
# suffixed `_x` (value already in integrated_df) and `_y` (electricity value).
# Each pair is coalesced into one column, preferring `_x` and falling back to
# `_y`, and the suffixed columns plus the right-hand `year` are then dropped.
integrated_df = (
    integrated_df
    .merge(
        ec_emission_factors_df[
            ['state', 'year', 'emission_type', 'emission_factor', 'emission_factor_units', 'sourccode']
        ],
        how='left',
        left_on=['fy', 'state', 'sourccode'],
        right_on=['year', 'state', 'sourccode'],
    )
    .assign(
        emission_type=lambda df: df['emission_type_x'].combine_first(df['emission_type_y']),
        emission_factor_units=lambda df: df['emission_factor_units_x'].combine_first(
            df['emission_factor_units_y']
        ),
        emission_factor=lambda df: df['emission_factor_x'].combine_first(df['emission_factor_y']),
    )
    .drop(
        columns=[
            'emission_type_x', 'emission_type_y',
            'emission_factor_units_x', 'emission_factor_units_y',
            'year',
            'emission_factor_x', 'emission_factor_y',
        ]
    )
)


In [None]:
# Spot-check the same two superids after the electricity emission factors were merged in
integrated_df.loc[integrated_df['superid'].isin(['WV061012', 'AM057403'])]

In [None]:
# Fill in emissions avoided for electricity ('EC') rows.
# The earlier emissions_avoided calculation ran before the electricity emission
# factors were merged in, so the product is recomputed here for EC rows only;
# all other rows keep their existing value.
# (The original cell repeated this exact assignment twice; once is sufficient.)
is_electricity = integrated_df['sourccode'] == 'EC'
integrated_df.loc[is_electricity, 'emissions_avoided'] = (
    integrated_df.loc[is_electricity, 'emission_factor']
    * integrated_df.loc[is_electricity, 'conserved']
)


In [None]:
# Spot-check all rows belonging to a handful of assessment ids
integrated_df.loc[integrated_df['id'].isin(['SF0532', 'OR0712', 'MI0415', 'IC0115'])]

In [None]:
# Spot-check individual superids in the final table.
# ('MI041503' was listed twice in the original lookup; the repeat had no effect
# on the result and has been removed.)
integrated_df[
    integrated_df['superid'].isin(
        ['WV061012', 'AM057403', 'MI041503', 'SF053206', 'SF053207', 'IC011501']
    )
]

## Generate a final integrated dataset

In [None]:
# Save the integrated dataframe (without the row index) next to the input files.
# Reuses `absolute_path` -- the resolved ../../data/intermediate_data/ folder
# defined in the data-loading cell -- instead of repeating the directory as a
# string literal, so the output location cannot drift from the input location.
integrated_df.to_csv(absolute_path / 'iac_integrated.csv', index=False)