### Import Libraries

In [18]:
import pandas as pd
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import janitor
from janitor import clean_names

### Import Data

In [19]:
# Emissions table, read from the intermediate data folder
emissions_tidy_df = pd.read_csv(
    "../data/intermediate_data/emissions_tidy.csv"
)

# Generation table, read from the same intermediate data folder
generation_df = pd.read_csv(
    "../data/intermediate_data/generation.csv"
)


### Integrate Generation data into the Emissions dataframe

In [20]:
# List the generation column names to see which keys are available for merging
print(generation_df.columns.tolist())

['year', 'state', 'type_of_producer', 'energy_source', 'generation_megawatthours', 'units']


In [21]:
# Join generation totals to emission totals and derive an emission factor.
#
# Steps:
#   1. Keep only the aggregate rows of each table:
#        emissions  -> energy_source == 'All Sources', producer_type == 'Total Electric Power Industry'
#        generation -> energy_source == 'Total',       type_of_producer == 'Total Electric Power Industry'
#   2. Left-join emissions onto generation using BOTH 'state' and 'year' as keys.
#   3. Divide the emitted amount by the generated MWh to get the emission factor.
#
# NOTE(review): the 'kg/kWh' label relies on 'amount' being in metric tonnes
# (tonnes/MWh == kg/kWh) - confirm against the emissions source data.

# Filter
emissions_filtered = emissions_tidy_df[
    (emissions_tidy_df['energy_source'] == 'All Sources') &
    (emissions_tidy_df['producer_type'] == 'Total Electric Power Industry')]
generation_filtered = generation_df[
    (generation_df['energy_source'] == 'Total') &
    (generation_df['type_of_producer'] == 'Total Electric Power Industry')]

# Merge
# Only the join keys and the emission payload are taken from the emissions side.
# Carrying its producer/energy_source columns as well would collide with the
# generation columns of the same name, and pandas would then emit suffixed
# duplicates (type_of_producer_x/_y, energy_source_x/_y) that nothing uses.
emissions_generation_df = generation_filtered.merge(
    emissions_filtered[['state', 'year', 'emission_type', 'amount']],
    on=['state', 'year'],
    how='left'  # Left join to keep all rows from generation data
)

# Calculate emissions per generation and add the 'kg/kWh' unit
emissions_generation_df['emission_factor'] = (
    emissions_generation_df['amount'] / emissions_generation_df['generation_megawatthours']
)
emissions_generation_df['unit'] = 'kg/kWh'

In [22]:
# Reorder the columns of emissions_generation_df, keeping only the ones listed here
emissions_generation_columns_order = [
    'state',
    'year',
    'generation_megawatthours',
    'units',
    'emission_type',
    'amount',
    'emission_factor',
    'unit',
]
emissions_generation_df = emissions_generation_df.loc[:, emissions_generation_columns_order]
emissions_generation_df

Unnamed: 0,state,year,generation_megawatthours,units,emission_type,amount,emission_factor,unit
0,AK,1990,5599506.0,MWh,CO2,4208809.0,0.751639,kg/kWh
1,AK,1990,5599506.0,MWh,SO2,18741.0,0.003347,kg/kWh
2,AK,1990,5599506.0,MWh,NOx,12562.0,0.002243,kg/kWh
3,AL,1990,79652133.0,MWh,CO2,52936063.0,0.664591,kg/kWh
4,AL,1990,79652133.0,MWh,SO2,505530.0,0.006347,kg/kWh
...,...,...,...,...,...,...,...,...
5275,WV,2023,52286784.0,MWh,SO2,35530.0,0.000680,kg/kWh
5276,WV,2023,52286784.0,MWh,NOx,25505.0,0.000488,kg/kWh
5277,WY,2023,43181420.0,MWh,CO2,36580219.0,0.847129,kg/kWh
5278,WY,2023,43181420.0,MWh,SO2,21963.0,0.000509,kg/kWh
