In [37]:
import pandas as pd
import numpy as np

In [5]:
# Both first-estimate tables are keyed by NGC BMU id once loaded
df_eutl_emissions = (
    pd.read_csv('../data/intermediate/eutl_first_estimate.csv')
    .set_index('ngc_bmu_id')
)
df_pi_emissions = (
    pd.read_csv('../data/intermediate/pi_first_estimate.csv')
    .set_index('ngc_bmu_id')
)

df_eutl_emissions.head()

Unnamed: 0_level_0,fuel,gco2_per_kWh,eutl_account
ngc_bmu_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABTH7,coal,862.683692,97175
ABTH7G,ocgt,862.683692,97175
ABTH8,coal,862.683692,97175
ABTH8G,ocgt,862.683692,97175
ABTH9,coal,862.683692,97175


In [17]:
# Collapse the per-BMU rows to a single gco2_per_kWh value per EUTL account / PI permit.
# NOTE: GroupBy.first() returns the first non-null value in each group, so a group
# only shows NaN when every one of its rows is NaN.
s_eutl_account_to_co2_intensity = (
    df_eutl_emissions
    .groupby('eutl_account')['gco2_per_kWh']
    .first()
)
s_pi_permits_to_co2_intensity = (
    df_pi_emissions
    .groupby('pi_permits')['gco2_per_kWh']
    .first()
)

s_eutl_account_to_co2_intensity.head()

eutl_account
96786     381.599995
96827            NaN
96842     210.571595
96864     389.582482
96869    1060.121107
Name: gco2_per_kWh, dtype: float64

In [24]:
# Map every EUTL account / PI permit to the list of distinct NGC BMU ids grouped under it
s_eutl_account_bmu_ids = df_eutl_emissions.reset_index().groupby('eutl_account')['ngc_bmu_id'].unique()
s_pi_permits_bmu_ids = df_pi_emissions.reset_index().groupby('pi_permits')['ngc_bmu_id'].unique()

eutl_account_to_ngc_bmu_ids = {
    eutl_account: list(bmu_ids)
    for eutl_account, bmu_ids in s_eutl_account_bmu_ids.items()
}
pi_permits_to_ngc_bmu_ids = {
    pi_permit: list(bmu_ids)
    for pi_permit, bmu_ids in s_pi_permits_bmu_ids.items()
}

pd.Series(eutl_account_to_ngc_bmu_ids).head().to_dict()

{96786: ['BAGE-1', 'BAGE-2'],
 96827: ['LNMTH-1', 'LNMTH-2', 'LNMTH-3'],
 96842: ['DRAXX-1',
  'DRAXX-10G',
  'DRAXX-12G',
  'DRAXX-2',
  'DRAXX-3',
  'DRAXX-4',
  'DRAXX-5',
  'DRAXX-6',
  'DRAXX-9G'],
 96864: ['SPLN-1'],
 96869: ['INDQ-1']}

In [8]:
# Distinct fuel labels per EUTL account (note: despite the `permits` in the name,
# this series is grouped by `eutl_account`)
s_eutl_permits_fuel_types = df_eutl_emissions.groupby('eutl_account')['fuel'].unique()

# "Pure" accounts are those whose BMUs all share one fuel label; unwrap that single label
eutl_has_single_fuel = s_eutl_permits_fuel_types.apply(len) == 1
s_eutl_permits_pure_fuel_types = (
    s_eutl_permits_fuel_types
    .loc[eutl_has_single_fuel]
    .apply(lambda fuels: fuels[0])
)

s_eutl_permits_pure_fuel_types.value_counts()

ccgt       36
coal        5
ocgt        5
biomass     3
Name: fuel, dtype: int64

In [6]:
# Distinct fuel labels per PI permit
s_pi_permits_fuel_types = df_pi_emissions.groupby('pi_permits')['fuel'].unique()

# "Pure" permits are those whose BMUs all share one fuel label; unwrap that single label
pi_has_single_fuel = s_pi_permits_fuel_types.apply(len) == 1
s_pi_permits_pure_fuel_types = (
    s_pi_permits_fuel_types
    .loc[pi_has_single_fuel]
    .apply(lambda fuels: fuels[0])
)

s_pi_permits_pure_fuel_types.value_counts()

ccgt       28
coal        3
biomass     1
ocgt        1
Name: fuel, dtype: int64

In [None]:
# need to find permits and accounts which relate to the same subset of ngc bmu ids
# would also be interesting to double check those that only share some ngc bmu ids

# should use eutl accounts as new index as they're currently all single accounts

In [55]:
# Pair an EUTL account with a PI permit only when both cover exactly the same set of
# NGC BMU ids. Checking in one direction only (all of the permit's BMUs sit in a single
# account) would also pair accounts that own further BMUs outside the permit, and would
# let several permits resolving to the same account silently overwrite one another.
eutl_account_to_pi_permit = {}
ngc_bmu_id_to_eutl_account = df_eutl_emissions['eutl_account'].to_dict()

for pi_permits, ngc_bmu_ids in pi_permits_to_ngc_bmu_ids.items():
    # `.get` leaves BMU ids that are missing from the EUTL table as None, so such a
    # permit simply stays unmatched instead of aborting the loop with a KeyError
    eutl_accounts_in_group = {
        ngc_bmu_id_to_eutl_account.get(ngc_bmu_id) for ngc_bmu_id in ngc_bmu_ids
    }

    if len(eutl_accounts_in_group) != 1:
        # the permit's BMUs are spread over several EUTL accounts
        continue

    eutl_account = next(iter(eutl_accounts_in_group))

    # The account must not own any BMU outside this permit (unknown accounts compare
    # against an empty set and therefore never match)
    if set(eutl_account_to_ngc_bmu_ids.get(eutl_account, [])) == set(ngc_bmu_ids):
        eutl_account_to_pi_permit[eutl_account] = pi_permits

s_eutl_account_to_pi_permit = pd.Series(eutl_account_to_pi_permit)

s_eutl_account_to_pi_permit.size

38

In [59]:
# Accounts whose BMUs carry more than one fuel label are tagged with the literal 'mixed'
eutl_has_several_fuels = s_eutl_permits_fuel_types.apply(len) > 1
eutl_accounts_with_mixed_fuel_types = s_eutl_permits_fuel_types[eutl_has_several_fuels].index

s_eutl_accounts_with_mixed_fuel_types = pd.Series(
    ['mixed'] * len(eutl_accounts_with_mixed_fuel_types),
    index=eutl_accounts_with_mixed_fuel_types,
)

s_eutl_accounts_with_mixed_fuel_types.size

10

In [None]:
# after creating the eutl df, reset the index and merge in the pi-only datapoints, then add the pi co2 estimate

In [126]:
#exports
def start_joined_df(s_eutl_permits_pure_fuel_types, s_eutl_account_to_pi_permit, s_eutl_accounts_with_mixed_fuel_types, s_eutl_account_to_co2_intensity=None):
    """Build the initial joined table with one row per EUTL account.

    Parameters
    ----------
    s_eutl_permits_pure_fuel_types : pd.Series
        Fuel label indexed by EUTL account, for accounts with a single fuel.
    s_eutl_account_to_pi_permit : pd.Series
        PI permit indexed by EUTL account, for accounts matched to a permit.
    s_eutl_accounts_with_mixed_fuel_types : pd.Series
        Fuel label (the caller passes 'mixed') indexed by EUTL account, for
        accounts with several fuels. Its index must not overlap with
        `s_eutl_permits_pure_fuel_types`, otherwise the column assignment fails
        on the duplicated labels.
    s_eutl_account_to_co2_intensity : pd.Series, optional
        CO2 intensity indexed by EUTL account. When omitted, the module-level
        variable of the same name is used, which is what this function always
        did before the parameter existed.

    Returns
    -------
    pd.DataFrame
        Columns `eutl_account`, `pi_permits`, `eutl_co2_intensity` and
        `fuel_type`, sorted by `eutl_account`; accounts missing from an input
        series get NaN in the corresponding column.

    Raises
    ------
    NameError
        If `s_eutl_account_to_co2_intensity` is neither passed nor defined at
        module level.
    """
    if s_eutl_account_to_co2_intensity is None:
        # Backward-compatible fallback to the notebook-level variable; passing
        # the series explicitly avoids relying on hidden kernel state.
        if 's_eutl_account_to_co2_intensity' not in globals():
            raise NameError("name 's_eutl_account_to_co2_intensity' is not defined")
        s_eutl_account_to_co2_intensity = globals()['s_eutl_account_to_co2_intensity']

    eutl_accounts = sorted(
        set(s_eutl_permits_pure_fuel_types.index)
        | set(s_eutl_account_to_pi_permit.index)
        | set(s_eutl_accounts_with_mixed_fuel_types.index)
    )

    # pd.concat replaces Series.append, which was removed in pandas 2.0
    s_fuel_types = pd.concat([s_eutl_accounts_with_mixed_fuel_types, s_eutl_permits_pure_fuel_types])

    # Each assigned series is aligned on the account index of the empty frame
    df_joined = (pd
                 .DataFrame(index=eutl_accounts)
                 .assign(pi_permits=s_eutl_account_to_pi_permit)
                 .assign(eutl_co2_intensity=s_eutl_account_to_co2_intensity)
                 .assign(fuel_type=s_fuel_types)
                )

    df_joined.index.name = 'eutl_account'
    df_joined = df_joined.reset_index()

    return df_joined

In [127]:
# Seed the joined table from the EUTL side: one row per EUTL account
df_joined = start_joined_df(
    s_eutl_permits_pure_fuel_types=s_eutl_permits_pure_fuel_types,
    s_eutl_account_to_pi_permit=s_eutl_account_to_pi_permit,
    s_eutl_accounts_with_mixed_fuel_types=s_eutl_accounts_with_mixed_fuel_types,
)

df_joined.head()

Unnamed: 0,eutl_account,pi_permits,eutl_co2_intensity,fuel_type
0,96786,,381.599995,ccgt
1,96827,FP3137CG,,biomass
2,96842,VP3530LS,210.571595,mixed
3,96864,BK0701IW,389.582482,ccgt
4,96869,,1060.121107,ocgt


In [132]:
#exports
def add_pi_permits_data_to_joined_df(df_joined, df_pi_emissions, s_pi_permits_fuel_types, s_pi_permits_to_co2_intensity):
    """Append PI permits missing from `df_joined` and attach the PI CO2 intensity.

    Parameters
    ----------
    df_joined : pd.DataFrame
        Table with at least `eutl_account` and `pi_permits` columns. It is not
        modified; a new frame is returned.
    df_pi_emissions : pd.DataFrame
        PI table; only its `pi_permits` column is read.
    s_pi_permits_fuel_types : pd.Series
        Series named `fuel`, indexed by PI permit, holding the collection of
        fuel labels seen for each permit.
    s_pi_permits_to_co2_intensity : pd.Series
        CO2 intensity indexed by PI permit.

    Returns
    -------
    pd.DataFrame
        `df_joined` followed by one row per previously absent permit (sorted by
        permit id, `fuel_type` set to the single fuel label or 'mixed'), plus a
        `pi_co2_intensity` column, with `eutl_account` cast to nullable `Int64`.
    """
    permits_already_present = set(df_joined['pi_permits'].dropna())
    permits_in_pi_data = set(df_pi_emissions['pi_permits'].unique())
    unmatched_pi_permits = sorted(permits_in_pi_data - permits_already_present)

    def _single_fuel_or_mixed(fuels):
        # One label -> that label; several labels -> the literal 'mixed'
        if len(fuels) == 1:
            return fuels[0]
        return 'mixed'

    s_unmatched_fuel_types = s_pi_permits_fuel_types.loc[unmatched_pi_permits].apply(_single_fuel_or_mixed)
    df_unmatched = s_unmatched_fuel_types.reset_index().rename(columns={'fuel': 'fuel_type'})

    df_combined = pd.concat([df_joined, df_unmatched], ignore_index=True)

    # Rows without a permit are dropped before mapping and come back as NaN on assignment
    s_pi_co2_intensity = df_combined['pi_permits'].dropna().map(s_pi_permits_to_co2_intensity)
    df_combined = df_combined.assign(pi_co2_intensity=s_pi_co2_intensity)

    # The appended rows have no EUTL account, so a nullable integer dtype is needed
    df_combined['eutl_account'] = df_combined['eutl_account'].astype('Int64')

    return df_combined

In [133]:
# Extend the joined table with the PI-only permits and the PI intensity column
df_joined = add_pi_permits_data_to_joined_df(
    df_joined=df_joined,
    df_pi_emissions=df_pi_emissions,
    s_pi_permits_fuel_types=s_pi_permits_fuel_types,
    s_pi_permits_to_co2_intensity=s_pi_permits_to_co2_intensity,
)

df_joined.head()

Unnamed: 0,eutl_account,pi_permits,eutl_co2_intensity,fuel_type,pi_co2_intensity
0,96786,,381.599995,ccgt,
1,96827,FP3137CG,,biomass,1005.365041
2,96842,VP3530LS,210.571595,mixed,894.07047
3,96864,BK0701IW,389.582482,ccgt,389.962235
4,96869,,1060.121107,ocgt,


In [None]:
# should have a list of fuel types instead of 'mixed' (could join with '__')
# should also have a column which lists the NGC BMU ids associated with each row

# calculate the percentage difference between the two then sort on that

In [151]:
intensity_cols = ['eutl_co2_intensity', 'pi_co2_intensity']
comparison_cols = ['eutl_account' ,'pi_permits', 'fuel_type', 'eutl_co2_intensity', 'pi_co2_intensity', 'co2_intensity_pct_delta']

# Only rows carrying an estimate from both sources can be compared
df_both_estimates = df_joined.dropna(subset=intensity_cols)

# Signed difference relative to the PI estimate, kept as a fraction (not multiplied by 100)
s_pct_delta = (
    (df_both_estimates['eutl_co2_intensity'] - df_both_estimates['pi_co2_intensity'])
    / df_both_estimates['pi_co2_intensity']
)

# Row labels ordered from the smallest to the largest absolute disagreement
closest_agreement_first = s_pct_delta.abs().sort_values().index

df_joined_full = (df_both_estimates
                  .assign(co2_intensity_pct_delta=s_pct_delta)
                  .reindex(closest_agreement_first)
                  .reset_index(drop=True)
                  .reindex(columns=comparison_cols)
                  .assign(
                      # human-readable list of the BMUs grouped under each EUTL account
                      ngc_bmu_ids=lambda df: df['eutl_account'].map(eutl_account_to_ngc_bmu_ids).str.join(', '),
                      # show '__'-joined permit ids as a comma separated list
                      pi_permits=lambda df: df['pi_permits'].str.replace('__', ', '),
                  )
                 )

df_joined_full

Unnamed: 0,eutl_account,pi_permits,fuel_type,eutl_co2_intensity,pi_co2_intensity,co2_intensity_pct_delta,ngc_bmu_ids
0,97532,BL6217IM,ccgt,355.466943,355.464549,7e-06,MRWD-1
1,112433,RP3438GG,ccgt,371.048197,371.062909,-4e-05,"CARR-1, CARR-2"
2,97445,VP3930LH,coal,958.623935,958.737013,-0.000118,"EGGPS-1, EGGPS-2, EGGPS-3, EGGPS-4"
3,97443,SP3133LY,ccgt,552.217721,552.333906,-0.00021,KILNS-1
4,97192,EP3538LB,mixed,923.334083,923.096368,0.000258,"RUGGT-6, RUGGT-7, RUGPS-6, RUGPS-7"
5,97585,AP3633BL,ccgt,383.422828,383.154309,0.000701,LAGA-1
6,97169,AP3630LG,mixed,384.274015,384.00053,0.000712,"LBAR-1, LBAR-1G"
7,97183,KP3531US,ccgt,367.692074,367.98581,-0.000798,GYAR-1
8,96864,BK0701IW,ccgt,389.582482,389.962235,-0.000974,SPLN-1
9,97236,FP3835LS,ccgt,388.400408,387.986922,0.001066,SUTB-1


In [131]:
# Look up the NGC BMU ids grouped under PI permit 'AP3233LU'
pi_permits_to_ngc_bmu_ids['AP3233LU']

['PETEM1']

In [None]:
# look at the relationship between carbon intensity and capacity factor (relative to max seen in B1610)

In [25]:
# scatter plot, each col is a different fuel, different dbs are offset and coloured
# for common accounts should look at the difference in the predictions (connected by line) 
# ^ could do these on the same plot