In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

# Local imports
from hat.utils.stability_testing import *

In [11]:
# --- Inputs: PV and wind capacity-factor profiles --------------------------------
# Profile files are read further down as "<dest_path>/<profile id>_<filename_prefix>.csv".
pv_dest_path = '/Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/complementarity/pv_time_series'
pv_filename_prefix = 'cf_pv_profile'
wind_dest_path = '/Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/complementarity/wind_time_series'
wind_filename_prefix = 'cf_wind_profile'

# --- Outputs: one folder per complementarity metric --------------------------------
c_stab_path = '/Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/complementarity/c_stab'
cf_mix_path = '/Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/complementarity/cf_mix'
pearsons_path = '/Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/complementarity/pearson'

# --- Lookup tables -----------------------------------------------------------------
# Master table: one row per hydro site with its matching PV / wind profile ids and
# the path of the site's hydro CSV (column 'post_csv_filename').
pv_wind_id_df = pd.read_csv('../data/misc/master_pv_wind_ed.csv')
# Representative year per site (columns 'site' and 'year' are used downstream).
rep_year_df = pd.read_csv('../data/misc/ed_representative_year.csv')
years = range(2007,2014)

# --- Reference time axis -----------------------------------------------------------
# 8760 hourly UTC timestamps starting 2012-01-01 00:00. Because 2012 is a leap year
# this axis ends on 2012-12-30 23:00, not on December 31st.
rng = pd.date_range('2012-01-01 00:00:00+00:00', periods=8760, freq='1H')
base_df = pd.DataFrame({'dateTime': rng})

In [5]:
# Quick look at the first three rows of the site / profile lookup table.
pv_wind_id_df.head(n=3)

Unnamed: 0,lat,lon,dam_name,site_id,post_csv_filename,pv_id,wind_id
0,45.158051,-67.401657,Lower Saint Croix River,1021000,/Users/galljf/OneDrive - Idaho National Labora...,1681,1681
1,44.786598,-71.124901,Stearns Brook-Androscoggin River,1053500,/Users/galljf/OneDrive - Idaho National Labora...,1178,1178
2,44.456947,-71.186066,Stearns Brook-Androscoggin River,1054000,/Users/galljf/OneDrive - Idaho National Labora...,726,726


# Complementarity PV-Hydro

In [16]:
from pathlib import Path

### Stability coefficient and Pearson correlation: PV vs. hydro
# For every row of the master table, pair the PV capacity-factor profile with the
# hydro capacity factor of the site's representative year (re-stamped onto the 2012
# reference axis), compute the complementarity metrics and write them to CSV.
# NOTE(review): the run log shows the same site (e.g. 1054000) processed several
# times, so the master table seems to contain duplicate rows -- confirm whether
# they should be dropped before this loop.

# Sites skipped because the hydro file is missing, incomplete or sums to zero.
missing_sites = []
# Sites skipped because they have no entry in the representative-year table.
empty_data_sites = []
df_missing = pd.DataFrame()

# Missing values become 0 so that a missing hydro file path can be detected below.
pv_wind_id_df.fillna(0, inplace=True)

pearsons_pv_hy = pd.DataFrame(columns=['site_id','p'])
annual_stab_df = pd.DataFrame(columns = ['site_id', 'lat', 'lon', 'c_stab'])

# Accepted number of hourly records in the representative year (regular / leap year).
min_hours, max_hours = 8760, 8784
# Every series is aligned to the length of the 2012 reference axis.
n_hours = len(base_df)

for idx, row in pv_wind_id_df.iterrows():

    if row['post_csv_filename'] == 0:
        print( f"No file for site {row['site_id']} ")
        missing_sites.append(row['site_id'])
        continue

    if row['site_id'] not in rep_year_df.site.values:
        print(f"Site doesn't have enough data: {row['site_id']}")
        # `query_df` is not defined in this notebook, so the skipped site is only
        # recorded here instead of also blanking its file path in another frame.
        empty_data_sites.append(row['site_id'])
        continue

    print(f"{idx}. Processing PV profile {row['pv_id']} and ED {row['site_id']}, {row['post_csv_filename']}")

    solar_df = pd.read_csv(f"{pv_dest_path}/{row['pv_id']}_{pv_filename_prefix}.csv",parse_dates=True,index_col=0)
    hydro_df = pd.read_csv(row['post_csv_filename'],parse_dates=True,index_col=0)

    hydro_df = hydro_df.fillna(0)

    # Finds the representative year for the given stream gage
    rep_year = rep_year_df['year'].loc[rep_year_df.site == row['site_id']].values[0]
    year = str(rep_year)

    # Select the rows of the representative year with an explicit mask on the
    # DatetimeIndex; `hydro_df[year]` (string row slicing through __getitem__) no
    # longer works in pandas >= 2.0.
    in_rep_year = hydro_df.index.year == int(rep_year)
    n_records = int(in_rep_year.sum())

    if n_records < min_hours or n_records > max_hours:
        print("Hydro data incomplete")
        missing_sites.append(row['site_id'])
        continue

    if float(hydro_df['capacity_factor'].sum()) == 0.0:
        print( f"No data in site {row['site_id']} ")
        missing_sites.append(row['site_id'])
        continue

    # Massaging the timestamps of the hydropower time-series given that we are choosing
    # the best year in terms of data completion: the capacity factors are copied onto
    # the 2012 reference axis. Only the first `n_hours` values are kept, so a leap
    # representative year (8784 records) fits the 8760-step axis instead of raising a
    # length-mismatch error.
    tmp_hydro_df = base_df.set_index('dateTime')
    tmp_hydro_df['capacity_factor'] = hydro_df.loc[in_rep_year, 'capacity_factor'].values[:n_hours]

    # Indexing 8760 hours (2012 is a leap year, hence the December 30th end date)
    tmp_solar_df = solar_df.loc['2012-01-01':'2012-12-30'].copy()

    # Daily resampling of both series is intentionally not applied; metrics use hourly values.

    # Both frames must expose the series under the same column name (the site id).
    tmp_hydro_df.rename(columns={'capacity_factor':row['site_id']}, inplace=True)
    tmp_solar_df.rename(columns={str(row['pv_id']):row['site_id']}, inplace=True)

    # Compute complementarity metrics (PV as reference)
    # NOTE(review): main_stability comes from the star import; it presumably returns
    # (mixed capacity-factor frame, stability-coefficient frame) -- confirm.
    cf_mix_tmp, c_stab_tmp = main_stability(tmp_solar_df, tmp_hydro_df)
    cf_mix_tmp.rename(columns={row['site_id']:year}, inplace=True)

    cf_mix_df = pd.concat([cf_mix_tmp], axis=1)
    c_stab_df = pd.concat([c_stab_tmp], axis=1)

    annual_mean = c_stab_df.mean()

    annual_stab_df.loc[idx,'site_id'] = row['site_id']
    annual_stab_df.loc[idx,'lat'] = row['lat']
    annual_stab_df.loc[idx,'lon'] = row['lon']
    annual_stab_df.loc[idx,'c_stab'] = annual_mean.values[0]

    #### Pearsons
    # NOTE(review): pearsonr is not imported explicitly; it is assumed to be exposed
    # by the star import and to return (coefficient, p-value) -- confirm.
    pearsons_pv_hy.loc[idx,'site_id'] = row['site_id']
    corr, _ = pearsonr(tmp_solar_df[row['site_id']].values, tmp_hydro_df[row['site_id']].values)
    pearsons_pv_hy.loc[idx,'p'] = corr


annual_stab_df.to_csv(f"{c_stab_path}/ed_stab_pv_hydro.csv", index=False)
pearsons_pv_hy.to_csv(f"{pearsons_path}/ed_pearson_pv_hydro.csv", index=False)

0. Processing PV profile 1681 and ED 1021000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1021000_2010_synthetic.csv
1. Processing PV profile 1178 and ED 1053500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1053500_2013_synthetic.csv
2. Processing PV profile 726 and ED 1054000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1054000_2013_synthetic.csv
3. Processing PV profile 726 and ED 1054000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1054000_2013_synthetic.csv
4. Processing PV profile 726 and ED 1054000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1054000_2013_synthetic.csv
5. Processing PV profile 780 and ED 1054500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plan

# Complementarity Wind-Hydro

In [18]:
from pathlib import Path

### Stability coefficient and Pearson correlation: wind vs. hydro
# For every row of the master table, pair the wind capacity-factor profile with the
# hydro capacity factor of the site's representative year (re-stamped onto the 2012
# reference axis), compute the complementarity metrics and write them to CSV.
# NOTE(review): the run log shows the same site (e.g. 1054000) processed several
# times, so the master table seems to contain duplicate rows -- confirm whether
# they should be dropped before this loop.

# Sites skipped because the hydro file is missing, incomplete or sums to zero.
missing_sites = []
# Sites skipped because they have no entry in the representative-year table.
empty_data_sites = []
df_missing = pd.DataFrame()

# Missing values become 0 so that a missing hydro file path can be detected below.
pv_wind_id_df.fillna(0, inplace=True)

pearsons_wind_hy = pd.DataFrame(columns=['site_id','p'])
annual_stab_df = pd.DataFrame(columns = ['site_id', 'lat', 'lon', 'c_stab'])

# Accepted number of hourly records in the representative year (regular / leap year).
min_hours, max_hours = 8760, 8784
# Every series is aligned to the length of the 2012 reference axis.
n_hours = len(base_df)

for idx, row in pv_wind_id_df.iterrows():

    if row['post_csv_filename'] == 0:
        print( f"No file for site {row['site_id']} ")
        missing_sites.append(row['site_id'])
        continue

    if row['site_id'] not in rep_year_df.site.values:
        print(f"Site doesn't have enough data: {row['site_id']}")
        # `query_df` is not defined in this notebook, so the skipped site is only
        # recorded here instead of also blanking its file path in another frame.
        empty_data_sites.append(row['site_id'])
        continue

    print(f"{idx}. Processing Wind profile {row['wind_id']} and ED {row['site_id']}, {row['post_csv_filename']}")

    wind_df = pd.read_csv(f"{wind_dest_path}/{row['wind_id']}_{wind_filename_prefix}.csv",parse_dates=True,index_col=0)
    hydro_df = pd.read_csv(row['post_csv_filename'],parse_dates=True,index_col=0)

    hydro_df = hydro_df.fillna(0)

    # Finds the representative year for the given stream gage
    rep_year = rep_year_df['year'].loc[rep_year_df.site == row['site_id']].values[0]
    year = str(rep_year)

    # Select the rows of the representative year with an explicit mask on the
    # DatetimeIndex; `hydro_df[year]` (string row slicing through __getitem__) no
    # longer works in pandas >= 2.0.
    in_rep_year = hydro_df.index.year == int(rep_year)
    n_records = int(in_rep_year.sum())

    if n_records < min_hours or n_records > max_hours:
        print("Hydro data incomplete")
        missing_sites.append(row['site_id'])
        continue

    if float(hydro_df['capacity_factor'].sum()) == 0.0:
        print( f"No data in site {row['site_id']} ")
        missing_sites.append(row['site_id'])
        continue

    # Massaging the timestamps of the hydropower time-series given that we are choosing
    # the best year in terms of data completion: the capacity factors are copied onto
    # the 2012 reference axis. Only the first `n_hours` values are kept, so a leap
    # representative year (8784 records) fits the 8760-step axis instead of raising a
    # length-mismatch error.
    tmp_hydro_df = base_df.set_index('dateTime')
    tmp_hydro_df['capacity_factor'] = hydro_df.loc[in_rep_year, 'capacity_factor'].values[:n_hours]

    # Indexing 8760 hours (2012 is a leap year, hence the December 30th end date)
    tmp_wind_df = wind_df.loc['2012-01-01':'2012-12-30'].copy()

    # Daily resampling of both series is intentionally not applied; metrics use hourly values.

    # Both frames must expose the series under the same column name (the site id).
    tmp_hydro_df.rename(columns={'capacity_factor':row['site_id']}, inplace=True)
    tmp_wind_df.rename(columns={str(row['wind_id']):row['site_id']}, inplace=True)

    # Compute complementarity metrics (wind as reference)
    # NOTE(review): main_stability comes from the star import; it presumably returns
    # (mixed capacity-factor frame, stability-coefficient frame) -- confirm.
    cf_mix_tmp, c_stab_tmp = main_stability(tmp_wind_df, tmp_hydro_df)
    cf_mix_tmp.rename(columns={row['site_id']:year}, inplace=True)

    cf_mix_df = pd.concat([cf_mix_tmp], axis=1)
    c_stab_df = pd.concat([c_stab_tmp], axis=1)

    annual_mean = c_stab_df.mean()

    annual_stab_df.loc[idx,'site_id'] = row['site_id']
    annual_stab_df.loc[idx,'lat'] = row['lat']
    annual_stab_df.loc[idx,'lon'] = row['lon']
    annual_stab_df.loc[idx,'c_stab'] = annual_mean.values[0]

    #### Pearsons
    # NOTE(review): pearsonr is not imported explicitly; it is assumed to be exposed
    # by the star import and to return (coefficient, p-value) -- confirm.
    pearsons_wind_hy.loc[idx,'site_id'] = row['site_id']
    corr, _ = pearsonr(tmp_wind_df[row['site_id']].values, tmp_hydro_df[row['site_id']].values)
    pearsons_wind_hy.loc[idx,'p'] = corr


annual_stab_df.to_csv(f"{c_stab_path}/ed_stab_wind_hydro.csv", index=False)
pearsons_wind_hy.to_csv(f"{pearsons_path}/ed_pearson_wind_hydro.csv", index=False)

0. Processing Wind profile 1681 and ED 1021000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1021000_2010_synthetic.csv
1. Processing Wind profile 1178 and ED 1053500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1053500_2013_synthetic.csv
2. Processing Wind profile 726 and ED 1054000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1054000_2013_synthetic.csv
3. Processing Wind profile 726 and ED 1054000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1054000_2013_synthetic.csv
4. Processing Wind profile 726 and ED 1054000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/existing_plants/synthetic_profiles/ed_1054000_2013_synthetic.csv
5. Processing Wind profile 780 and ED 1054500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/e