In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

# Local imports
from hat.utils.stability_testing import *

In [2]:
# --- Configuration -----------------------------------------------------------
# Root of the local `hat` checkout. Every absolute path below is derived from
# it, so pointing the notebook at another machine means editing this one line.
hat_root = '/Users/galljf/OneDrive - Idaho National Laboratory/code/hat'
complementarity_root = f'{hat_root}/data/npd/complementarity'

# Input capacity-factor time series. The loops below read
# "<dest_path>/<profile id>_<filename_prefix>.csv" for each site.
pv_dest_path = f'{complementarity_root}/pv_time_series'
wind_dest_path = f'{complementarity_root}/wind_time_series'
pv_filename_prefix = 'cf_pv_profile'
wind_filename_prefix = 'cf_wind_profile'

# Output folders for the complementarity metrics.
c_stab_path = f'{complementarity_root}/c_stab'
cf_mix_path = f'{complementarity_root}/cf_mix'
pearsons_path = f'{complementarity_root}/pearson'

# Representative year per stream gage; the loops below use its `site` and
# `year` columns. NOTE(review): this is the only path relative to the
# notebook's working directory -- confirm that is intended.
rep_year_df = pd.read_csv('../data/misc/representative_year.csv')
years = range(2007,2014)

# Master table linking each hydro site to its PV and wind profile ids.
pv_wind_id_df = pd.read_csv(f'{hat_root}/data/misc/master_hydro_pv_wind.csv')

# Base frame holding 8760 hourly UTC timestamps starting 2012-01-01.
# 2012 is a leap year, so these 365 days end on 2012-12-30 23:00.
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated in recent pandas releases.
rng = pd.date_range('2012-01-01 00:00:00+00:00', periods=8760, freq='1h')
base_df = pd.DataFrame({'dateTime': rng})


In [3]:
# Preview the first five rows of the hydro / PV / wind site mapping.
pv_wind_id_df.head(n=5)

Unnamed: 0,start,end,lat,lon,name,site_id,post_csv_filename,pv_id,wind_id,pv_wind_id_same
0,1/1/00,12/31/20,40.147715,-105.865879,GRANBY,9019500,/Users/galljf/OneDrive - Idaho National Labora...,CO01656,CO01656,True
1,1/1/00,12/31/20,36.46667,-91.53,MAMMOTH SPRINGS DAM 3,7069220,,AR01157,AR01157,True
2,1/1/00,12/31/20,35.394714,-106.547531,JEMEZ CANYON DAM,8328950,/Users/galljf/OneDrive - Idaho National Labora...,NM00003,NM00003,True
3,1/1/00,12/31/20,41.475038,-79.445464,TIONESTA DAM,3020000,/Users/galljf/OneDrive - Idaho National Labora...,PA00110,PA00110,True
4,1/1/00,12/31/20,34.398667,-87.987939,Bear Creek,3592000,/Users/galljf/OneDrive - Idaho National Labora...,AL05903,AL05903,True


# Complementarity PV-Hydro

In [4]:
from pathlib import Path
from scipy.stats import pearsonr

### Stability coefficient and Pearson correlation: PV vs. hydro
# For each site in pv_wind_id_df:
#   1. load the PV capacity-factor profile and the hydro profile,
#   2. copy the hydro site's representative year onto the 2012 timestamps of
#      base_df so both series share one index,
#   3. compute the stability metrics with main_stability and the Pearson
#      correlation between the two series.
# Per-site results are collected in annual_stab_df / pearsons_pv_hy and written
# to CSV once the loop finishes.
missing_sites = []       # no hydro CSV listed, or the hydro profile sums to zero
empty_data_sites = []    # site has no entry in rep_year_df
df_missing = pd.DataFrame()

# NaN -> 0 so that a missing 'post_csv_filename' becomes the sentinel 0 that
# the loop tests for.
pv_wind_id_df.fillna(0, inplace=True)

pearsons_pv_hy = pd.DataFrame(columns=['site_id','p'])

annual_stab_df = pd.DataFrame(columns = ['site_id', 'lat', 'lon', 'c_stab'])

# Number of hourly steps every aligned series must have.
n_hours = len(base_df)

for idx, row in pv_wind_id_df.iterrows():
    site_id = row['site_id']

    if row['post_csv_filename'] == 0:
        print( f"No file for site {row['site_id']} ")
        missing_sites.append(site_id)
        continue

    # Representative year for this stream gage. Sites without one are skipped.
    # The original branch referenced names that are never defined in this
    # notebook (`empty_data_sites`, `query_df`) and raised NameError; the list
    # is now created above and the `query_df` write has been dropped.
    rep_year_match = rep_year_df.loc[rep_year_df.site == site_id, 'year']
    if rep_year_match.empty:
        print(f"Site doesn't have enough data: {row['site_id']}")
        empty_data_sites.append(site_id)
        continue
    year = str(rep_year_match.values[0])

    print(f"{idx}. Processing PV profile {row['pv_id']} and NPD {row['site_id']}, {row['post_csv_filename']}")

    solar_df = pd.read_csv(f"{pv_dest_path}/{row['pv_id']}_{pv_filename_prefix}.csv",parse_dates=True,index_col=0)
    hydro_df = pd.read_csv(row['post_csv_filename'],parse_dates=True,index_col=0)

    hydro_df.fillna(0, inplace=True)

    if float(hydro_df['capacity_factor'].sum()) == 0.0:
        print( f"No data in site {row['site_id']} ")
        missing_sites.append(site_id)
        continue

    cf_mix = []
    c_stab = []

    # Re-stamp the representative year's capacity factors with the 2012
    # timestamps of base_df. The slice keeps at most n_hours values so that a
    # leap representative year (8784 hourly rows -- assumes hourly data, TODO
    # confirm) loses its last day, exactly like the PV series below, instead
    # of failing with a length mismatch. A year shorter than n_hours still
    # raises.
    rep_year_cf = hydro_df.loc[year, 'capacity_factor'].to_numpy()
    tmp_hydro_df = base_df.set_index('dateTime')
    tmp_hydro_df[site_id] = rep_year_cf[:n_hours]

    # 2012-01-01 .. 2012-12-30 is 365 days (8760 hours) of the leap year 2012.
    tmp_solar_df = solar_df.loc['2012-01-01':'2012-12-30'].rename(columns={'pv_cf': site_id})

    # Compute complementarity metrics (PV as reference)
    cf_mix_tmp, c_stab_tmp = main_stability(tmp_solar_df, tmp_hydro_df)
    cf_mix_tmp.rename(columns={site_id: year}, inplace=True)

    cf_mix.append(cf_mix_tmp)
    c_stab.append(c_stab_tmp)

    # Single-element concats are kept so the downstream objects have the same
    # shape as when several years were stacked here.
    cf_mix_df = pd.concat(cf_mix, axis=1)
    c_stab_df = pd.concat(c_stab, axis=1)

    annual_mean = c_stab_df.mean()

    annual_stab_df.loc[idx,'site_id'] = site_id
    annual_stab_df.loc[idx,'lat'] = row['lat']
    annual_stab_df.loc[idx,'lon'] = row['lon']
    annual_stab_df.loc[idx,'c_stab'] = annual_mean.values[0]

    #### Pearsons
    corr, _ = pearsonr(tmp_solar_df[site_id].values, tmp_hydro_df[site_id].values)
    pearsons_pv_hy.loc[idx,'site_id'] = site_id
    pearsons_pv_hy.loc[idx,'p'] = corr

# NOTE(review): the Pearson table is written under c_stab_path although a
# dedicated pearsons_path is configured -- confirm which folder is intended.
annual_stab_df.to_csv(f"{c_stab_path}/npd_stab_pv_hydro.csv")
pearsons_pv_hy.to_csv(f"{c_stab_path}/npd_pearson_pv_hydro.csv")

0. Processing PV profile CO01656 and NPD 9019500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_9019500_2016_synthetic.csv
No data in site 9019500 
No file for site 7069220 
2. Processing PV profile NM00003 and NPD 8328950, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_8328950_2016_synthetic.csv
No data in site 8328950 
3. Processing PV profile PA00110 and NPD 3020000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_3020000_2019_synthetic.csv


# Complementarity Wind-Hydro

In [37]:
# Stability coefficient and Pearson correlation: wind vs. hydro
# For each site in pv_wind_id_df:
#   1. load the wind capacity-factor profile and the hydro profile,
#   2. copy the hydro site's representative year onto the 2012 timestamps of
#      base_df so both series share one index,
#   3. compute the stability metrics with main_stability and the Pearson
#      correlation between the two series.
# Per-site results are collected in annual_stab_df / pearsons_wind_hy and
# written to CSV once the loop finishes.
missing_sites = []       # no hydro CSV listed, or the hydro profile sums to zero
empty_data_sites = []    # site has no entry in rep_year_df
df_missing = pd.DataFrame()

# NaN -> 0 so that a missing 'post_csv_filename' becomes the sentinel 0 that
# the loop tests for.
pv_wind_id_df.fillna(0, inplace=True)

pearsons_wind_hy = pd.DataFrame(columns=['site_id','p'])

annual_stab_df = pd.DataFrame(columns = ['site_id', 'lat', 'lon', 'c_stab'])

# Number of hourly steps every aligned series must have.
n_hours = len(base_df)

for idx, row in pv_wind_id_df.iterrows():
    site_id = row['site_id']

    if row['post_csv_filename'] == 0:
        print( f"No file for site {row['site_id']} ")
        missing_sites.append(site_id)
        continue

    # Representative year for this stream gage. The year is looked up per site
    # (the former module-level `year = 2020` was always overwritten and has
    # been removed). Sites without an entry are skipped rather than failing
    # on `.values[0]` of an empty selection.
    rep_year_match = rep_year_df.loc[rep_year_df.site == site_id, 'year']
    if rep_year_match.empty:
        print(f"Site doesn't have enough data: {row['site_id']}")
        empty_data_sites.append(site_id)
        continue
    year = str(rep_year_match.values[0])

    print(f"{idx}. Processing Wind profile {row['wind_id']} and NPD {row['site_id']}, {row['post_csv_filename']}")

    wind_df = pd.read_csv(f"{wind_dest_path}/{row['wind_id']}_{wind_filename_prefix}.csv",parse_dates=True,index_col=0)
    hydro_df = pd.read_csv(row['post_csv_filename'],parse_dates=True,index_col=0)

    hydro_df.fillna(0, inplace=True)

    if hydro_df['capacity_factor'].sum() == 0.0:
        print( f"No data in site {row['site_id']} ")
        missing_sites.append(site_id)
        continue

    cf_mix = []
    c_stab = []

    # Re-stamp the representative year's capacity factors with the 2012
    # timestamps of base_df. The slice keeps at most n_hours values so that a
    # leap representative year (8784 hourly rows -- assumes hourly data, TODO
    # confirm) loses its last day, exactly like the wind series below, instead
    # of failing with a length mismatch. A year shorter than n_hours still
    # raises.
    rep_year_cf = hydro_df.loc[year, 'capacity_factor'].to_numpy()
    tmp_hydro_df = base_df.set_index('dateTime')
    tmp_hydro_df[site_id] = rep_year_cf[:n_hours]

    # 2012-01-01 .. 2012-12-30 is 365 days (8760 hours) of the leap year 2012.
    tmp_wind_df = wind_df.loc['2012-01-01':'2012-12-30'].rename(columns={'wind_cf': site_id})

    # Compute complementarity metrics; the wind series is passed first.
    cf_mix_tmp, c_stab_tmp = main_stability(tmp_wind_df, tmp_hydro_df)
    cf_mix_tmp.rename(columns={site_id: year}, inplace=True)

    cf_mix.append(cf_mix_tmp)
    c_stab.append(c_stab_tmp)

    # Single-element concats are kept so the downstream objects have the same
    # shape as when several years were stacked here.
    cf_mix_df = pd.concat(cf_mix, axis=1)
    c_stab_df = pd.concat(c_stab, axis=1)

    annual_mean = c_stab_df.mean()

    annual_stab_df.loc[idx,'site_id'] = site_id
    annual_stab_df.loc[idx,'lat'] = row['lat']
    annual_stab_df.loc[idx,'lon'] = row['lon']
    annual_stab_df.loc[idx,'c_stab'] = annual_mean.values[0]

    #### Pearsons
    corr, _ = pearsonr(tmp_wind_df[site_id].values, tmp_hydro_df[site_id].values)
    pearsons_wind_hy.loc[idx,'site_id'] = site_id
    pearsons_wind_hy.loc[idx,'p'] = corr

# NOTE(review): the Pearson table is written under c_stab_path although a
# dedicated pearsons_path is configured -- confirm which folder is intended.
annual_stab_df.to_csv(f"{c_stab_path}/npd_stab_wind_hydro.csv")
pearsons_wind_hy.to_csv(f"{c_stab_path}/npd_pearson_wind_hydro.csv")

0. Processing Wind profile CO01656 and NPD 9019500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_9019500_2016_synthetic.csv
No data in site 9019500 
No file for site 7069220 
2. Processing Wind profile NM00003 and NPD 8328950, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_8328950_2016_synthetic.csv
No data in site 8328950 
3. Processing Wind profile PA00110 and NPD 3020000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_3020000_2019_synthetic.csv
4. Processing Wind profile AL05903 and NPD 3592000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_3592000_2018_synthetic.csv
5. Processing Wind profile NM00293 and NPD 7227000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_7227000_2015_synthetic.csv
No data in site 7227000 
6. Processing Wind profile TX00001 and NPD 8



76. Processing Wind profile CT00378 and NPD 1188090, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_1188090_2014_synthetic.csv
77. Processing Wind profile NC00182 and NPD 2105769, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_2105769_2013_synthetic.csv
78. Processing Wind profile CO02788 and NPD 9041400, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_9041400_2019_synthetic.csv
No data in site 9041400 
79. Processing Wind profile AZ82203 and NPD 9426000, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_9426000_2012_synthetic.csv
80. Processing Wind profile IA01213 and NPD 5463050, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/data/npd/synthetic_profiles/npd_5463050_2017_synthetic.csv
81. Processing Wind profile NY00558 and NPD 4242500, /Users/galljf/OneDrive - Idaho National Laboratory/code/hat/da