In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

# Local imports
from stability_testing import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Input/output locations and the master CSV tables used by the cells below.
_hydro_root = './data/hydro'
_complementarity_root = f'{_hydro_root}/npd/complementarity'

# PV / wind capacity-factor time series. Files are looked up as
# "<dest_path>/<pv_id or wind_id>_<filename_prefix>.csv", so despite its name the
# "prefix" is appended after the site id.
pv_dest_path, wind_dest_path = (
    f'{_complementarity_root}/pv_time_series',
    f'{_complementarity_root}/wind_time_series',
)
pv_filename_prefix, wind_filename_prefix = 'cf_pv_profile', 'cf_wind_profile'

# Output folders for the computed metrics
c_stab_path = f'{_complementarity_root}/c_stab'
cf_mix_path = f'{_complementarity_root}/cf_mix'
pearsons_path = f'{_complementarity_root}/pearson'

# Master tables: representative year per stream gage, and the gage <-> PV/wind site mapping
rep_year_df = pd.read_csv(f'{_hydro_root}/misc/representative_year.csv')
pv_wind_id_df = pd.read_csv(f'{_hydro_root}/misc/master_hydro_pv_wind.csv')

years = range(2007, 2014)

In [12]:
# Preview the first three rows of the master table that maps each stream gage
# to its hydro profile file and its PV / wind site ids.
pv_wind_id_df.head(3)

Unnamed: 0,start,end,lat,lon,name,site_id,post_csv_filename,pv_id,wind_id,pv_wind_id_same
0,1/1/00,12/31/20,40.147715,-105.865879,GRANBY,9019500,./data/hydro//npd/synthetic_profiles/npd_90195...,CO01656,CO01656,True
1,1/1/00,12/31/20,36.46667,-91.53,MAMMOTH SPRINGS DAM 3,7069220,0,AR01157,AR01157,True
2,1/1/00,12/31/20,35.394714,-106.547531,JEMEZ CANYON DAM,8328950,./data/hydro//npd/synthetic_profiles/npd_83289...,NM00003,NM00003,True


## Process to compute the complementarity

The purpose of this notebook is to compute the stability and Pearson coefficients for one year of data using hourly values. The notebook uses the code in the Python script `stability_testing.py` to compute the coefficients on an annual basis. That is, an hourly time series for each resource pair is passed to the corresponding methods to compute a single value that represents the annual coefficient (stability or Pearson) for a given location. Note that the complementarity metrics are computed separately for PV-Hydro and Wind-Hydro. The process is the same for both pairs. The steps below explain this process. 

The dataframe (DF) `pv_wind_id_df` contains meta data about each stream gage (hydro) and the co-located pv and wind location, along with the path for the synthetic profile.

1. A `for` loop runs through every row (i.e. every stream gage) of the `pv_wind_id_df` DF, to obtain the filenames for the given gage, and pv and wind locations.
2. General housekeeping occurs at the beginning of the loop. For example, we check whether the file exists or if there is data present.
3. Read the csv files for hydro and PV/Wind.
4. Several steps clean up and prepare the data for computing the stability coefficient. At this point, we need to make sure the time series are synchronous in their time steps. Due to data quality issues, we didn't use the same year for every hydro site, so we need to change the hydro timestamps to match the year 2012, which is the one used for wind and PV.
5. Compute the complementarity metrics.

# Complementarity PV-Hydro

In [11]:
from pathlib import Path
from scipy.stats import pearsonr

# Reference hourly timeline: 8760 steps starting 2012-01-01 00:00 UTC. 2012 is the
# year of the PV/wind profiles; hydro values are re-stamped onto this timeline.
HOURS_PER_YEAR = 8760
# A Timedelta step is used instead of the '1H' alias, which recent pandas releases deprecate.
rng = pd.date_range('2012-01-01 00:00:00+00:00', periods=HOURS_PER_YEAR, freq=pd.Timedelta(hours=1))
base_df = pd.DataFrame()
base_df['dateTime'] = rng
hourly_index = base_df.set_index('dateTime').index

### Initialize bookkeeping and clean-up
missing_sites = []       # sites with no hydro file, or whose hydro profile sums to zero
empty_data_sites = []    # sites without a usable representative year
df_missing = pd.DataFrame()
# Missing cells become 0, so a missing hydro filename reads as 0 in the loop below.
pv_wind_id_df = pv_wind_id_df.fillna(0)

# Results are collected as plain records and turned into dataframes once, after the loop.
processed_idx = []
stab_records = []
pearson_records = []

for idx, row in pv_wind_id_df.iterrows():
    site_id = row['site_id']

    if row['post_csv_filename'] == 0:
        print( f"No file for site {site_id} ")
        missing_sites.append(site_id)
        continue

    if site_id not in rep_year_df.site.values:
        print(f"Site doesn't have enough data: {site_id}")
        empty_data_sites.append(site_id)
        continue

    print(f"{idx}. Processing PV profile {row['pv_id']} and NPD {site_id}, {row['post_csv_filename']}")

    solar_df = pd.read_csv(f"{pv_dest_path}/{row['pv_id']}_{pv_filename_prefix}.csv", parse_dates=True, index_col=0)
    hydro_df = pd.read_csv(row['post_csv_filename'], parse_dates=True, index_col=0).fillna(0)

    if float(hydro_df['capacity_factor'].sum()) == 0.0:
        print( f"No data in site {site_id} ")
        missing_sites.append(site_id)
        continue

    # Representative year for this stream gage (the year chosen for data completeness)
    year = str(rep_year_df['year'].loc[rep_year_df.site == site_id].values[0])

    # Copy the representative-year capacity factors onto the 2012 timeline.
    # Only the first 8760 values are kept, so a leap year (8784 hourly values) still
    # fits; a shorter year cannot be aligned hour-by-hour and the site is skipped.
    hydro_cf = hydro_df.loc[year, 'capacity_factor'].to_numpy()
    if len(hydro_cf) < HOURS_PER_YEAR:
        print(f"Representative year {year} is incomplete for site {site_id}")
        empty_data_sites.append(site_id)
        continue
    tmp_hydro_df = pd.DataFrame({site_id: hydro_cf[:HOURS_PER_YEAR]}, index=hourly_index)

    # 2012 is a leap year, so Jan 1 - Dec 30 spans 8760 hours
    # (assumes the PV profile is hourly and complete -- TODO confirm)
    tmp_solar_df = solar_df.loc['2012-01-01':'2012-12-30'].rename(columns={'pv_cf': site_id})

    # Compute complementarity metrics (PV as reference)
    cf_mix_tmp, c_stab_tmp = main_stability(tmp_solar_df, tmp_hydro_df)
    c_stab_df = pd.concat([c_stab_tmp], axis=1)
    annual_mean = c_stab_df.mean()

    #### Pearsons
    corr, p_value = pearsonr(tmp_solar_df[site_id].values, tmp_hydro_df[site_id].values)

    processed_idx.append(idx)
    stab_records.append({
        'site_id': site_id,
        'lat': row['lat'],
        'lon': row['lon'],
        'c_stab': annual_mean.values[0],
    })
    # The 'p' column holds the correlation coefficient (not the p-value).
    pearson_records.append({'site_id': site_id, 'p': corr})

# Rows keep the index label of the master table, as when filling the frames row by row.
annual_stab_df = pd.DataFrame(stab_records, index=processed_idx, columns=['site_id', 'lat', 'lon', 'c_stab'])
pearsons_pv_hy = pd.DataFrame(pearson_records, index=processed_idx, columns=['site_id', 'p'])

annual_stab_df.to_csv(f"{c_stab_path}/npd_stab_pv_hydro.csv")
# NOTE(review): the Pearson table is written under c_stab_path although a separate
# pearsons_path is configured -- confirm which folder is intended.
pearsons_pv_hy.to_csv(f"{c_stab_path}/npd_pearson_pv_hydro.csv")

0. Processing PV profile CO01656 and NPD 9019500, ./data/hydro//npd/synthetic_profiles/npd_9019500_2016_synthetic.csv
No data in site 9019500 
No file for site 7069220 
2. Processing PV profile NM00003 and NPD 8328950, ./data/hydro//npd/synthetic_profiles/npd_8328950_2016_synthetic.csv
No data in site 8328950 
3. Processing PV profile PA00110 and NPD 3020000, ./data/hydro//npd/synthetic_profiles/npd_3020000_2019_synthetic.csv
4. Processing PV profile AL05903 and NPD 3592000, ./data/hydro//npd/synthetic_profiles/npd_3592000_2018_synthetic.csv
5. Processing PV profile NM00293 and NPD 7227000, ./data/hydro//npd/synthetic_profiles/npd_7227000_2015_synthetic.csv
No data in site 7227000 
6. Processing PV profile TX00001 and NPD 8063800, ./data/hydro//npd/synthetic_profiles/npd_8063800_2012_synthetic.csv
No data in site 8063800 
7. Processing PV profile WY01496 and NPD 6670500, ./data/hydro//npd/synthetic_profiles/npd_6670500_2019_synthetic.csv
8. Processing PV profile CT00583 and NPD 1184000



76. Processing PV profile CT00378 and NPD 1188090, ./data/hydro//npd/synthetic_profiles/npd_1188090_2014_synthetic.csv
77. Processing PV profile NC00182 and NPD 2105769, ./data/hydro//npd/synthetic_profiles/npd_2105769_2013_synthetic.csv
78. Processing PV profile CO02788 and NPD 9041400, ./data/hydro//npd/synthetic_profiles/npd_9041400_2019_synthetic.csv
No data in site 9041400 
79. Processing PV profile AZ82203 and NPD 9426000, ./data/hydro//npd/synthetic_profiles/npd_9426000_2012_synthetic.csv
80. Processing PV profile IA01213 and NPD 5463050, ./data/hydro//npd/synthetic_profiles/npd_5463050_2017_synthetic.csv
81. Processing PV profile NY00558 and NPD 4242500, ./data/hydro//npd/synthetic_profiles/npd_4242500_2011_synthetic.csv
No data in site 4242500 
82. Processing PV profile 294 and NPD 3209000, ./data/hydro//npd/synthetic_profiles/npd_3209000_2015_synthetic.csv
No data in site 3209000 
83. Processing PV profile KY03003 and NPD 3320000, ./data/hydro//npd/synthetic_profiles/npd_3320

# Complementarity Wind-hydro

In [10]:
# Stability and Pearson coefficients for each Wind-Hydro pair
from scipy.stats import pearsonr

# Reference hourly timeline: 8760 steps starting 2012-01-01 00:00 UTC, the year of the
# wind profiles. Built locally so this cell does not depend on another cell having run first.
HOURS_PER_YEAR = 8760
hourly_index = pd.DataFrame({
    'dateTime': pd.date_range('2012-01-01 00:00:00+00:00', periods=HOURS_PER_YEAR, freq=pd.Timedelta(hours=1))
}).set_index('dateTime').index

missing_sites = []       # sites with no hydro file, or whose hydro profile sums to zero
empty_data_sites = []    # sites without a usable representative year
df_missing = pd.DataFrame()

# Missing cells become 0, so a missing hydro filename reads as 0 in the loop below.
pv_wind_id_df = pv_wind_id_df.fillna(0)

# Results are collected as plain records and turned into dataframes once, after the loop.
processed_idx = []
stab_records = []
pearson_records = []

for idx, row in pv_wind_id_df.iterrows():
    site_id = row['site_id']

    if row['post_csv_filename'] == 0:
        print( f"No file for site {site_id} ")
        missing_sites.append(site_id)
        continue

    # Without this guard the representative-year lookup below fails with an
    # IndexError for gages that are absent from rep_year_df.
    if site_id not in rep_year_df.site.values:
        print(f"Site doesn't have enough data: {site_id}")
        empty_data_sites.append(site_id)
        continue

    print(f"{idx}. Processing Wind profile {row['wind_id']} and NPD {site_id}, {row['post_csv_filename']}")

    wind_df = pd.read_csv(f"{wind_dest_path}/{row['wind_id']}_{wind_filename_prefix}.csv", parse_dates=True, index_col=0)
    hydro_df = pd.read_csv(row['post_csv_filename'], parse_dates=True, index_col=0).fillna(0)

    if hydro_df['capacity_factor'].sum() == 0.0:
        print( f"No data in site {site_id} ")
        missing_sites.append(site_id)
        continue

    # Representative year for this stream gage (the year chosen for data completeness)
    year = str(rep_year_df['year'].loc[rep_year_df.site == site_id].values[0])

    # Copy the representative-year capacity factors onto the 2012 timeline.
    # Only the first 8760 values are kept, so a leap year (8784 hourly values) still
    # fits; a shorter year cannot be aligned hour-by-hour and the site is skipped.
    hydro_cf = hydro_df.loc[year, 'capacity_factor'].to_numpy()
    if len(hydro_cf) < HOURS_PER_YEAR:
        print(f"Representative year {year} is incomplete for site {site_id}")
        empty_data_sites.append(site_id)
        continue
    tmp_hydro_df = pd.DataFrame({site_id: hydro_cf[:HOURS_PER_YEAR]}, index=hourly_index)

    # 2012 is a leap year, so Jan 1 - Dec 30 spans 8760 hours
    # (assumes the wind profile is hourly and complete -- TODO confirm)
    tmp_wind_df = wind_df.loc['2012-01-01':'2012-12-30'].rename(columns={'wind_cf': site_id})

    # Compute complementarity metrics (wind as reference)
    cf_mix_tmp, c_stab_tmp = main_stability(tmp_wind_df, tmp_hydro_df)
    c_stab_df = pd.concat([c_stab_tmp], axis=1)
    annual_mean = c_stab_df.mean()

    #### Pearsons
    corr, p_value = pearsonr(tmp_wind_df[site_id].values, tmp_hydro_df[site_id].values)

    processed_idx.append(idx)
    stab_records.append({
        'site_id': site_id,
        'lat': row['lat'],
        'lon': row['lon'],
        'c_stab': annual_mean.values[0],
    })
    # The 'p' column holds the correlation coefficient (not the p-value).
    pearson_records.append({'site_id': site_id, 'p': corr})

# Rows keep the index label of the master table, as when filling the frames row by row.
annual_stab_df = pd.DataFrame(stab_records, index=processed_idx, columns=['site_id', 'lat', 'lon', 'c_stab'])
pearsons_wind_hy = pd.DataFrame(pearson_records, index=processed_idx, columns=['site_id', 'p'])

annual_stab_df.to_csv(f"{c_stab_path}/npd_stab_wind_hydro.csv")
# NOTE(review): the Pearson table is written under c_stab_path although a separate
# pearsons_path is configured -- confirm which folder is intended.
pearsons_wind_hy.to_csv(f"{c_stab_path}/npd_pearson_wind_hydro.csv")

0. Processing Wind profile CO01656 and NPD 9019500, ./data/hydro//npd/synthetic_profiles/npd_9019500_2016_synthetic.csv
No data in site 9019500 
No file for site 7069220 
2. Processing Wind profile NM00003 and NPD 8328950, ./data/hydro//npd/synthetic_profiles/npd_8328950_2016_synthetic.csv
No data in site 8328950 
3. Processing Wind profile PA00110 and NPD 3020000, ./data/hydro//npd/synthetic_profiles/npd_3020000_2019_synthetic.csv
4. Processing Wind profile AL05903 and NPD 3592000, ./data/hydro//npd/synthetic_profiles/npd_3592000_2018_synthetic.csv
5. Processing Wind profile NM00293 and NPD 7227000, ./data/hydro//npd/synthetic_profiles/npd_7227000_2015_synthetic.csv
No data in site 7227000 
6. Processing Wind profile TX00001 and NPD 8063800, ./data/hydro//npd/synthetic_profiles/npd_8063800_2012_synthetic.csv
No data in site 8063800 
7. Processing Wind profile WY01496 and NPD 6670500, ./data/hydro//npd/synthetic_profiles/npd_6670500_2019_synthetic.csv
8. Processing Wind profile CT00583



76. Processing Wind profile CT00378 and NPD 1188090, ./data/hydro//npd/synthetic_profiles/npd_1188090_2014_synthetic.csv
77. Processing Wind profile NC00182 and NPD 2105769, ./data/hydro//npd/synthetic_profiles/npd_2105769_2013_synthetic.csv
78. Processing Wind profile CO02788 and NPD 9041400, ./data/hydro//npd/synthetic_profiles/npd_9041400_2019_synthetic.csv
No data in site 9041400 
79. Processing Wind profile AZ82203 and NPD 9426000, ./data/hydro//npd/synthetic_profiles/npd_9426000_2012_synthetic.csv
80. Processing Wind profile IA01213 and NPD 5463050, ./data/hydro//npd/synthetic_profiles/npd_5463050_2017_synthetic.csv
81. Processing Wind profile NY00558 and NPD 4242500, ./data/hydro//npd/synthetic_profiles/npd_4242500_2011_synthetic.csv
No data in site 4242500 
82. Processing Wind profile 294 and NPD 3209000, ./data/hydro//npd/synthetic_profiles/npd_3209000_2015_synthetic.csv
No data in site 3209000 
83. Processing Wind profile KY03003 and NPD 3320000, ./data/hydro//npd/synthetic_p