In [None]:
from pathlib import Path
import pandas as pd
from pgscen.command_line import (load_load_data, load_solar_data,
                                 split_actuals_hist_future,
                                 split_forecasts_hist_future)


# Locate the data directory relative to the notebook's working directory.
cur_path = Path("day-ahead_load_scenario.ipynb").parent.resolve()
data_dir = Path(cur_path, '..', "data").resolve()

# Load the load-zone and solar-site datasets with pgscen's default loaders.
load_zone_actual_df, load_zone_forecast_df = load_load_data()
solar_site_actual_df, solar_site_forecast_df, solar_meta_df = load_solar_data()

# Scenario horizon: 24 hourly steps starting at 06:00 UTC on 2018-10-01.
scenario_start_time = pd.to_datetime('2018-10-01 06:00:00', utc=True)
scen_timesteps = pd.date_range(
    start=scenario_start_time, periods=24, freq='H')

# Split every dataset around the scenario horizon using the pgscen helpers
# (presumably into values before vs. during the horizon -- TODO confirm
# against the pgscen documentation).
load_zone_actual_hists, load_zone_actual_futures = split_actuals_hist_future(
    load_zone_actual_df, scen_timesteps)
load_zone_forecast_hists, load_zone_forecast_futures = (
    split_forecasts_hist_future(load_zone_forecast_df, scen_timesteps))

solar_site_actual_hists, solar_site_actual_futures = split_actuals_hist_future(
    solar_site_actual_df, scen_timesteps)
solar_site_forecast_hists, solar_site_forecast_futures = (
    split_forecasts_hist_future(solar_site_forecast_df, scen_timesteps))

In [None]:
from pgscen.engine import SolarGeminiEngine

# Number of scenarios to draw from the fitted joint model.
scenario_count = 1000

# Build the solar engine from the historical actuals/forecasts and metadata.
se = SolarGeminiEngine(
    solar_site_actual_hists, solar_site_forecast_hists,
    scenario_start_time, solar_meta_df,
    )

# Fit the joint load-solar model on the load history, then draw scenarios
# conditioned on the forecasts for the scenario horizon.
se.fit_load_solar_joint_model(
    load_zone_actual_hists, load_zone_forecast_hists)
se.create_load_solar_joint_scenario(
    scenario_count, load_zone_forecast_futures, solar_site_forecast_futures)

In [None]:
out_dir = Path(data_dir, '..')
se.write_to_csv(out_dir, {'load': load_zone_actual_futures,
                          'solar': solar_site_actual_futures},
                write_forecasts=True)
!ls {Path(out_dir, "20181001", 'solar')}

In [None]:
# Scenario output for a single solar site, read back from the CSV written above.
df = pd.read_csv(f"{out_dir}/20181001/solar/Adamstown_Solar.csv")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = [19, 11]

_ = plt.hist(df['1700'], bins=30)

In [None]:
# Scenario output for two load zones, read back from the CSVs written above.
df1 = pd.read_csv(f"{out_dir}/20181001/load/South_Central.csv")
df2 = pd.read_csv(f"{out_dir}/20181001/load/Coast.csv")

# df1 = pd.read_csv('/Users/xy3134/Research/PERFORM/Data/GEMINI_scenario/test/20180210/solar/solar290.csv')
# df2 = pd.read_csv('/Users/xy3134/Research/PERFORM/Data/GEMINI_scenario/test/20180210/solar/solar291.csv')

In [None]:
# Histogram of rows 2 onwards in the '1900' column, with row 0 marked by a
# red vertical line and row 1 by a green one.
col_1900 = df['1900']
plt.hist(col_1900.iloc[2:], bins=30)
plt.vlines(col_1900.iloc[0:1], ymin=0, ymax=120, color='red')
plt.vlines(col_1900.iloc[1:2], ymin=0, ymax=120, color='green')

In [None]:
import numpy as np

# Rows 0 and 1 are plotted below as the actual and forecast series, so the
# randomly chosen "scenario" row must come from row 2 onwards. Drawing from
# [0, 1000) could pick row 0 or 1 and silently re-plot the actual/forecast
# as the scenario, and could never reach the last rows of the frame.
idx = np.random.randint(2, len(df1))
# idx = 200

# The first two columns are skipped; the remaining columns are plotted as the
# series values.
plt.plot(df1.iloc[0, 2:], label='actual')
plt.plot(df1.iloc[1, 2:], label='forecast')
plt.plot(df1.iloc[idx, 2:], label='scenario')
plt.legend()

In [None]:
# Compare one fixed scenario row against rows 0 and 1 of the second zone,
# skipping the first two columns of each row.
idx = 200
for row_pos, series_label in ((0, 'actual'), (1, 'forecast'),
                              (idx, 'scenario')):
    plt.plot(df2.iloc[row_pos, 2:], label=series_label)
plt.legend()

In [None]:
# Pick one (asset, timestep) key from the solar model's conditional GPD
# dictionary by position and show the data stored for it.
solar_gpd_dict = se.gemini_dict['day']['solar_model'].conditional_gpd_dict
plt_asset, plt_ts = tuple(solar_gpd_dict.keys())[204]
print('   '.join((plt_asset, str(plt_ts))))

gpd_data = solar_gpd_dict[plt_asset, plt_ts].slots['data']
_ = plt.hist(gpd_data, bins=30)

In [None]:
load_md = se.gemini_dict['day']['load_model']
# df[df['1900']>18000]

# Plot the 'Coast' entries of two hand-picked rows of the scenario table.
for scen_label in (893, 342):
    _ = plt.plot(load_md.scen_df.loc[scen_label, 'Coast'])

In [None]:
# Fourth timestep of the load model's scenario horizon.
plt_ts = load_md.scen_timesteps[3]
print(plt_ts)

# Histogram of the 'Coast' scenario values at that timestep.
coast_scens = load_md.scen_df['Coast']
_ = plt.hist(coast_scens[plt_ts], bins=30)

In [None]:
# Pair each actual load observation with the forecast issued for the same
# time; overlapping column names get '_Actual' / '_Forecast' suffixes.
load_act_fcst_df = pd.merge(
    load_zone_actual_df, load_zone_forecast_df,
    how='inner', left_on='Time', right_on='Forecast_time',
    suffixes=['_Actual','_Forecast'],
    )

# Forecast error (actual minus forecast) for the Coast zone, plotted against
# the forecast value.
df = load_act_fcst_df.loc[:, ['Coast_Actual', 'Coast_Forecast']]
df['Deviation'] = df['Coast_Actual'].sub(df['Coast_Forecast'])
_ = plt.scatter(df['Coast_Forecast'], df['Deviation'])

In [None]:
joint_md = se.gemini_dict['day']['joint_model']

# Rescale the asset covariance matrix into a correlation matrix:
# D @ cov @ D, where D is diagonal with 1 / sqrt(variance) entries.
cov = joint_md.asset_cov.values
inv_std_diag = np.diag(1 / np.sqrt(np.diag(cov)))
arr = inv_std_diag @ cov @ inv_std_diag

# Re-attach the asset labels carried by the covariance frame.
corr = pd.DataFrame(
    data=arr,
    columns=joint_md.asset_cov.columns,
    index=joint_md.asset_cov.index,
    )

corr

In [None]:
import seaborn as sns
from scipy.spatial import distance
from scipy.cluster.hierarchy import linkage, dendrogram

# Diverging colormap used for the correlation heatmap.
cov_cmap = sns.diverging_palette(
    h_neg=3, h_pos=237, s=81, l=43, sep=3, as_cmap=True)

def get_clustermat(cov_mat):
    """Reorder a labelled matrix by hierarchical-clustering leaf order.

    Rows of `cov_mat` are treated as observation vectors: pairwise Euclidean
    distances between them feed a centroid-linkage clustering, and the
    dendrogram's leaf order is applied to both the rows and the columns.

    Args:
        cov_mat: pandas DataFrame; since the same positional order is applied
            to both axes it is expected to be square (e.g. a covariance or
            correlation matrix).

    Returns:
        DataFrame with the rows and columns of `cov_mat` permuted into the
        clustering order.
    """
    row_dists = distance.pdist(cov_mat, metric='euclidean')
    merge_tree = linkage(row_dists, method='centroid')

    # no_plot=True: only the leaf ordering is needed, not a drawn dendrogram.
    leaf_order = dendrogram(merge_tree, no_plot=True)['leaves']

    return cov_mat.iloc[leaf_order, leaf_order]

# Correlation heatmap with assets reordered by hierarchical clustering; the
# color scale is pinned to the full [-1, 1] range.
clustered_corr = get_clustermat(corr)
_ = sns.heatmap(clustered_corr, cmap=cov_cmap,
                vmin=-1, vmax=1, square=True)

In [None]:
# Heatmap of the joint model's `gauss_df` table.
_ = sns.heatmap(data=joint_md.gauss_df)

In [None]:
from statsmodels.distributions.empirical_distribution import ECDF
from scipy.stats import norm
import numpy as np

# Sanity check of the ECDF -> normal-quantile transform on a standard normal
# sample: the transformed values should again look standard normal.
arr = np.random.randn(1000)
f = ECDF(arr)

# The ECDF equals exactly 1.0 at the sample maximum, where norm.ppf is +inf.
# Clip the *probabilities* away from 0 and 1 before applying the quantile
# function. Clipping the output of norm.ppf to [0, 1] instead would squash
# the z-scores themselves onto [0, 1] and distort the histogram.
eps = 0.5 / arr.size
_ = plt.hist(norm.ppf(np.clip(f(arr), eps, 1 - eps)), bins=100)