In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pgscen.engine import GeminiEngine
from pgscen.utils.data_utils import split_actuals_hist_future, split_forecasts_hist_future
from pathlib import Path
from pgscen.pca import PCAGeminiEngine, PCAGeminiModel
from pgscen.utils.solar_utils import get_yearly_date_range

In [None]:
# IPython shell escape: list the contents of ../data/MetaData (needs a shell that provides `ls`).
!ls ../data/MetaData

In [None]:
def load_solar_data(data_path):
    """Read the NREL solar actuals, day-ahead forecasts and site metadata.

    Parameters
    ----------
    data_path : str or path-like
        Root directory that contains the ``Solar/NREL`` and ``MetaData``
        sub-directories.

    Returns
    -------
    tuple
        ``(actuals, forecasts, metadata)`` where

        * ``actuals`` is the hourly site-level actuals CSV, indexed by its
          parsed ``Time`` column;
        * ``forecasts`` is the day-ahead site-level forecast CSV with the
          ``Issue_time`` and ``Forecast_time`` columns parsed as dates;
        * ``metadata`` is the content of ``MetaData/solar_meta.xlsx``.
    """
    nrel_dir = Path(data_path, 'Solar', 'NREL')

    # Hourly actuals, one row per timestamp.
    actual_csv = Path(nrel_dir, 'Actual',
                      'solar_actual_1h_site_2017_2018_utc.csv')
    actuals = pd.read_csv(actual_csv, parse_dates=['Time'], index_col='Time')

    # Day-ahead forecasts; both time columns are kept as regular columns.
    forecast_csv = Path(nrel_dir, 'Day-ahead',
                        'solar_day_ahead_forecast_site_2017_2018_utc.csv')
    forecasts = pd.read_csv(forecast_csv,
                            parse_dates=['Issue_time', 'Forecast_time'])

    # Site metadata spreadsheet.
    metadata = pd.read_excel(Path(data_path, 'MetaData', 'solar_meta.xlsx'))

    return actuals, forecasts, metadata

In [None]:
# Number of scenarios to generate.
nscen = 1000

# Timezone-aware (UTC) timestamp of the first scenario timestep.
scen_start_time = pd.to_datetime('2018-07-01 06:00:00', utc=True)

In [None]:
# Load the actuals, day-ahead forecasts and site metadata from the repository data folder.
(solar_site_actual_df,
 solar_site_forecast_df,
 solar_meta_df) = load_solar_data('../data/')

In [None]:
# The 24 hourly timesteps covered by the scenarios, beginning at scen_start_time.
# Lowercase 'h' is used because the uppercase 'H' frequency alias is
# deprecated in pandas >= 2.2; 'h' is also accepted by older releases.
scen_timesteps = pd.date_range(start=scen_start_time, periods=24, freq='h')

# Split the actuals into a historical part and the part for the scenario
# timesteps (presumably "before" vs. "during" scen_timesteps -- see
# pgscen.utils.data_utils for the exact rule).
solar_site_actual_hists, solar_site_actual_futures = split_actuals_hist_future(
    solar_site_actual_df, scen_timesteps)

# Same split for the day-ahead forecasts.
solar_site_forecast_hists, solar_site_forecast_futures = split_forecasts_hist_future(
    solar_site_forecast_df, scen_timesteps)

In [None]:
# Historical dates selected by get_yearly_date_range around the scenario date
# (num_of_days=50), bounded by the first and last calendar day present in the
# historical actuals. The dates are sorted and the latest one is dropped.
# NOTE(review): the reason for dropping the last date is not visible here -- confirm.
hist_dates = sorted(
    get_yearly_date_range(
        date=scen_start_time,
        num_of_days=50,
        start=str(solar_site_actual_hists.index.min().date()),
        end=str(solar_site_actual_hists.index.max().date()),
    )
)[:-1]

# Forecast issue time associated with each historical date: six hours earlier.
# Lowercase 'h' replaces the 'H' unit, which is deprecated in pandas >= 2.2.
hist_fcst_issue_times = [t - pd.Timedelta(6, unit='h') for t in hist_dates]

In [None]:
# Keep only the forecasts issued at one of the selected historical issue times.
issued_in_hist = solar_site_forecast_hists['Issue_time'].isin(hist_fcst_issue_times)
solar_site_forecast_hists = solar_site_forecast_hists[issued_in_hist]

# Time span covered by the retained forecasts.
hist_start = solar_site_forecast_hists['Forecast_time'].min()
hist_end = solar_site_forecast_hists['Forecast_time'].max()

# Trim the actuals to that same span (both ends inclusive).
actual_times = solar_site_actual_hists.index
solar_site_actual_hists = solar_site_actual_hists[
    (actual_times >= hist_start) & (actual_times <= hist_end)]

In [None]:
# Display the historical actuals as they stand at this point in the notebook.
solar_site_actual_hists

In [None]:
# Build the PCA scenario engine from the historical actuals and forecasts,
# the scenario start time and the site metadata.
pge = PCAGeminiEngine(solar_site_actual_hists, solar_site_forecast_hists, scen_start_time, solar_meta_df)
# Result of asset_distance() as a plain NumPy array (presumably pairwise
# distances between sites -- confirm in pgscen).
dist = pge.asset_distance().values
# The second argument rescales `dist` so that its largest entry becomes 0.1.
# NOTE(review): the meaning of the positional arguments (10, the scaled
# distances, 5e-2) is not visible in this notebook -- check
# PCAGeminiEngine.fit before changing them.
pge.fit(10, dist / (10 * dist.max()), 5e-2)

In [None]:
# Inspect the model's `pca_dict` attribute (presumably the fitted PCA objects -- TODO confirm).
pge.model.pca_dict

In [None]:
# Generate scenarios from the forecasts for the scenario timesteps.
# The count comes from the notebook-level `nscen` setting (1000) instead of a
# second hard-coded literal, so the two cannot drift apart.
pge.create_scenario(nscen, solar_site_forecast_futures)

In [None]:
# Display the model's `scen_df` rounded to two decimals (presumably the scenarios generated above -- confirm).
pge.model.scen_df.round(2)

In [None]:
# Inspect the model's `hist_dev_df` attribute (presumably historical forecast deviations -- TODO confirm).
pge.model.hist_dev_df

In [None]:
# Write the model's `asset_cov` table to CSV.
# NOTE(review): the destination is an absolute, user-specific path, so this
# cell only works on the original author's machine -- make it configurable.
# NOTE(review): index=False drops the row labels; if `asset_cov` is a labelled
# square matrix only the column headers survive -- confirm this is intended.
pge.model.asset_cov.to_csv('/Users/xy3134/Research/PERFORM/notebooks/NREL/solar_pca/data/asset_cov.csv',index=False)

In [None]:
# Output root for the scenario CSV files.
# NOTE(review): absolute, user-specific path -- this cell is not portable as written.
save_dir = '/Users/xy3134/Research/PERFORM/Data/Outputs/PGscen/PCA'

# Write the engine's output under save_dir, passing the actuals for the
# scenario timesteps and asking for the forecasts to be written as well.
pge.write_to_csv(save_dir,solar_site_actual_futures,write_forecasts=True)

In [None]:
# Read back one per-asset CSV from the output tree rooted at `save_dir`.
# The folder is derived from `save_dir` rather than repeating the absolute
# path a second time; the resulting string is identical to the former literal.
# NOTE(review): the '20180102' folder does not match the scen_start_time set
# earlier in this notebook (2018-07-01) -- confirm this is the intended run.
datadir = save_dir + '/20180102/solar/'
pd.read_csv(Path(datadir, 'Adamstown_Solar.csv'))

In [None]:
# Call fit() directly on the underlying model with two explicit parameters.
# NOTE(review): this comes after the earlier pge.fit(...) call and presumably
# replaces that fit; the meaning of the two 1e-2 arguments is not visible here.
pge.model.fit(1e-2, 1e-2)

In [None]:
# Inspect the model's `horizon_cov` attribute (`pge.model.asset_cov` can be viewed the same way).
pge.model.horizon_cov

In [None]:
# Short alias for the engine's model object, used by the cells that follow.
md = pge.model

In [None]:
# Drive the model step by step: load the forecasts for the scenario
# timesteps, then generate `nscen` scenarios.
# `nscen` is the notebook-level setting defined near the top (1000); it is no
# longer re-assigned here so there is a single place to change it.
# NOTE(review): no random seed is set in this notebook; if the generator draws
# random samples the output will differ between runs -- consider seeding.
md.get_forecast(solar_site_forecast_futures)
md.generate_gauss_pca_scenarios(nscen)

In [None]:
# Histogram (30 bins) of the values in column index 8 of scen_df, rounded to
# two decimals. An explicit Axes is used so the figure carries a title and
# axis labels; the trailing semicolon suppresses the return-value repr.
fig, ax = plt.subplots()
ax.hist(md.scen_df.round(2).values[:, 8], bins=30)
ax.set(title='scen_df column 8',
       xlabel='value (rounded to 2 decimals)',
       ylabel='count');

In [None]:
# NOTE(review): `scen_df` is created empty and never filled in the visible
# cells; it is kept so that any later use of the name still works.
scen_df = pd.DataFrame()

# Map each asset's PCA-space scenarios back through the model's PCA and keep
# the resulting arrays keyed by asset. Previously the inverse transform was
# computed on every iteration and its result discarded.
inverse_scen_by_asset = {}
for asset in md.asset_list:
    # Columns of scen_pca_df are addressed as (asset, timestep) pairs.
    pca_cols = [(asset, t) for t in md.pca_scen_timesteps]
    inverse_scen_by_asset[asset] = md.pca.inverse_transform(
        md.scen_pca_df[pca_cols].values)

In [None]:
# Inverse-transform the PCA-space scenarios of one explicitly named asset.
# The columns used to come from `pca_cols`, a variable left over from the loop
# in the previous cell (i.e. whichever asset is last in md.asset_list), while
# the following cells combine `arr` with the 'Adamstown Solar' mean.
# NOTE(review): assumes 'Adamstown Solar' is one of the asset labels in
# md.scen_pca_df's columns -- confirm.
adamstown_cols = [('Adamstown Solar', t) for t in md.pca_scen_timesteps]
arr = md.pca.inverse_transform(md.scen_pca_df[adamstown_cols].values)

In [None]:
# First row of `arr` after adding the 'mean' entry stored for 'Adamstown Solar'.
# NOTE(review): make sure `arr` was computed for the same asset as this mean.
(arr+md.hist_dev_mean_dict['Adamstown Solar']['mean'])[0,:]

In [None]:
# The 'mean' entry stored for 'Adamstown Solar' in hist_dev_mean_dict.
md.hist_dev_mean_dict['Adamstown Solar']['mean']

In [None]:
# First row of the inverse-transformed array, before any mean is added.
arr[0,:]