In [5]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from typing import Dict, Any

In [6]:
import os
# Show the kernel's working directory: the relative '../data/...' paths used
# by the loading cells below are resolved against it.
os.getcwd()

'/home/jonasmmiguel/Documents/learning/poli/thesis/wind-stf/prototyping'

# Loading Data

In [3]:
# Load the persisted model artifact. The context manager closes the file
# handle as soon as unpickling finishes (a bare open() leaves it dangling).
# NOTE(review): pickle.load can execute arbitrary code -- only load artifacts
# produced by this project's own pipeline.
with open('../data/06_models/model.pkl/2020-10-09T00.59.06.757Z/model.pkl', 'rb') as model_file:
    models = pickle.load(model_file)

In [7]:
# Read the unscaled inference frame from the model-input folder.
df_infer = pd.read_hdf('../data/05_model_input/df_infer.hdf', key='df_infer')
# Second name bound to the very same frame object.
capacity_factors_daily_2000to2015 = df_infer

df_infer.head(3)

Unnamed: 0_level_0,DE145,DE114,DE146,DE132,DE12A,DE133,DE12C,DE11C,DE118,DE119,...,DEG01,DEG0F,DE275,DE21C,DE234,DE251,DE276,DE278,DE718,DE943
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01,0.219507,0.269458,0.24054,0.263019,0.593148,0.331149,0.555176,0.28414,0.357407,0.410291,...,0.300818,0.363388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-02,0.083655,0.063571,0.043303,0.005984,0.080731,0.007986,0.06612,0.083709,0.07874,0.108224,...,0.150699,0.13393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-03,0.246707,0.229298,0.337413,0.047907,0.279092,0.136004,0.23743,0.320095,0.46015,0.516133,...,0.348737,0.297686,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Read the scaled inference frame from the model-input folder.
df_infer_scaled = pd.read_hdf('../data/05_model_input/df_infer_scaled.hdf', key='df_infer_scaled')
# `capacity_factors_daily_2000to2015` is bound to this same (scaled) frame.
capacity_factors_daily_2000to2015 = df_infer_scaled

df_infer_scaled.head(3)

Unnamed: 0_level_0,DE145,DE114,DE146,DE132,DE12A,DE133,DE12C,DE11C,DE118,DE119,...,DEG01,DEG0F,DE275,DE21C,DE234,DE251,DE276,DE278,DE718,DE943
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01,6.030707,6.332357,6.507886,6.628375,6.878469,6.701568,6.856132,6.266206,6.590818,6.636133,...,6.701568,6.824102,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08
2013-01-02,5.232691,5.199338,5.104712,4.3913,5.32752,4.303404,5.254946,5.191001,5.556967,5.489596,...,6.141913,5.918106,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08
2013-01-03,6.133282,6.227721,6.755229,5.601776,6.190702,6.046516,6.107795,6.370123,6.856132,6.913633,...,6.856132,6.555127,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08,1e-08


In [9]:
# Load the cross-validation split positions. The context manager closes the
# file handle once unpickling is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load artifacts
# produced by this project's own pipeline.
with open('../data/05_model_input/cv_splits_positions.pkl/2020-10-09T00.50.52.126Z/cv_splits_positions.pkl', 'rb') as splits_file:
    splits_positions = pickle.load(splits_file)
splits_positions

{'pass 1': {'train': slice(0, 542, None), 'val': slice(542, 549, None)},
 'pass 2': {'train': slice(0, 719, None), 'val': slice(719, 726, None)},
 'pass 3': {'train': slice(0, 896, None), 'val': slice(896, 903, None)}}

In [10]:
# Modeling configuration shared by the cells below.
modeling = {
    'approach': 'HW-ES',
    'mode': 'districtwise',
    # date range the models are fitted on
    'model_inference_window': {'start': '2013-01-01', 'end': '2015-06-22'},
    # date range right after the inference window
    'test_window': {'start': '2015-06-23', 'end': '2015-06-29'},
    'preprocessing': [
        'get_quantile_equivalent_normal_dist',
        'make_strictly_positive',
    ],
    # forwarded as keyword arguments to ExponentialSmoothing
    'hyperpars': {
        'trend': 'additive',
        'seasonal': 'multiplicative',
        'seasonal_periods': 7,  # daily data -> one-week season
    },
    # districts that get a model
    'targets': ['DEF0C', 'DE111'],
}

# Core

In [None]:
class ForecastingModel:
    """Forecasting model selected and configured by a ``modeling`` dict.

    In ``'districtwise'`` mode with the ``'HW-ES'`` approach, one Holt-Winters
    exponential smoothing model is fitted per column of ``df``. The
    ``'spatio-temporal'`` approaches are placeholders: they leave ``model_``
    set to ``None``.

    Attributes:
        modeling: configuration dict; ``'mode'`` and ``'approach'`` select the
            model and ``'hyperpars'`` is forwarded to ``ExponentialSmoothing``.
        df: data the model is fitted on.
        targets: the columns of ``df``; each is modelled in districtwise mode.
        submodels_: fitted per-district results, populated by ``fit``.
        model_: single joint model (currently always ``None``).
    """

    def __init__(self, df, modeling: Dict[str, Any]):
        self.modeling = modeling
        self.df = df
        self.targets = df.columns
        self.submodels_ = {}
        self.model_ = None

    def fit(self):
        """Fit the model(s) described by ``self.modeling``.

        Returns:
            The dict of fitted per-district results in districtwise mode, or
            ``self.model_`` (currently ``None``) in spatio-temporal mode.

        Raises:
            NotImplementedError: for an unknown mode, or for a districtwise
                approach other than ``'HW-ES'``.
        """
        mode = self.modeling['mode']

        if mode == 'districtwise':
            approach = self.modeling['approach']
            if approach != 'HW-ES':
                raise NotImplementedError(
                    f"unsupported districtwise approach: {approach!r}"
                )
            # .fit() is required: the bare ExponentialSmoothing object is only
            # a model specification and cannot predict with (start, end).
            self.submodels_ = {
                district: ExponentialSmoothing(
                    self.df[district], **self.modeling['hyperpars']
                ).fit()
                for district in self.targets
            }
            return self.submodels_

        if mode == 'spatio-temporal':  # i.e. all districts at once
            # Placeholders: no joint model is built yet for either approach.
            if self.modeling['approach'] == 'RNN-ES':
                self.model_ = None
            elif self.modeling['approach'] == 'GWNet':
                self.model_ = None
            return self.model_

        # Raise (not return) so callers cannot mistake the error for a model.
        raise NotImplementedError(f"unsupported mode: {mode!r}")

    def predict(self, start, end, transformer):
        """Predict from ``start`` to ``end`` and undo the scaling.

        Args:
            start: first index label to predict.
            end: last index label to predict.
            transformer: object whose ``inverse_transform`` maps predictions
                back to the original scale.

        Returns:
            The output of ``transformer.inverse_transform``. For districtwise
            submodels a non-DataFrame output is wrapped in a DataFrame carrying
            the prediction index and one column per district.

        Raises:
            RuntimeError: if neither a joint model nor submodels are available.
        """
        if self.model_ is not None:
            y_hat = self.model_.predict(start, end)
            return transformer.inverse_transform(y_hat)

        if not self.submodels_:
            raise RuntimeError('no fitted model available: call fit() before predict()')

        # One column of predictions per district.
        y_hat = pd.DataFrame({
            district: submodel.predict(start, end)
            for district, submodel in self.submodels_.items()
        })
        y_hat_unscaled = transformer.inverse_transform(y_hat)
        if not isinstance(y_hat_unscaled, pd.DataFrame):
            # Array-returning transformers drop the labels; restore them.
            y_hat_unscaled = pd.DataFrame(
                y_hat_unscaled, index=y_hat.index, columns=y_hat.columns
            )
        return y_hat_unscaled

In [None]:
# Work on the scaled inference frame under the short name `df`.
df = df_infer_scaled

In [None]:
# Specify the Holt-Winters model for district DEF0C on the scaled series,
# then fit it; `submodel` is the fitted result.
hw_def0c = ExponentialSmoothing(df_infer_scaled['DEF0C'], **modeling['hyperpars'])
submodel = hw_def0c.fit()

In [None]:
# Index label at position 720, which falls inside the 'pass 2' validation
# slice (719-726) of splits_positions.
df_infer_scaled.index[720]

In [None]:
# Predict between the index labels at positions 720 and 721.
submodel.predict(
    start=df_infer_scaled.index[720],
    end=df_infer_scaled.index[721]
)

In [None]:
# Index labels of the 'pass 3' validation rows; predict from the first label
# to the last one.
val_index = df_infer_scaled[splits_positions['pass 3']['val']].index
submodel.predict(start=val_index[0], end=val_index[-1])

In [None]:
# Districts to model, taken from the modeling configuration.
targets = modeling['targets']

In [None]:
# Fit one Holt-Winters model per target district on its scaled series.
submodels = {}
for district in targets:
    district_series = df_infer_scaled[district]
    submodels[district] = ExponentialSmoothing(district_series, **modeling['hyperpars']).fit()

In [None]:
# First and last index label of the 'pass 3' validation rows.
val_index = df_infer_scaled[splits_positions['pass 3']['val']].index
window_start, window_end = val_index[0], val_index[-1]

# Forecast that window with every district's fitted submodel.
forecasts = {}
for district in targets:
    forecasts[district] = submodels[district].predict(start=window_start, end=window_end)

# One column per district.
yhat = pd.DataFrame(forecasts)

In [None]:
# Display the per-district forecasts for the 'pass 3' validation window.
yhat

In [None]:
# Scaled values of the target districts over the 'pass 3' validation rows,
# i.e. the same rows yhat was forecast for.
df_infer_scaled[ splits_positions['pass 3']['val'] ][targets]

In [None]:
import sys
sys.path.append('..')

In [None]:
# Load the persisted scaler. The context manager closes the file handle once
# unpickling is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load artifacts
# produced by this project's own pipeline.
with open('../data/05_model_input/scaler.pkl/2020-10-09T02.14.26.104Z/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [None]:
# Display the loaded scaler object.
scaler

In [None]:
# Empty frame on the forecast index with every column of df_infer_scaled.
df_preds = pd.DataFrame(index=yhat.index, columns=df_infer_scaled.columns)

# Fill in the forecast columns in place; the other columns stay empty.
df_preds.update(yhat)

df_preds.head()

In [None]:
# DEF0C forecasts before the scaler's inverse transform is applied.
df_preds['DEF0C']

In [None]:
# Undo the scaling, then put the result back under df_preds' labels.
unscaled_values = scaler.inverse_transform(df_preds)
df_preds_unscaled = pd.DataFrame(
    unscaled_values,
    index=df_preds.index,
    columns=df_preds.columns,
)

In [None]:
# DEF0C forecasts after scaler.inverse_transform.
df_preds_unscaled['DEF0C']

# Core v2

In [11]:
import pickle
import pandas as pd
import numpy as np
from typing import Dict, Any

import sys
sys.path.append('..')
from src.utils.modeling import ForecastingModel

In [12]:
# Load the persisted scaler. The context manager closes the file handle once
# unpickling is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load artifacts
# produced by this project's own pipeline.
with open('../data/05_model_input/scaler.pkl/2020-10-09T15.53.34.176Z/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [13]:
# Start from the scaled inference frame; the next cell narrows `df` to the
# target columns.
df = df_infer_scaled

In [14]:
# Keep only the columns we want to model.
targets = modeling['targets']
df = df[targets]

# One model per cross-validation pass, fitted on that pass's training rows.
model = {}
for pass_id, positions in splits_positions.items():
    df_train = df[positions['train']]
    model[pass_id] = ForecastingModel(modeling).fit(df_train)

# Plus one model fitted on the whole inference dataset.
model['full'] = ForecastingModel(modeling).fit(df)

In [15]:
# Display the models keyed by CV pass, plus the 'full' entry.
model

{'pass 1': <src.utils.modeling.ForecastingModel at 0x7fc058e63640>,
 'pass 2': <src.utils.modeling.ForecastingModel at 0x7fc058e63370>,
 'pass 3': <src.utils.modeling.ForecastingModel at 0x7fc05da13d90>,
 'full': <src.utils.modeling.ForecastingModel at 0x7fc058e634f0>}

In [16]:
# First and last index label of the modelled frame.
start, end = df.index[0], df.index[-1]

In [17]:
# The entry trained on the whole inference dataset.
model['full']

<src.utils.modeling.ForecastingModel at 0x7fc058e634f0>

In [18]:
# Predict over the full span of `df` (start/end are its first and last index
# labels) with the model fitted on that same frame.
# NOTE(review): `scaler` is passed as the transformer -- presumably used to
# undo the scaling; confirm in src.utils.modeling.
model['full'].predict(start, end, scaler)

Unnamed: 0_level_0,DEF0C,DE111
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,0.314448,0.098570
2013-01-02,0.299405,0.152936
2013-01-03,0.294443,0.050359
2013-01-04,0.416653,0.046431
2013-01-05,0.489341,0.093469
...,...,...
2015-06-18,0.207454,0.017798
2015-06-19,0.294201,0.049126
2015-06-20,0.329595,0.063864
2015-06-21,0.324806,0.043308
