In [1]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from typing import Dict, Any

In [2]:
import os
# Show the kernel's working directory: the data paths used in this notebook
# are written relative to it ('../data/...').
# NOTE(review): the rendered output is an absolute local path — consider
# clearing it before sharing the notebook.
os.getcwd()

'/home/jonasmmiguel/Documents/learning/poli/thesis/wind-stf/prototyping'

In [15]:
# Load the pickled object stored under the versioned model path.
# The context manager closes the file handle as soon as unpickling is done
# instead of leaving it open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# load files produced by a trusted source.
with open('../data/06_models/model.pkl/2020-10-09T00.59.06.757Z/model.pkl', 'rb') as model_file:
    models = pickle.load(model_file)

# MISC

In [3]:
# Read the model-input frame from the HDF store, then bind the second name to
# the very same object (no copy is made).
df_infer = pd.read_hdf('../data/05_model_input/df_infer.hdf', key='df_infer')
capacity_factors_daily_2000to2015 = df_infer

# Peek at the first three rows only.
df_infer.head(3)

Unnamed: 0_level_0,DE145,DE114,DE146,DE132,DE12A,DE133,DE12C,DE11C,DE118,DE119,...,DEG01,DEG0F,DE275,DE21C,DE234,DE251,DE276,DE278,DE718,DE943
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01,0.219507,0.269458,0.24054,0.263019,0.593148,0.331149,0.555176,0.28414,0.357407,0.410291,...,0.300818,0.363388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-02,0.083655,0.063571,0.043303,0.005984,0.080731,0.007986,0.06612,0.083709,0.07874,0.108224,...,0.150699,0.13393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-03,0.246707,0.229298,0.337413,0.047907,0.279092,0.136004,0.23743,0.320095,0.46015,0.516133,...,0.348737,0.297686,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Load the cross-validation split positions from the versioned pickle file.
# The context manager closes the file handle as soon as unpickling is done
# instead of leaving it open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# load files produced by a trusted source.
with open('../data/05_model_input/cv_splits_positions.pkl/2020-10-09T00.50.52.126Z/cv_splits_positions.pkl', 'rb') as splits_file:
    splits_positions = pickle.load(splits_file)
splits_positions

{'pass 1': {'train': slice(0, 542, None), 'val': slice(542, 549, None)},
 'pass 2': {'train': slice(0, 719, None), 'val': slice(719, 726, None)},
 'pass 3': {'train': slice(0, 896, None), 'val': slice(896, 903, None)}}

In [5]:
# Experiment configuration.
modeling = {
    # 'mode' and 'approach' select the branch taken in ForecastingModel.fit().
    'approach': 'HW-ES',
    'mode': 'districtwise',
    'model_inference_window': {'start': '2013-01-01', 'end': '2015-06-22'},
    'test_window': {'start': '2015-06-23', 'end': '2015-06-29'},
    'preprocessing': [
        'get_quantile_equivalent_normal_dist',
        'make_strictly_positive',
    ],
    # Unpacked as keyword arguments into ExponentialSmoothing.
    'hyperpars': {
        'trend': 'additive',
        'seasonal': 'multiplicative',
        'seasonal_periods': 365,
    },
    # Columns kept for modeling.
    'targets': ['DEF0C', 'DE111'],
}

In [6]:
class ForecastingModel:
    """Build forecasting model(s) for the columns of a data frame.

    Parameters
    ----------
    df
        Data whose columns are treated as the modeling targets. In
        ``'districtwise'`` mode each column is handed on its own to
        ``ExponentialSmoothing``.
    modeling : Dict[str, Any]
        Configuration mapping. The keys read by this class are ``'mode'``,
        ``'approach'`` and ``'hyperpars'``.
    """

    def __init__(self, df, modeling: Dict[str, Any]):
        self.modeling = modeling
        self.df = df
        # Every column of ``df`` is a target.
        self.targets = df.columns
        # Per-column models; populated by fit() in 'districtwise' mode.
        self.submodels_ = {}
        # Single joint model for 'spatio-temporal' mode (still a placeholder).
        self.model_ = None

    def fit(self):
        """Build the model(s) selected by ``modeling['mode']`` / ``['approach']``.

        Returns
        -------
        dict or None
            ``'districtwise'`` + ``'HW-ES'``: a dict mapping each column name
            to an ``ExponentialSmoothing`` instance (also stored in
            ``self.submodels_``).
            ``'spatio-temporal'`` + ``'RNN-ES'`` / ``'GWNet'``: ``None``, since
            these approaches are placeholders.

        Raises
        ------
        NotImplementedError
            If the mode, or the approach within a known mode, is unsupported.
        """
        mode = self.modeling['mode']

        if mode == 'districtwise':
            approach = self.modeling['approach']
            if approach == 'HW-ES':
                # NOTE(review): this only constructs the ExponentialSmoothing
                # objects; their own .fit() is not called here — confirm
                # whether callers are expected to fit them afterwards.
                self.submodels_ = {
                    district: ExponentialSmoothing(self.df[district], **self.modeling['hyperpars'])
                    for district in self.targets
                }
                return self.submodels_
            # Previously fell through and silently returned None.
            raise NotImplementedError(
                f"approach {approach!r} is not supported in 'districtwise' mode"
            )

        if mode == 'spatio-temporal':  # i.e. all districts at once
            approach = self.modeling['approach']
            if approach in ('RNN-ES', 'GWNet'):
                # Placeholders: no joint model is built yet.
                self.model_ = None
                return self.model_
            raise NotImplementedError(
                f"approach {approach!r} is not supported in 'spatio-temporal' mode"
            )

        # Raise (rather than return) the exception so an unsupported mode
        # cannot be mistaken for a successfully built model.
        raise NotImplementedError(f"mode {mode!r} is not supported")

    def predict(self, start, end, transformer):
        """Predict with the joint model and undo the target transformation.

        Parameters
        ----------
        start, end
            Forwarded unchanged to ``self.model_.predict``.
        transformer
            Object providing ``inverse_transform``; applied to the raw
            predictions.

        Returns
        -------
        The result of ``transformer.inverse_transform`` on the predictions.

        Raises
        ------
        RuntimeError
            If no joint model is available (``self.model_`` is ``None``).
        """
        if self.model_ is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise RuntimeError(
                'predict() needs a joint model in `model_`, but none is set; '
                'per-column models in `submodels_` are not used by predict().'
            )
        y_hat = self.model_.predict(start, end)
        y_hat_unscaled = transformer.inverse_transform(y_hat)
        return y_hat_unscaled

In [8]:
# Plain rebinding, no copy: `df` and `df_infer` name the same object here.
df = df_infer

In [9]:
# Keep only the columns we want to model.
targets = modeling['targets']
df = df[targets]

# One ForecastingModel per cross-validation pass, built on that pass's
# training rows; the value stored is whatever fit() returns.
# `pass_id` deliberately stays a plain loop variable: it remains bound after
# the loop and is read again further down in the notebook.
model = {}
for pass_id in splits_positions:
    train_rows = splits_positions[pass_id]['train']
    model[pass_id] = ForecastingModel(df[train_rows], modeling).fit()

In [10]:
# Display the mapping built above: pass id -> value returned by ForecastingModel.fit().
model

{'pass 1': {'DEF0C': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaeba30>,
  'DE111': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaeb8b0>},
 'pass 2': {'DEF0C': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaeb1c0>,
  'DE111': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaeb970>},
 'pass 3': {'DEF0C': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaebe20>,
  'DE111': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaebfd0>}}

In [12]:
# Scratch check of a single fit() call.
# NOTE(review): `pass_id` is not defined in this cell — it is whatever value
# the earlier `for pass_id in ...` loop left bound, so this cell depends on
# that loop having run first.
ForecastingModel(df[splits_positions[pass_id]['train']], modeling).fit()

{'DEF0C': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaebdc0>,
 'DE111': <statsmodels.tsa.holtwinters.ExponentialSmoothing at 0x7feb2eaeb820>}