In [4]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [5]:
# Locations of the model-input artifacts, relative to the notebook directory.
CV_SPLITS_PATH = '../data/05_model_input/cv_splits_dict.pkl/2020-08-21T06.02.21.287Z/cv_splits_dict.pkl'
SPATIOTEMPORAL_HDF_PATH = '../data/05_model_input/df_spatiotemporal.hdf'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts produced by a trusted pipeline run.
with open(CV_SPLITS_PATH, 'rb') as pkl_file:
    cv_splits_dict = pickle.load(pkl_file)

# Read the frame stored under the 'df_spatiotemporal' key of the HDF file.
df_spatiotemporal = pd.read_hdf(
    path_or_buf=SPATIOTEMPORAL_HDF_PATH,
    key='df_spatiotemporal',
)
# Second name bound to the very same object (kept for any cell that uses it).
capacity_factors_daily_2000to2015 = df_spatiotemporal

In [19]:
# List the values of the 'district' level of the column index under 'temporal'.
df_spatiotemporal['temporal'].columns.get_level_values('district')

DE111
DE114
DE115
DE116
DE118
DE119
DE11A
DE11B
DE11C
DE11D
DE122
DE127
DE12A
DE12C
DE131
DE132
DE133
DE134
DE135
DE136
DE137
DE139
DE141
DE143
DE145
DE146
DE148
DE149
DE212
DE217
DE218
DE219
DE21B
DE21C
DE21E
DE21I
DE21J
DE21M
DE21N
DE226
DE227
DE228
DE22A
DE22C
DE232
DE234
DE235
DE236
DE237
DE238
DE23A
DE245
DE246
DE247
DE248
DE249
DE24A
DE24B
DE24D
DE251
DE256
DE257
DE258
DE259
DE25A
DE25B
DE25C
DE265
DE267
DE268
DE269
DE26A
DE26B
DE26C
DE275
DE276
DE277
DE278
DE279
DE27B
DE27C
DE27D
DE27E
DE401
DE402
DE405
DE406
DE407
DE408
DE40A
DE40B
DE40D
DE40E
DE40F
DE40G
DE40H
DE40I
DE712
DE716
DE718
DE719
DE71B
DE71D
DE71E
DE721
DE722
DE723
DE724
DE725
DE732
DE733
DE734
DE735
DE736
DE737
DE803
DE80J
DE80K
DE80L
DE80M
DE80N
DE80O
DE911
DE912
DE913
DE914
DE915
DE916
DE917
DE918
DE919
DE91A
DE91B
DE922
DE923
DE925
DE926
DE927
DE928
DE929
DE931
DE932
DE933
DE934
DE935
DE936
DE937
DE938
DE939
DE93A
DE93B
DE941
DE942
DE943
DE944
DE945
DE946
DE947
DE948
DE949
DE94A
DE94B
DE94C
DE94D
DE94E
DE94F
DE94

In [20]:
# Select the 'DEF0C' entry under 'temporal' to inspect a single district.
df_spatiotemporal['temporal']['DEF0C']

var,power
2014-01-01,0.284478
2014-01-02,0.391596
2014-01-03,0.444632
2014-01-04,0.451600
2014-01-05,0.211923
...,...
2015-12-27,0.652169
2015-12-28,0.313875
2015-12-29,0.591666
2015-12-30,0.460236


In [49]:
def _split_train_test(df, cv_splits_dict, pass_id: str):
    train_idx_start = cv_splits_dict[pass_id]['train_idx'][0]
    train_idx_end = cv_splits_dict[pass_id]['train_idx'][1]

    test_idx_start = cv_splits_dict[pass_id]['test_idx'][0]
    test_idx_end = cv_splits_dict[pass_id]['test_idx'][1]

    return {
        'train': df.iloc[train_idx_start:train_idx_end, :],
        'test': df.iloc[test_idx_start:test_idx_end, :],
    }

In [52]:
# Try the splitter on a single district for the first CV pass and show both parts.
ts = df_spatiotemporal['temporal']['DEF0C']
y = _split_train_test(df=ts, cv_splits_dict=cv_splits_dict, pass_id='pass_1')
train_part, test_part = y['train'], y['test']
display(train_part, test_part)

array([[0.28447777],
       [0.39159614],
       [0.44463235],
       [0.45160032],
       [0.2119228 ],
       [0.5856315 ],
       [0.72301984],
       [0.59121961],
       [0.6438724 ],
       [0.60723782],
       [0.4907928 ],
       [0.30185362],
       [0.27430982],
       [0.09641669],
       [0.21961748],
       [0.38344644],
       [0.14508021],
       [0.28168859],
       [0.83497463],
       [0.47229633],
       [0.18486095],
       [0.43324548],
       [0.52671176],
       [0.51063076],
       [0.36622803],
       [0.49504269],
       [0.37305679],
       [0.38046237],
       [0.45959083],
       [0.2889325 ],
       [0.2214742 ],
       [0.4826624 ],
       [0.28419035],
       [0.19381382],
       [0.16267358],
       [0.29773918],
       [0.42420632],
       [0.5354983 ],
       [0.4683549 ],
       [0.7270523 ],
       [0.20269003],
       [0.17416096],
       [0.42793402],
       [0.440872  ],
       [0.30451836],
       [0.79308219],
       [0.69743182],
       [0.141

var,power
2015-01-01,0.700989
2015-01-02,0.885246
2015-01-03,0.663139
2015-01-04,0.322626
2015-01-05,0.220387
2015-01-06,0.192569
2015-01-07,0.352281


In [58]:
# Check what type the 'power' column of the training split has.
type( y['train']['power'] )

pandas.core.series.Series

In [65]:
# Check the dimensions of the training split.
y['train'].shape

(365, 1)

In [68]:
# Display the training split.
y['train']

var,power
2014-01-01,0.284478
2014-01-02,0.391596
2014-01-03,0.444632
2014-01-04,0.451600
2014-01-05,0.211923
...,...
2014-12-27,0.157756
2014-12-28,0.148551
2014-12-29,0.194382
2014-12-30,0.210987


In [69]:
# Display the full (unsplit) data currently bound to `ts`.
ts

var,power
2014-01-01,0.284478
2014-01-02,0.391596
2014-01-03,0.444632
2014-01-04,0.451600
2014-01-05,0.211923
...,...
2015-12-27,0.652169
2015-12-28,0.313875
2015-12-29,0.591666
2015-12-30,0.460236


In [81]:
# Fit one Holt-Winters model per (district, CV pass) and forecast that pass's
# test window. Results are stored as nested dicts: district -> pass_id -> value.
model = {}
pred = {}

# Restricted to two districts for now; to run on everything, iterate over the
# 'district' level of df_spatiotemporal['temporal'].columns instead.
districts = ['DEF0C', 'DE111']

for district in districts:

    model[district] = {}
    pred[district] = {}
    ts = df_spatiotemporal['temporal'][district]

    for pass_id in cv_splits_dict:

        y = _split_train_test(ts, cv_splits_dict, pass_id)

        # Additive trend, multiplicative seasonality with a 7-step period.
        model[district][pass_id] = ExponentialSmoothing(
            y['train'],
            trend='add',
            seasonal='mul',
            seasonal_periods=7,
        ).fit()

        # Forecast from the first to the LAST label of the test window.
        # (Using element [1] as the end would stop after the second test
        # label and leave the rest of the window unforecast.)
        test_index = y['test'].index
        pred[district][pass_id] = model[district][pass_id].predict(
            start=test_index[0],
            end=test_index[-1],
        )


  warn("Optimization failed to converge. Check mle_retvals.",
  warn("Optimization failed to converge. Check mle_retvals.",
  warn("Optimization failed to converge. Check mle_retvals.",
  warn("Optimization failed to converge. Check mle_retvals.",


In [79]:
# Inspect the first index label of the test split as a raw numpy value.
y['test'].index.values[0]

numpy.datetime64('2015-01-01T00:00:00.000000000')

In [39]:
# Display the nested predictions dict (district -> pass_id -> predicted series).
# NOTE(review): this cell's execution count (39) is lower than the fitting
# cell's (81), so the saved output may be stale; re-run after fitting.
pred

{'DEF0C': {'pass_1': 2014-01-01    0.284965
  2014-01-02    0.392202
  2014-01-03    0.445212
  2014-01-04    0.452092
  2014-01-05    0.212245
                  ...   
  2015-12-27    0.359033
  2015-12-28    0.347782
  2015-12-29    0.453832
  2015-12-30    0.497939
  2015-12-31    0.574648
  Freq: D, Length: 730, dtype: float64,
  'pass_2': 2014-01-01    0.284965
  2014-01-02    0.392202
  2014-01-03    0.445212
  2014-01-04    0.452092
  2014-01-05    0.212245
                  ...   
  2015-12-27    0.359033
  2015-12-28    0.347782
  2015-12-29    0.453832
  2015-12-30    0.497939
  2015-12-31    0.574648
  Freq: D, Length: 730, dtype: float64,
  'pass_3': 2014-01-01    0.284965
  2014-01-02    0.392202
  2014-01-03    0.445212
  2014-01-04    0.452092
  2014-01-05    0.212245
                  ...   
  2015-12-27    0.359033
  2015-12-28    0.347782
  2015-12-29    0.453832
  2015-12-30    0.497939
  2015-12-31    0.574648
  Freq: D, Length: 730, dtype: float64},
 'DE111': {'pas