### Model Category Configuration

In [2]:
# Feature groups that may be selected:
# ['all', 'actual', 'entsoe', 'weather_t', 'weather_i', 'holiday', 'weekday', 'hour', 'month']
model_cat_id = "01"  # category id; reused for model names and output folders
feature = ['actual', 'entsoe']  # handed to data.load_dataset as `modules`

# LSTM hyper-parameter grid. `cells` holds the candidate cell counts for each
# layer slot; a count of 0 means the slot is left out of the model.
# `layer_conf` carries one flag per layer slot.
layer_conf = [True, True, True]
cells = [
    [5, 10, 20, 30, 50, 75, 100, 125, 150],
    [0, 10, 20, 50],
    [0, 10, 15, 20],
]
dropout = [0, 0.1, 0.2]
batch_size = [8]
timesteps = [1]

#### Select backend & check that Keras works

In [3]:
import os
# The backend has to be chosen through the environment BEFORE keras is
# imported for the first time, so the order of these lines matters.
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras
# Printing the version doubles as a quick check that the import succeeded.
print(keras.__version__)

3.3.3


In [4]:
# Standard library
import datetime as dt
import itertools
import math
import sys
import time as t
from decimal import *

# Third-party
import keras
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import scipy.stats as stats
import statsmodels.api as stattools
import tensorflow as tf
from keras import backend as K
from keras import layers
from numpy import newaxis
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

# Default size for every matplotlib figure drawn in this notebook.
mpl.rcParams['figure.figsize'] = (9, 5)
# Add the parent directory to sys.path (once) before the project import below;
# presumably that is where the lstm_load package lives -- confirm.
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from lstm_load import data, lstm

In [5]:
# --- Data set location and train/test boundary -------------------------------
path = os.path.join(os.path.dirname(''), '../data/fulldataset.csv')
loc_tz = pytz.timezone('Europe/Zurich')
# Timezone-aware boundary: rows before it are training data, the rest is test data.
split_date = loc_tz.localize(dt.datetime(2017, 2, 1))

# --- Training settings --------------------------------------------------------
validation_split = 0.2
epochs = 30
verbose = 0
# Early-stopping settings (their consumer is not part of this cell).
early_stopping = True
min_delta = 0.006
patience = 2

# --- Empty table that collects one row of metrics per trained model ----------
results = pd.DataFrame(
    columns=[
        'module_name', 'config', 'dropout',
        'train_loss', 'train_rmse', 'train_mae', 'train_mape',
        'valid_loss', 'valid_rmse', 'valid_mae', 'valid_mape',
        'test_rmse', 'test_mae', 'test_mape',
        'epochs', 'batch_train', 'input_shape',
        'total_time', 'time_step', 'splits',
    ]
)

In [6]:
def generate_combinations(model_name=None, layer_conf=None, cells=None, dropout=None, batch_size=None, timesteps=None):
    """Build one model configuration per combination of the given hyper-parameters.

    The cartesian product of all per-slot cell counts is combined with every
    dropout, batch size and timestep value. A short summary table with the
    number of generated configurations is printed once at the end.

    Parameters
    ----------
    model_name : str
        Prefix of every generated name; the 1-based combination index is appended.
    layer_conf : sequence
        One entry per layer slot; entry ``i`` is stored under the ``'statful'``
        key of the layer built from slot ``i``.
    cells : sequence of sequences of int
        Candidate cell counts per layer slot. A count of 0 skips that slot.
    dropout : sequence of float
        Candidate dropout values; the chosen value is applied to every layer.
    batch_size : sequence of int
        Candidate batch sizes.
    timesteps : sequence of int
        Candidate timestep values.

    Returns
    -------
    list of dict
        One dict per combination with the keys ``'name'``, ``'layers'``,
        ``'batch_size'`` and ``'timesteps'``.
    """
    layer_combos = list(itertools.product(*cells))
    combinations = list(itertools.product(layer_combos, dropout, batch_size, timesteps))

    models = []
    for ix, (layer_sizes, drop, batch, steps) in enumerate(combinations):
        m_name = model_name + str(ix + 1)

        layer_specs = []
        for idx, level in enumerate(layer_sizes):
            if level <= 0:
                # A cell count of 0 means this layer slot is not used.
                continue
            # Only hand on full sequences when some later slot is still active;
            # the last slot (nothing after it) therefore never returns sequences.
            return_sequence = any(size != 0 for size in layer_sizes[idx + 1:])
            # NOTE(review): 'statful' looks like a typo of 'stateful'. The key is
            # kept unchanged because its consumer is not visible here -- confirm.
            layer_specs.append({'type': 'lstm', 'cells': level, 'dropout': drop, 'statful': layer_conf[idx], 'ret_seq': return_sequence})
            # Encode the layer size in the name (previously the dropout value was
            # appended here by mistake, once per layer).
            m_name += '_l-' + str(level)

        if drop > 0:
            m_name += '_d-' + str(drop)

        models.append({
            'name': m_name,
            'layers': layer_specs,
            'batch_size': batch,
            'timesteps': steps
        })

    # Report and return only after ALL combinations were processed; returning
    # from inside the loop yielded just the first configuration.
    print('==================')
    print(tabulate([
        ['Number of model configs generated', len(combinations)]],
        tablefmt="jira", numalign="right", floatfmt=".3f"))
    return models

#### Model Generation

In [9]:
# Output folders for this model category; every folder is created up front so
# that later writes cannot fail on a missing directory.
result_dir = '../results/notebook_' + model_cat_id + '/'
plot_dir = '../plots/notebook_' + model_cat_id + '/'
model_dir = '../models/notebook_' + model_cat_id + '/'
os.makedirs(result_dir, exist_ok=True)
os.makedirs(plot_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

# CSV files for the training/validation results and the test results,
# stamped with today's date.
output_table = result_dir + model_cat_id + '_results_' + t.strftime("%Y%m%d") + '.csv'
test_output_table = result_dir + model_cat_id + '_test_results' + t.strftime("%Y%m%d") + '.csv'

# Expand the hyper-parameter grid from the configuration cell. `timesteps` is
# taken from that configuration instead of a second hard-coded literal.
models = generate_combinations(
    model_name=model_cat_id + '_',
    layer_conf=layer_conf,
    cells=cells,
    dropout=dropout,
    batch_size=batch_size,
    timesteps=timesteps,
)

| Number of model configs generated | 432 |


#### Data Loading

In [12]:
# Load the selected feature groups from the data set configured in `path`.
# NOTE(review): the recorded output of this cell is a TypeError ("Already
# tz-aware, use tz_convert to convert."). No statement in this cell localizes
# a timestamp, so it presumably originates inside data.load_dataset -- confirm.
df = data.load_dataset(path=path, modules=feature)

# Work on an independent copy without missing values; `df` stays untouched.
df_scaled = df.dropna().copy()

# Standardise every float64 column (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the full data set, i.e. including the
# rows on/after split_date; consider fitting on the training rows only.
floats = [col for col, dtype in df_scaled.dtypes.items() if dtype == 'float64']
scaler = StandardScaler()
df_scaled[floats] = scaler.fit_transform(df_scaled[floats])

# Chronological split: everything before split_date is training data.
df_train = df_scaled.loc[df_scaled.index < split_date].copy()
df_test = df_scaled.loc[df_scaled.index >= split_date].copy()

# 'actual' is the target column; all remaining columns are model inputs.
y_train = df_train['actual'].copy()
X_train = df_train.drop('actual', axis=1).copy()
y_test = df_test['actual'].copy()
X_test = df_test.drop('actual', axis=1).copy()


TypeError: Already tz-aware, use tz_convert to convert.

#### Training models on all configurations