In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Bidirectional, LSTM, Dropout
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle

# Global seaborn theme for every figure in the notebook: dark grid, muted palette,
# enlarged fonts and a 20x10 default figure size.
sns.set(style='darkgrid', palette='muted', font_scale=1.5, rc={'figure.figsize':(20,10)})

# Seed both NumPy's and TensorFlow's global random generators with the same value.
RANDOM_SEED = 40
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [4]:
import glob
import pickle
def open_param_results(pattern='./hyper_param_test/*.pkl'):
    """Load and concatenate every pickled result list matching ``pattern``.

    Each matching file is expected to contain an iterable of result records;
    the records of all files are flattened into one list.

    Parameters
    ----------
    pattern : str, optional
        Glob pattern of the pickle files to read. Defaults to the
        hyper-parameter search output directory used by this notebook.

    Returns
    -------
    list
        All records from all matching files. Files are visited in sorted
        path order so the result is reproducible across runs and platforms
        (``glob.glob`` alone returns paths in arbitrary order). Empty when
        nothing matches.
    """
    all_models = []
    for file in sorted(glob.glob(pattern)):
        with open(file, 'rb') as f:
            # SECURITY: pickle.load can execute arbitrary code; only point
            # this at files produced by a trusted run of this project.
            all_models.extend(pickle.load(f))
    return all_models

In [5]:
# Load every candidate record saved under ./hyper_param_test/.
# NOTE: despite its name, param_df is a plain Python list, not a DataFrame.
param_df = open_param_results()
# Number of candidate records loaded.
print(len(param_df))

960


In [6]:
def get_top(df_list, num):
    """Return the ``num`` best entries of ``df_list`` ranked by r2 score.

    Every entry must support ``entry['score']['r2']``. Entries are ordered
    from highest to lowest r2; entries with equal scores keep their original
    relative order. The input sequence itself is left untouched.
    """
    def r2_of(entry):
        # Ranking key: the r2 value stored in the entry's score record.
        return entry['score']['r2']

    ranked = list(df_list)
    ranked.sort(key=r2_of, reverse=True)
    return ranked[:num]

In [52]:
# Keep the ten candidates with the highest recorded r2 score (best first).
top_dfs = get_top(param_df, 10)
# The first element is the single best-scoring candidate.
df = top_dfs[0]

In [46]:
def run_models(list_df, n_rounds=100, epochs=100):
    """Retrain each candidate configuration ``n_rounds`` times and score it.

    For every round and every entry of ``list_df`` a fresh bidirectional LSTM
    followed by a single-unit Dense layer is built from the entry's
    ``hyper_params``, fitted on its training data, evaluated on its test data
    and saved to ``./fitted_models/{round}_{position}``.

    Parameters
    ----------
    list_df : list of dict
        Candidate records. Each must provide ``'hyper_params'`` (with keys
        ``units``, ``activation_function``, ``merge_mode``, ``loss``,
        ``optimizer``, ``batch_size``), ``'X_train'``, ``'y_train'``,
        ``'X_test'`` and ``'y_test'``. ``X_train`` must have at least three
        dimensions (axes 1 and 2 become the LSTM ``input_shape``).
        ``'scaler_y'`` is optional; when truthy it is used to transform
        ``y_test`` for validation and to inverse-transform predictions.
        NOTE(review): ``y_train`` is presumably already scaled with the same
        scaler -- confirm against the code that produced these records.
    n_rounds : int, optional
        How many times the whole candidate list is retrained (default 100).
    epochs : int, optional
        Training epochs per fit (default 100).

    Returns
    -------
    list of dict
        One record per (round, candidate): a shallow copy of the candidate
        plus ``'model'`` (save path), ``'history'``, ``'predictions'`` and a
        freshly computed ``'score'`` with ``RMSE``, ``MAE`` and ``r2``.

    Side effects
    ------------
    The entries of ``list_df`` are updated in place: their ``'history'`` and
    ``'predictions'`` keys are overwritten on every round, so afterwards they
    hold the values of the last round only. Models are written to disk and
    progress is printed.
    """
    best_models = []
    total_runs = len(list_df) * n_rounds
    run_number = 0

    for i in range(n_rounds):
        for j, df in enumerate(list_df, 1):
            run_number += 1
            file_path = f'./fitted_models/{i}_{j}'
            print(file_path)

            hyper_params = df['hyper_params']
            X_train, X_test, y_test = df['X_train'], df['X_test'], df['y_test']
            # A missing or falsy scaler means targets are used as-is.
            scaler_y = df.get('scaler_y')

            model = Sequential()
            model.add(Bidirectional(
                LSTM(units=hyper_params['units'],
                     activation=hyper_params['activation_function'],
                     input_shape=(X_train.shape[1], X_train.shape[2])),
                merge_mode=hyper_params['merge_mode']))
            model.add(Dense(units=1))
            model.compile(loss=hyper_params['loss'], optimizer=hyper_params['optimizer'])

            # True position in the overall schedule (previously printed i + j
            # against a total that did not match the number of rounds).
            print(f'Training {run_number}/{total_runs}'.center(50, '-'))

            # Only scale validation targets when a scaler exists, mirroring
            # the inverse-transform handling of the predictions below.
            y_val = scaler_y.transform(y_test) if scaler_y else y_test

            history = model.fit(
                X_train, df['y_train'],
                epochs=epochs,
                batch_size=hyper_params['batch_size'],
                validation_data=(X_test, y_val),
                shuffle=False,
                verbose=2
            )

            df['history'] = {'loss': history.history['loss'], 'val_loss': history.history['val_loss']}

            predictions = model.predict(X_test)

            # Bring predictions back to the scale of y_test before scoring.
            if scaler_y:
                df['predictions'] = scaler_y.inverse_transform(predictions)
            else:
                df['predictions'] = predictions

            model.save(file_path)

            # 'score' is listed last so it replaces any score carried over
            # from the candidate record.
            best_models.append({
                'model': file_path,
                **df.copy(),
                'score': {
                    'RMSE': np.sqrt(mean_squared_error(y_true=y_test, y_pred=df['predictions'])),
                    'MAE': mean_absolute_error(y_true=y_test, y_pred=df['predictions']),
                    'r2': r2_score(y_true=y_test, y_pred=df['predictions'])},
            })

    return best_models

In [47]:
# Retrain the five highest-ranked candidates; run_models repeats each one over
# several rounds, saves every fitted model to disk and returns one scored record per fit.
best_models = run_models(top_dfs[:5])

./fitted_models/0_1
------------------Training 1/20-------------------
Epoch 1/25
10/10 - 2s - loss: 0.9504 - val_loss: 0.7968
Epoch 2/25
10/10 - 1s - loss: 0.7490 - val_loss: 0.8277
Epoch 3/25
10/10 - 1s - loss: 0.9304 - val_loss: 0.8023
Epoch 4/25
10/10 - 1s - loss: 0.5026 - val_loss: 0.5943
Epoch 5/25
10/10 - 2s - loss: 0.5193 - val_loss: 0.5463
Epoch 6/25
10/10 - 2s - loss: 0.5051 - val_loss: 0.5507
Epoch 7/25
10/10 - 1s - loss: 0.4616 - val_loss: 0.5262
Epoch 8/25
10/10 - 1s - loss: 0.4499 - val_loss: 0.5017
Epoch 9/25
10/10 - 1s - loss: 0.4425 - val_loss: 0.4768
Epoch 10/25
10/10 - 1s - loss: 0.4262 - val_loss: 0.4649
Epoch 11/25
10/10 - 1s - loss: 0.4091 - val_loss: 0.4481
Epoch 12/25
10/10 - 1s - loss: 0.4023 - val_loss: 0.4381
Epoch 13/25
10/10 - 1s - loss: 0.4099 - val_loss: 0.4244
Epoch 14/25
10/10 - 1s - loss: 0.4006 - val_loss: 0.4169
Epoch 15/25
10/10 - 1s - loss: 0.3895 - val_loss: 0.4067
Epoch 16/25
10/10 - 1s - loss: 0.3900 - val_loss: 0.4028
Epoch 17/25
10/10 - 1s - l

In [48]:
import pickle
def save_results(df_list, name):
    """Pickle ``df_list`` to ``./fitted_models/{name}.pkl``.

    Parameters
    ----------
    df_list : object
        The results to persist (any picklable object).
    name : str
        File stem of the output pickle inside ``./fitted_models/``.

    The target directory is created when it does not exist yet, so the
    function also works on a fresh checkout. An existing file with the same
    name is overwritten.
    """
    import os  # local import: `os` is not imported at the top of the notebook

    target = f'./fitted_models/{name}.pkl'
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as f:
        pickle.dump(df_list, f)

In [49]:
# Persist the retrained results to ./fitted_models/fitted_models.pkl.
save_results(best_models, 'fitted_models')