In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import os

# RNN architecture using keras and sklearn
!pip install sklearn
!pip install keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import keras
from keras.layers import Dense, LSTM, BatchNormalization, Dropout
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.regularizers import l2

# bayes optimization
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization
from functools import partial

In [None]:
# load the match feature table; the path is relative to the notebook's working directory
df = pd.read_csv("../data/match_features.csv")

In [None]:
# to be removed when the data is cleaned
# Non-inplace and tolerant of already-missing columns, so re-running this cell
# (or running it on an already-cleaned file) does not raise a KeyError.
df = df.drop(columns=['Unnamed: 0', 'MP'], errors='ignore')

In [None]:
# display (rows, columns) of the frame after the column drop above
df.shape

In [None]:
# frame the features/labels are extracted from
data = df
# training hyperparameters shared by every model fit in this notebook
batch_size = 32
epoch = 200
# Number of model inputs = every column except the last one (the label column).
# Derived from the frame instead of hard-coding 59 so it follows the data if the
# column set changes; the later reshape requires exactly this value.
input_dim = df.shape[1] - 1

In [None]:
# invert the row order of the input df
df = df.iloc[::-1]
# `data` was bound to the un-inverted frame in the configuration cell, and the
# feature/label extraction reads from `data`; rebind it so the inversion actually
# takes effect downstream instead of being silently discarded.
# NOTE(review): this changes which rows end up in the (unshuffled) test split —
# confirm that the inverted order is the intended one.
data = df

In [None]:
# split the frame column-wise:
#   - the last column is the target the model is trained to predict
#   - every other column is fed to the lstm as an input feature
labels = data.iloc[:, -1].values
input_features = data.iloc[:, :-1].values
input_data = input_features

input_data.shape

In [None]:
# conventional X / y names for the feature matrix and the label vector
X, y = input_data[:], labels[:]

In [None]:
# hold out 20% of the rows as the test set; shuffle=False keeps the row order
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=False,
)

In [None]:
# training features before the reshape below: (samples, features)
X_train.shape

In [None]:
# add a trailing axis of size 1 so every sample has shape (input_dim, 1)
X_train = X_train.reshape((X_train.shape[0], input_dim, 1))
X_test = X_test.reshape((X_test.shape[0], input_dim, 1))

In [None]:
# training features after the reshape above: (samples, input_dim, 1)
X_train.shape

In [None]:
def get_model(input_shape, dropout1_rate=0.2, dropout2_rate=0.2, lr=0.0001):
    """Build the uncompiled two-layer LSTM model.

    Layer stack: LSTM(256, return_sequences=True) -> Dropout -> LSTM(256)
    -> Dropout -> Dense(1).

    Reference: https://publications.lib.chalmers.se/records/fulltext/250411/250411.pdf

    Args:
        input_shape: shape of a single sample, passed to the first LSTM layer
            (the notebook uses ``(input_dim, 1)``).
        dropout1_rate: rate of the Dropout layer after the first LSTM.
        dropout2_rate: rate of the Dropout layer after the second LSTM.
        lr: not used by this function; the model is returned uncompiled and the
            caller creates the optimizer. Kept in the signature so existing
            calls that pass a learning rate keep working.

    Returns:
        The ``Sequential`` model, not yet compiled.
    """
    model = Sequential()

    # first recurrent layer returns the full sequence so a second LSTM can be stacked on it
    model.add(LSTM(units=256, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(dropout1_rate))

    model.add(LSTM(units=256))
    model.add(Dropout(dropout2_rate))

    # single-unit output layer
    model.add(Dense(units=1))
    return model

# Number of the next fit_with() call; gives every trial its own checkpoint path.
iter_count = 1

def fit_with(input_shape, verbose, dropout1_rate, dropout2_rate, lr):
    """Train one model with the given hyperparameters and return its accuracy.

    Objective function for the Bayesian optimisation: ``input_shape`` and
    ``verbose`` come first so they can be pre-bound with ``functools.partial``,
    leaving only the tuned hyperparameters as free arguments.

    Reads the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``,
    ``epoch`` and ``batch_size``, and increments the module-level ``iter_count``.

    Args:
        input_shape: shape of a single sample, forwarded to ``get_model``.
        verbose: Keras verbosity level forwarded to ``model.fit``.
        dropout1_rate: dropout rate after the first LSTM layer.
        dropout2_rate: dropout rate after the second LSTM layer.
        lr: learning rate of the Adam optimizer.

    Returns:
        The accuracy reported by ``model.evaluate`` on ``(X_test, y_test)``.
        This is measured with the weights at the end of training; the
        best-``val_accuracy`` weights are only written to the checkpoint file.
    """
    global iter_count

    # reference https://keras.io/api/callbacks/model_checkpoint/

    # create and compile the model with an Adam optimizer at the requested learning rate
    model = get_model(input_shape, dropout1_rate, dropout2_rate, lr)
    optimizer = Adam(learning_rate=lr)
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])

    # keep only the weights of the epoch with the highest validation accuracy
    model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
        filepath=f'./model_store/lstm_checkpoints/checkpoint_{iter_count}',
        save_weights_only=True,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True)

    iter_count += 1

    # fit model; `verbose` is now honoured instead of being silently ignored
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=epoch, batch_size=batch_size, callbacks=[model_checkpoint_callback],
              verbose=verbose)

    # evaluate with val dataset
    score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    print(f"test loss is {score[0]}, test accuracy is {score[1]}")

    return score[1]

In [None]:
verbose = 1
input_shape = (input_dim, 1)
# pre-bind the arguments that are not tuned, so the optimizer only has to supply
# dropout1_rate, dropout2_rate and lr
fit_with_partial = partial(fit_with, input_shape, verbose)

# (a stray `get_model(...)` call whose result was discarded used to sit here; it only
# built and threw away an extra model, so it has been removed)

# bayesian optimization
# search range of each tuned hyperparameter: (lower bound, upper bound)
pbounds = {'dropout1_rate': (0.1, 0.5), 'dropout2_rate': (0.1, 0.5), 'lr': (1e-4, 1e-2)}

optimizer = BayesianOptimization(
    f=fit_with_partial,
    pbounds=pbounds,
    verbose=2,
    random_state=1,
)

# 10 initial random probes followed by 10 guided iterations; each one trains a full model
optimizer.maximize(init_points=10, n_iter=10)

# report every probed point, then the best one
for i, res in enumerate(optimizer.res):
    print(f"Iteration {i}: \n\t{res}")

print(optimizer.max)