<a href="https://colab.research.google.com/github/AlexKI123/OnlineChallengeStockMarket/blob/main/KerasTuner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/128.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# Load the data
# NOTE(review): assumes Google Drive is already mounted at /content/drive; no mount
# step is visible in this notebook — confirm before running on a fresh kernel.
train_data = pd.read_csv('/content/drive/MyDrive/input_training.csv', index_col=0)
train_data_y = pd.read_csv('/content/drive/MyDrive/output_training_gmEd6Zt.csv', index_col=0)

# Join inputs and labels row-by-row on their shared index (the first CSV column)
merged_data = train_data.merge(train_data_y, left_index=True, right_index=True)

# Scale the training data in one vectorised step: x -> tanh(x / division_value)
division_value = 100
columns_to_transform = [f'r{i}' for i in range(53)]
merged_data[columns_to_transform] = np.tanh(merged_data[columns_to_transform] / division_value)

# Replace NaN values with the mask value. tanh output lies in [-1, 1], so -2.0
# can never collide with a genuine scaled feature value.
mask_value = -2.0
merged_data = merged_data.fillna(mask_value)

# Drop the 'day' and 'equity' columns from the DataFrame
merged_data = merged_data.drop(['day', 'equity'], axis=1)

# Map labels from [-1, 0, 1] to [0, 1, 2]
label_mapping = {-1: 0, 0: 1, 1: 2}
merged_data['reod'] = merged_data['reod'].map(label_mapping)
# .map() turns every label that is not a key of label_mapping into NaN; fail
# loudly here rather than letting the one-hot encoding break on it later.
if merged_data['reod'].isna().any():
    raise ValueError("'reod' contains values that are not keys of label_mapping")

# Prepare the data
# Pick the feature columns by name so an unexpected extra column cannot be
# silently folded into the (samples, 53, 1) reshape.
X = merged_data[columns_to_transform].to_numpy().reshape(-1, len(columns_to_transform), 1)
# Fix the one-hot width so y always has one column per class, even if a class
# happens to be absent from the data.
y = to_categorical(merged_data['reod'].to_numpy(), num_classes=len(label_mapping))

# Split the data into training and testing sets (10% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

def build_model(hp):
    """Build and compile the tunable LSTM classifier for a single tuner trial.

    Architecture: two stacked LSTM layers reading a (53, 1) input sequence,
    followed by dropout, a ReLU dense layer and a 3-way softmax output.
    The network contains no Masking layer; inputs are consumed as-is.

    Hyperparameters registered on ``hp``:
        units_lstm_1: int, 60 to 140 in steps of 20.
        units_lstm_2: int, 30 to 80 in steps of 20.
        dropout: float, 0.0 to 0.5 in steps of 0.05.
        dense_units: int, 32 to 96 in steps of 32.

    Args:
        hp: hyperparameter container used to sample the values above.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    # Sample the hyperparameters in the same order the layers are stacked.
    first_lstm_units = hp.Int('units_lstm_1', min_value=60, max_value=140, step=20)
    second_lstm_units = hp.Int('units_lstm_2', min_value=30, max_value=80, step=20)
    dropout_rate = hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.05)
    hidden_units = hp.Int('dense_units', min_value=32, max_value=96, step=32)

    network = Sequential([
        LSTM(units=first_lstm_units, return_sequences=True, input_shape=(53, 1)),
        LSTM(units=second_lstm_units, return_sequences=False),
        Dropout(rate=dropout_rate),
        Dense(units=hidden_units, activation='relu'),
        Dense(3, activation='softmax'),
    ])
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Epoch budget shared by the Hyperband search and the final retraining run.
max_epochs = 20

# Hyperband search over build_model's hyperparameters, ranked by validation accuracy.
# `seed` makes the sampled configurations repeatable between runs (model weight
# initialisation is not seeded here).
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=max_epochs,
    seed=42,
    directory='keras_tuner_dir',
    project_name='lstm_optimization',
)

# Cut the learning rate by 10x when val_loss has not improved for 5 epochs (floor 1e-5).
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.00001)

# Execute the search (20% of the training data is held out for validation)
tuner.search(X_train, y_train, epochs=max_epochs, validation_split=0.2, callbacks=[reduce_lr])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The optimal number of units in the first LSTM layer is {best_hps.get('units_lstm_1')},
the optimal number of units in the second LSTM layer is {best_hps.get('units_lstm_2')},
the optimal dropout rate is {best_hps.get('dropout')}, and
the optimal number of neurons in the first dense layer is {best_hps.get('dense_units')}.
""")

# Build a fresh model with the optimal hyperparameters and train it on the data
# (only 10% is held out for validation here, versus 20% during the search).
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=max_epochs, validation_split=0.1, callbacks=[reduce_lr])

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')


Trial 16 Complete [00h 13m 27s]
val_accuracy: 0.47502535581588745

Best val_accuracy So Far: 0.4776473343372345
Total elapsed time: 02h 57m 37s

Search: Running Trial #17

Value             |Best Value So Far |Hyperparameter
140               |140               |units_lstm_1
70                |70                |units_lstm_2
0                 |0                 |dropout
32                |32                |dense_units
20                |7                 |tuner/epochs
7                 |3                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
2                 |1                 |tuner/round
0013              |0001              |tuner/trial_id

Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  859/18975 [>.............................] - ETA: 2:41 - loss: 0.9581 - accuracy: 0.5092