In [None]:
!pip install optuna
import pandas as pd
import numpy as np
import optuna
import json
import math
import tensorflow as tf
from sklearn.metrics import r2_score


def compute_r2_score(model, x_test, y_test):
    """Return the R2 score of ``model`` on the given data, as a percentage.

    Args:
        model: Object exposing ``predict(x_test)``.
        x_test: Inputs passed unchanged to ``model.predict``.
        y_test: Ground-truth targets compared against the predictions.

    Returns:
        ``r2_score(y_test, y_pred) * 100``, or ``0`` when any prediction is
        NaN or infinite, so a diverged model gets a poor score instead of
        raising inside ``r2_score``.
    """
    y_pred = np.asarray(model.predict(x_test))

    # One vectorised check replaces the per-element Python loop and works for
    # any output shape; ``math.isnan`` on a row holding more than one value
    # raises TypeError. Infinite values are treated like NaN because
    # ``r2_score`` rejects them as well.
    if not np.isfinite(y_pred).all():
        return 0

    return r2_score(y_test, y_pred) * 100



def create_model(trial, input_shape=None):
    """Build and compile a stacked-LSTM regressor from sampled hyperparameters.

    Args:
        trial: Optuna trial used to sample the layer widths, dropout rates
            and learning rate.
        input_shape: Shape of one input sample, i.e. the training array's
            shape without its leading axis. When omitted, it is taken from
            the module-level ``x_train_source`` array, which keeps existing
            ``create_model(trial)`` calls working unchanged.

    Returns:
        A compiled ``tf.keras`` ``Sequential`` model: three LSTM layers, two
        Dense+Dropout stages and a single-unit output, trained with MSE loss
        and the Adam optimizer.
    """
    if input_shape is None:
        input_shape = (x_train_source.shape[1], x_train_source.shape[2])

    lstm_choices = [32, 64, 128, 256]
    dense_choices = [64, 128, 256]

    # The sampling order and parameter names are kept exactly as before so
    # saved parameter files and seeded samplers stay compatible.
    lstm_units_1 = trial.suggest_categorical('lstm_units_1', lstm_choices)
    lstm_units_2 = trial.suggest_categorical('lstm_units_2', lstm_choices)
    lstm_units_3 = trial.suggest_categorical('lstm_units_3', lstm_choices)
    dense_units_1 = trial.suggest_categorical('dense_units_1', dense_choices)
    dense_units_2 = trial.suggest_categorical('dense_units_2', dense_choices)
    dropout_rate_1 = trial.suggest_float('dropout_rate_1', 0.2, 0.5, step=0.05)
    dropout_rate_2 = trial.suggest_float('dropout_rate_2', 0.2, 0.5, step=0.05)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=tuple(input_shape)))
    # Only the last LSTM collapses the sequence; the first two must return
    # full sequences so the next LSTM receives 3-D input.
    model.add(tf.keras.layers.LSTM(lstm_units_1, activation='relu', return_sequences=True))
    model.add(tf.keras.layers.LSTM(lstm_units_2, activation="relu", return_sequences=True))
    model.add(tf.keras.layers.LSTM(lstm_units_3, activation="relu", return_sequences=False))
    model.add(tf.keras.layers.Dense(dense_units_1, activation='relu'))
    model.add(tf.keras.layers.Dropout(dropout_rate_1))
    model.add(tf.keras.layers.Dense(dense_units_2, activation='relu'))
    model.add(tf.keras.layers.Dropout(dropout_rate_2))
    model.add(tf.keras.layers.Dense(1))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])

    return model


def _run_trial(trial, x_train, y_train, x_test, y_test, batch_sizes):
    """Train one trial's model on the given arrays and return its R2 score.

    The input layer is sized from ``x_train`` itself, so the model always
    matches the data it is fitted on rather than the source arrays.

    Args:
        trial: Optuna trial supplying the hyperparameters.
        x_train, y_train: Training arrays; 20% is held out by Keras as the
            validation split that drives early stopping.
        x_test, y_test: Arrays the returned score is computed on.
        batch_sizes: Candidate batch sizes offered to the trial.

    Returns:
        The value of ``compute_r2_score`` for the trained model.
    """
    model = create_model(trial, input_shape=(x_train.shape[1], x_train.shape[2]))

    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10, restore_best_weights=True)

    # Suggested after the architecture parameters, matching the original order.
    batch_size = trial.suggest_categorical('batch_size', batch_sizes)

    model.fit(x_train, y_train, epochs=40, validation_split=0.2,
              batch_size=batch_size, verbose=0, shuffle=True, callbacks=[es])

    # NOTE(review): the objective is scored on the test arrays, so the
    # hyperparameters are selected against the test split - confirm intended.
    return compute_r2_score(model, x_test, y_test)


def objective(trial):
    """Optuna objective for the source dataset (module-level ``*_source`` arrays)."""
    return _run_trial(trial, x_train_source, y_train_source,
                      x_test_source, y_test_source, [32, 64, 128, 256])


def objective_target(trial):
    """Optuna objective for the target dataset (module-level ``*_target`` arrays)."""
    return _run_trial(trial, x_train_target, y_train_target,
                      x_test_target, y_test_target, [32, 64, 128])


def objective_target_2(trial):
    """Optuna objective for the second target dataset (``*_target_2`` arrays)."""
    return _run_trial(trial, x_train_target_2, y_train_target_2,
                      x_test_target_2, y_test_target_2, [32, 64, 128])

def best_param(data, filename, n_trials=None):
    """Run an Optuna study for one dataset and save the best parameters as JSON.

    Args:
        data: Which objective to optimise: 'source', 'target' or 'target_2'.
        filename: Path of the JSON file that receives the best trial's
            parameters. Missing parent directories are created.
        n_trials: Optional number of trials. Defaults to 100 for 'source'
            and 60 for 'target' / 'target_2'.

    Raises:
        ValueError: If ``data`` is not one of the supported names.
    """
    import os

    default_trials = {'source': 100, 'target': 60, 'target_2': 60}
    if data not in default_trials:
        # Fail before any work is done instead of creating an empty study
        # that only errors later when the best trial is requested.
        raise ValueError(
            "unknown data {!r}; expected one of {}".format(data, sorted(default_trials))
        )

    # Make sure the result can be written BEFORE spending time on tuning;
    # otherwise a missing directory is only discovered after the whole study.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    objectives = {
        'source': objective,
        'target': objective_target,
        'target_2': objective_target_2,
    }
    trials_to_run = default_trials[data] if n_trials is None else n_trials

    study = optuna.create_study(direction='maximize')
    # Start the optimization
    study.optimize(objectives[data], n_trials=trials_to_run)

    # Print the best parameters
    print('Best trial:')
    trial = study.best_trial

    print('  Value: {}'.format(trial.value))
    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    with open(filename, 'w') as f:
        json.dump(trial.params, f)


# Hyperparameter tuning for the 12-month datasets.
#
# The arrays below are bound as module-level globals because the objective
# functions (objective, objective_target, objective_target_2) read them by
# name rather than receiving them as arguments.

x_train_source = np.load('Data/Preprocessed data/12M/source_train_x.npy')
y_train_source = np.load('Data/Preprocessed data/12M/source_train_y.npy')
x_test_source = np.load('Data/Preprocessed data/12M/source_test_x.npy')
y_test_source = np.load('Data/Preprocessed data/12M/source_test_y.npy')
x_train_target = np.load('Data/Preprocessed data/12M/target_train_x.npy')
y_train_target = np.load('Data/Preprocessed data/12M/target_train_y.npy')
x_test_target = np.load('Data/Preprocessed data/12M/target_test_x.npy')
y_test_target = np.load('Data/Preprocessed data/12M/target_test_y.npy')
x_train_target_2 = np.load('Data/Preprocessed data/12M/target_2_train_x.npy')
y_train_target_2 = np.load('Data/Preprocessed data/12M/target_2_train_y.npy')
x_test_target_2 = np.load('Data/Preprocessed data/12M/target_2_test_x.npy')
y_test_target_2 = np.load('Data/Preprocessed data/12M/target_2_test_y.npy')


# Run one study per base model (source, target, target_2) and save each
# study's best hyperparameters to its own JSON file.

best_param('source', 'Models/LSTM/12M/Tuned Hyperparameters/source_params.json')

best_param('target', 'Models/LSTM/12M/Tuned Hyperparameters/target_params.json')

best_param('target_2', 'Models/LSTM/12M/Tuned Hyperparameters/target_2_params.json')


# Hyperparameter tuning for the 6-month datasets.
#
# Only the four *_target globals are rebound here, which redirects the
# 'target' objective to the 6M data. The *_source and *_target_2 globals
# still hold the 12M arrays loaded above.

x_train_target = np.load('Data/Preprocessed data/6M/target_train_x.npy')
y_train_target = np.load('Data/Preprocessed data/6M/target_train_y.npy')
x_test_target = np.load('Data/Preprocessed data/6M/target_test_x.npy')
y_test_target = np.load('Data/Preprocessed data/6M/target_test_y.npy')


# Save the best hyperparameters for the 6M target model.


best_param('target', 'Models/LSTM/6M/Tuned Hyperparameters/target_params.json')
