In [None]:
!pip install optuna
import pandas as pd
import numpy as np
import optuna
import json
import tensorflow as tf
from sklearn.metrics import r2_score



def compute_r2_score(model, x_test, y_test):
    """Return the R2 score of ``model`` on the given data, scaled by 100.

    Predictions are obtained with ``model.predict(x_test)`` and scored
    against ``y_test`` using ``sklearn.metrics.r2_score``.
    """
    predictions = model.predict(x_test)
    score = r2_score(y_test, predictions)
    return score * 100



def create_model(trial, input_shape=None):
    """Build and compile a 1D-CNN regressor from Optuna-suggested hyperparameters.

    The network is three Conv1D + MaxPooling1D(2) stages (kernel sizes 5, 3
    and 3), a Flatten layer, two Dense + Dropout stages and a single-unit
    linear output. It is compiled with Adam, mean-squared-error loss and an
    MAE metric.

    Args:
        trial: Optuna trial used to sample the filter counts, dense widths,
            dropout rates and learning rate.
        input_shape: Shape of one input sample, excluding the batch axis.
            When omitted, it is taken from axes 1 and 2 of the module-level
            ``x_train_source`` array, which is the original behaviour.

    Returns:
        The compiled ``tf.keras`` Sequential model (not yet trained).
    """
    conv_filters_1 = trial.suggest_categorical('conv_filters_1', [32, 64, 128, 256])
    conv_filters_2 = trial.suggest_categorical('conv_filters_2', [32, 64, 128, 256])
    conv_filters_3 = trial.suggest_categorical('conv_filters_3', [32, 64, 128, 256])
    dense_units_1 = trial.suggest_categorical('dense_units_1', [64, 128, 256])
    dense_units_2 = trial.suggest_categorical('dense_units_2', [64, 128, 256])
    dropout_rate_1 = trial.suggest_float('dropout_rate_1', 0.2, 0.5, step=0.05)
    dropout_rate_2 = trial.suggest_float('dropout_rate_2', 0.2, 0.5, step=0.05)
    # Sampled on a log scale so small and large rates are explored evenly.
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    if input_shape is None:
        # Backward-compatible default: callers that pass nothing keep using
        # the source training array to define the input shape.
        input_shape = (x_train_source.shape[1], x_train_source.shape[2])

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=input_shape))
    # First convolution: tuned number of filters, kernel size 5, no padding.
    model.add(tf.keras.layers.Conv1D(conv_filters_1, 5, strides=1, padding='valid', activation='relu'))
    model.add(tf.keras.layers.MaxPooling1D(2))
    model.add(tf.keras.layers.Conv1D(conv_filters_2, 3, activation='relu'))
    model.add(tf.keras.layers.MaxPooling1D(2))
    model.add(tf.keras.layers.Conv1D(conv_filters_3, 3, activation='relu'))
    model.add(tf.keras.layers.MaxPooling1D(2))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(dense_units_1, activation='relu'))
    model.add(tf.keras.layers.Dropout(dropout_rate_1))
    model.add(tf.keras.layers.Dense(dense_units_2, activation='relu'))
    model.add(tf.keras.layers.Dropout(dropout_rate_2))
    # Single linear unit: the model predicts one continuous value per sample.
    model.add(tf.keras.layers.Dense(1))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])

    return model



def objective_source(trial):
    """Optuna objective: train a CNN on the source data and return its R2 score.

    Args:
        trial: Optuna trial used to sample the architecture (through
            ``create_model``) and the batch size.

    Returns:
        R2 score, multiplied by 100, of the trained model on
        ``x_test_source`` / ``y_test_source``.
    """
    # Drop Keras state left over from earlier trials; without this, memory
    # use keeps growing as each trial builds another model.
    tf.keras.backend.clear_session()

    model = create_model(trial)

    # Stop when val_loss has not improved for 10 epochs and restore the
    # best weights seen so far.
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10, restore_best_weights=True)

    # Suggest batch size
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

    # validation_split provides the val_loss that early stopping monitors.
    model.fit(x_train_source, y_train_source, epochs=30, validation_split=0.2,
              batch_size=batch_size, verbose=0, shuffle=True, callbacks=[es])

    # NOTE(review): the score is computed on the test split, so the chosen
    # hyperparameters are selected against test data.
    # NOTE(review): no intermediate value is passed to trial.report(), so the
    # MedianPruner set up in best_param() has nothing to prune on.
    val_r2 = compute_r2_score(model, x_test_source, y_test_source)

    return val_r2


def objective_target(trial):
    """Optuna objective: train a CNN on the target data and return its R2 score.

    Reads the module-level ``x_train_target`` / ``y_train_target`` and
    ``x_test_target`` / ``y_test_target`` arrays at call time, so it uses
    whichever dataset those names are currently bound to.

    Args:
        trial: Optuna trial used to sample the architecture (through
            ``create_model``) and the batch size.

    Returns:
        R2 score, multiplied by 100, of the trained model on the target
        test arrays.
    """
    # Drop Keras state left over from earlier trials; without this, memory
    # use keeps growing as each trial builds another model.
    tf.keras.backend.clear_session()

    model = create_model(trial)

    # Stop when val_loss has not improved for 10 epochs and restore the
    # best weights seen so far.
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10, restore_best_weights=True)

    # Suggest batch size
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

    # validation_split provides the val_loss that early stopping monitors.
    model.fit(x_train_target, y_train_target, epochs=30, validation_split=0.2,
              batch_size=batch_size, verbose=0, shuffle=True, callbacks=[es])

    # NOTE(review): the score is computed on the test split, so the chosen
    # hyperparameters are selected against test data.
    # NOTE(review): no intermediate value is passed to trial.report(), so the
    # MedianPruner set up in best_param() has nothing to prune on.
    val_r2 = compute_r2_score(model, x_test_target, y_test_target)

    return val_r2


def objective_target_2(trial):
    """Optuna objective: train a CNN on the target_2 data and return its R2 score.

    Reads the module-level ``x_train_target_2`` / ``y_train_target_2`` and
    ``x_test_target_2`` / ``y_test_target_2`` arrays at call time, so it uses
    whichever dataset those names are currently bound to.

    Args:
        trial: Optuna trial used to sample the architecture (through
            ``create_model``) and the batch size.

    Returns:
        R2 score, multiplied by 100, of the trained model on the target_2
        test arrays.
    """
    # Drop Keras state left over from earlier trials; without this, memory
    # use keeps growing as each trial builds another model.
    tf.keras.backend.clear_session()

    model = create_model(trial)

    # Stop when val_loss has not improved for 10 epochs and restore the
    # best weights seen so far.
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10, restore_best_weights=True)

    # Suggest batch size
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

    # validation_split provides the val_loss that early stopping monitors.
    model.fit(x_train_target_2, y_train_target_2, epochs=30, validation_split=0.2,
              batch_size=batch_size, verbose=0, shuffle=True, callbacks=[es])

    # NOTE(review): the score is computed on the test split, so the chosen
    # hyperparameters are selected against test data.
    # NOTE(review): no intermediate value is passed to trial.report(), so the
    # MedianPruner set up in best_param() has nothing to prune on.
    val_r2 = compute_r2_score(model, x_test_target_2, y_test_target_2)

    return val_r2


def best_param(data, filename, n_trials=60):
    """Run an Optuna study for one dataset and save the best hyperparameters.

    The best trial's value and parameters are printed, and the parameters
    are written to ``filename`` as JSON.

    Args:
        data: Which objective to optimise: 'source', 'target' or 'target_2'.
        filename: Path of the JSON file that receives the best parameters.
            Its parent directory is created if it does not exist.
        n_trials: Number of trials to run in the study (default 60).

    Raises:
        ValueError: If ``data`` is not one of the supported names.
    """
    import os

    # Resolve the objective before doing any work, so a mistyped name fails
    # immediately with a clear message instead of later at study.best_trial.
    if data == 'source':
        objective = objective_source
    elif data == 'target':
        objective = objective_target
    elif data == 'target_2':
        objective = objective_target_2
    else:
        raise ValueError(
            f"Unknown data {data!r}; expected 'source', 'target' or 'target_2'"
        )

    # Create the output folder up front: otherwise a missing directory is
    # only discovered after every trial has run, and the result is lost.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The pruner only has an effect if the objective reports intermediate
    # values through trial.report().
    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=10,  # Trials that must finish before pruning is considered
        n_warmup_steps=10,    # Steps inside a trial before it may be pruned
        interval_steps=2,     # Check for pruning every 2 steps after warmup
    )

    # The objectives return an R2 score, so higher is better.
    study = optuna.create_study(direction='maximize', pruner=pruner)
    study.optimize(objective, n_trials=n_trials)

    # Print the best parameters
    print('Best trial:')
    trial = study.best_trial

    print(f'  Value: {trial.value}')
    print('  Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    with open(filename, 'w') as f:
        json.dump(trial.params, f)


# Load the preprocessed source arrays used for model fitting. They are bound
# as module-level names because objective_source() and create_model() read
# them as globals.

x_train_source = np.load('Data/Preprocessed data/source_train_x.npy')
y_train_source = np.load('Data/Preprocessed data/source_train_y.npy')
x_test_source = np.load('Data/Preprocessed data/source_test_x.npy')
y_test_source = np.load('Data/Preprocessed data/source_test_y.npy')


# Tune the source model and save its best hyperparameters.
# NOTE(review): the source parameters are written under the 12M folder only;
# confirm that is the intended location.
best_param('source', 'Models/CNN/12M/Tuned Hyperparameters/source_params.json')


# HYPERPARAMETER TUNING FOR 12 MONTH DATASETS
# The target / target_2 names below are the globals read by
# objective_target() and objective_target_2().

x_train_target = np.load('Data/Preprocessed data/12M/target_train_x.npy')
y_train_target = np.load('Data/Preprocessed data/12M/target_train_y.npy')
x_test_target = np.load('Data/Preprocessed data/12M/target_test_x.npy')
y_test_target = np.load('Data/Preprocessed data/12M/target_test_y.npy')
x_train_target_2 = np.load('Data/Preprocessed data/12M/target_2_train_x.npy')
y_train_target_2 = np.load('Data/Preprocessed data/12M/target_2_train_y.npy')
x_test_target_2 = np.load('Data/Preprocessed data/12M/target_2_test_x.npy')
y_test_target_2 = np.load('Data/Preprocessed data/12M/target_2_test_y.npy')


# Save the best hyperparameters for the 12-month target and target_2 models.

best_param('target', 'Models/CNN/12M/Tuned Hyperparameters/target_params.json')

best_param('target_2', 'Models/CNN/12M/Tuned Hyperparameters/target_2_params.json')


# HYPERPARAMETER TUNING FOR 6 MONTH DATASETS
# The same global names are rebound to the 6M arrays. The objectives look
# them up at call time, so the calls below tune on the 6-month data.

x_train_target = np.load('Data/Preprocessed data/6M/target_train_x.npy')
y_train_target = np.load('Data/Preprocessed data/6M/target_train_y.npy')
x_test_target = np.load('Data/Preprocessed data/6M/target_test_x.npy')
y_test_target = np.load('Data/Preprocessed data/6M/target_test_y.npy')
x_train_target_2 = np.load('Data/Preprocessed data/6M/target_2_train_x.npy')
y_train_target_2 = np.load('Data/Preprocessed data/6M/target_2_train_y.npy')
x_test_target_2 = np.load('Data/Preprocessed data/6M/target_2_test_x.npy')
y_test_target_2 = np.load('Data/Preprocessed data/6M/target_2_test_y.npy')


# Save the best hyperparameters for the 6-month target and target_2 models.
# The source model is not re-tuned here.


best_param('target', 'Models/CNN/6M/Tuned Hyperparameters/target_params.json')

best_param('target_2', 'Models/CNN/6M/Tuned Hyperparameters/target_2_params.json')
