In [None]:
import pathlib

import numpy as np
import optuna
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import (
    MinMaxScaler,
    Normalizer,
    RobustScaler,
    StandardScaler,
)

In [None]:
# Enable on-demand memory growth for every GPU TensorFlow reports, instead of
# letting the first use of a device reserve its memory up front.
gpus = tf.config.experimental.list_physical_devices(device_type="GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)

In [None]:
# Width of the first Dense layer built in objective().
# NOTE(review): presumably equal to the number of feature columns — confirm.
N_INPUTS: int = 19
# Number of units in the output layer built in objective().
N_CLASSES: int = 3

In [None]:
def objective(trial):
    """Train one Keras MLP with hyperparameters drawn from ``trial`` and score it.

    The cleaned CTG spreadsheet is read, split positionally into the first
    1913 rows (training) and the remaining rows (test), scaled with the scaler
    the trial selects, and used to fit a small fully connected network.

    Args:
        trial: Optuna trial used to sample ``n_layers``, ``batch_size``,
            ``learning_rate``, ``epochs``, ``batch norm``, ``normalize`` and
            one ``n_units_l{i}`` per hidden layer.

    Returns:
        The categorical accuracy reported by ``model.evaluate`` on the test
        rows.

    Side effects:
        Saves the fitted model to ``../models/optuna_keras`` whenever that
        accuracy is at least 0.9953.
    """
    cleaned_data_path = pathlib.Path("../data/interim/ctg_cleaned_manually.xlsx")
    data = pd.read_excel(cleaned_data_path, header=0)
    # Label-based slice: both "Min" and "SUSP" are included.
    x_raw = data.loc[:, "Min":"SUSP"].to_numpy()
    y_raw_nsp = data.loc[:, "NSP"].to_numpy()
    # NOTE(review): tf.one_hot emits an all-zero row for any index outside
    # [0, depth), so with depth=max(labels) every row carrying the largest
    # label is encoded as all zeros. The evaluation cell further down encodes
    # labels the same way; both places have to change together, so the
    # encoding is deliberately left untouched here.
    y_nsp_one_hot = tf.one_hot(indices=y_raw_nsp, depth=max(y_raw_nsp)).numpy()

    # Positional split, no shuffling: first n_train rows train, the rest test.
    n_train = 1913
    x_train, x_test = x_raw[:n_train], x_raw[n_train:]
    y_train, y_test = y_nsp_one_hot[:n_train], y_nsp_one_hot[n_train:]

    # Keep the suggestion order stable: it is part of how the sampler is driven.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    batch_size = trial.suggest_int("batch_size", 1, 64, log=True)
    # suggest_float(log=True) replaces the deprecated suggest_loguniform.
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-3, log=True)
    epochs = trial.suggest_int("epochs", 5, 50, step=5)
    batch_norm = trial.suggest_categorical("batch norm", [True, False])
    normalize = trial.suggest_categorical("normalize", ["minmax", "meanstd", "robust"])

    # "meanstd" maps to StandardScaler (per-feature zero mean / unit variance).
    # The previous Normalizer rescales each *row* to unit norm and learns
    # nothing from the training data, which is not mean/std scaling.
    scaler_classes = {
        "minmax": MinMaxScaler,
        "meanstd": StandardScaler,
        "robust": RobustScaler,
    }
    scaler = scaler_classes[normalize]()
    # Statistics come from the training rows only and are reused for the test rows.
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units=N_INPUTS, activation="relu"))

    for i in range(n_layers):
        num_hidden = trial.suggest_int(f"n_units_l{i}", 4, 128, log=True)
        model.add(tf.keras.layers.Dense(num_hidden, activation="relu"))
        if batch_norm:
            model.add(tf.keras.layers.BatchNormalization())

    # Single-label classification with categorical cross-entropy: softmax makes
    # the outputs a probability distribution over the classes (sigmoid scores
    # each unit independently).
    model.add(tf.keras.layers.Dense(N_CLASSES, activation="softmax"))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        # A list is the documented form; a bare string is not accepted by
        # every Keras version.
        metrics=["categorical_accuracy"],
    )
    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        # NOTE(review): 20 % of the training rows are held out here, but the
        # validation metrics are never read; the objective below is the test
        # accuracy, so hyperparameters are tuned on the test rows.
        validation_split=0.2,
        epochs=epochs,
        verbose=0,
    )
    # verbose=0 keeps parallel trials from interleaving progress bars.
    _, accuracy = model.evaluate(x=x_test, y=y_test, verbose=0)

    save_threshold = 0.9953
    if accuracy >= save_threshold:
        # NOTE(review): every qualifying trial writes to the same path, so a
        # later (possibly weaker) trial overwrites an earlier one, and trials
        # running in parallel may write concurrently.
        model.save("../models/optuna_keras")
    return accuracy

In [None]:
# Create the "keras_mlp" study in the SQLite storage, or reopen it when it is
# already there (load_if_exists=True), and maximise the value returned by
# objective().
# NOTE(review): the sampler is seeded, but trials run as 8 parallel jobs —
# confirm whether results are expected to be reproducible in that setup.
study = optuna.create_study(
    study_name="keras_mlp",
    storage="sqlite:///../database/study/keras_mlp_win10.sqlite",
    load_if_exists=True,
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Run 100 trials, 8 at a time.
study.optimize(func=objective, n_trials=100, n_jobs=8)

In [None]:
# Display the hyperparameters of the best trial in the study.
study.best_params

In [None]:
# Rebuild the arrays used to evaluate the saved model: same spreadsheet, same
# column ranges and same positional split as inside objective().
cleaned_data_path = pathlib.Path("../data/interim/ctg_cleaned_manually.xlsx")
data = pd.read_excel(cleaned_data_path, header=0)

# Label-based slice: both "Min" and "SUSP" are included.
x_raw = data.loc[:, "Min":"SUSP"].to_numpy()
y_raw_nsp = data["NSP"].to_numpy()
# NOTE(review): tf.one_hot emits an all-zero row for any index outside
# [0, depth), so with depth=max(labels) every row carrying the largest label is
# encoded as all zeros. objective() encodes labels the same way; change both
# together or the saved model and these targets will disagree.
y_nsp_one_hot = tf.one_hot(indices=y_raw_nsp, depth=max(y_raw_nsp)).numpy()

# Rows before index 1913 form the training part, the remaining rows the test part.
x_train, x_test = x_raw[:1913], x_raw[1913:]
y_train, y_test = y_nsp_one_hot[:1913], y_nsp_one_hot[1913:]

# Min-max statistics are learned from the training rows only, then applied to
# both parts.
# NOTE(review): objective() picks the scaler per trial ("normalize"), so the
# saved model may have been trained on differently scaled inputs — confirm
# that min-max is the right choice for the model being evaluated.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Load the model that objective() saved and score it on the test arrays
# prepared above; evaluate() returns the loss followed by the compiled metric.
model = tf.keras.models.load_model(filepath="../models/optuna_keras/")

loss, accuracy = model.evaluate(x=x_test, y=y_test)

In [None]:
%%timeit
model.predict(x_test[0:1])