# Neural Network with Optuna

This notebook trains a fully-connected neural network on the NN feature matrices, tuning depth, width, dropout, optimizer, and learning rate via Optuna.


In [3]:
import optuna
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import f1_score, accuracy_score, classification_report
import tensorflow as tf
from tensorflow import keras


In [4]:
tf.random.set_seed(42)
np.random.seed(42)

DATA_DIR = Path("/Users/aaryan/Desktop/ML_multi_class/preprocessed_csv")
OUTPUT_DIR = Path("/Users/aaryan/Desktop/ML_multi_class")

X_train = pd.read_csv(DATA_DIR / "X_train_nn_smote.csv").to_numpy(dtype=np.float32)
y_train = pd.read_csv(DATA_DIR / "y_train_smote.csv").squeeze().to_numpy()

X_val = pd.read_csv(DATA_DIR / "X_val_nn.csv").to_numpy(dtype=np.float32)
y_val = pd.read_csv(DATA_DIR / "y_val.csv").squeeze().to_numpy()

X_test = pd.read_csv(DATA_DIR / "X_test_nn.csv").to_numpy(dtype=np.float32)
test_ids = pd.read_csv(DATA_DIR / "test_ids.csv")

label_map = pd.read_csv(DATA_DIR / "label_encoder_mapping.csv")
encoded_to_cluster = dict(zip(label_map["encoded_value"], label_map["cluster_name"]))

input_dim = X_train.shape[1]
num_classes = len(np.unique(np.concatenate([y_train, y_val])))

print(f"Train shape: {X_train.shape}, Val shape: {X_val.shape}, Test shape: {X_test.shape}")
print(f"Detected {num_classes} classes")


Train shape: (3895, 19), Val shape: (383, 19), Test shape: (479, 19)
Detected 5 classes


In [5]:
def build_model(trial: optuna.Trial) -> keras.Model:
    n_hidden = trial.suggest_int("n_hidden", 1, 4)
    hidden_units = trial.suggest_int("hidden_units", 128, 768, step=64)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["adam", "rmsprop", "adamw"])

    inputs = keras.Input(shape=(input_dim,))
    x = inputs
    for _ in range(n_hidden):
        x = keras.layers.Dense(hidden_units, activation="relu")(x)
        if dropout > 0:
            x = keras.layers.Dropout(dropout)(x)

    outputs = keras.layers.Dense(num_classes, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)

    if optimizer_name == "adam":
        optimizer = keras.optimizers.Adam(learning_rate=lr)
    elif optimizer_name == "adamw":
        optimizer = keras.optimizers.AdamW(learning_rate=lr)
    else:
        optimizer = keras.optimizers.RMSprop(learning_rate=lr)

    model.compile(
        optimizer=optimizer,
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )
    return model


def objective(trial: optuna.Trial) -> float:
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    epochs = trial.suggest_int("epochs", 20, 80, step=10)

    model = build_model(trial)
    early_stop = keras.callbacks.EarlyStopping(
        monitor="val_f1_macro",
        mode="max",
        patience=5,
        restore_best_weights=True,
    )

    class F1Callback(keras.callbacks.Callback):
        def __init__(self):
            self.val_f1 = []

        def on_epoch_end(self, epoch, logs=None):
            preds = np.argmax(self.model.predict(X_val, verbose=0), axis=1)
            f1 = f1_score(y_val, preds, average="macro")
            logs = logs or {}
            logs["val_f1_macro"] = f1
            self.val_f1.append(f1)

    f1_callback = F1Callback()

    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[f1_callback, early_stop],
        verbose=0,
    )

    best_f1 = max(f1_callback.val_f1)
    preds = np.argmax(model.predict(X_val, verbose=0), axis=1)
    acc = accuracy_score(y_val, preds)
    trial.set_user_attr("accuracy", acc)
    return best_f1


In [6]:
study = optuna.create_study(direction="maximize", study_name="nn_macro_f1")
study.optimize(objective, n_trials=25, timeout=3600)

print(f"Best macro F1: {study.best_value:.4f}")
print("Best params:")
for k, v in study.best_trial.params.items():
    print(f"  {k}: {v}")
print(f"Validation accuracy: {study.best_trial.user_attrs['accuracy']:.4f}")


[I 2025-11-27 14:21:02,238] A new study created in memory with name: nn_macro_f1
[I 2025-11-27 14:21:06,917] Trial 0 finished with value: 0.5698211115005373 and parameters: {'batch_size': 128, 'epochs': 70, 'n_hidden': 2, 'hidden_units': 448, 'dropout': 0.32713306579619544, 'lr': 0.0005165722488223508, 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.5698211115005373.
[I 2025-11-27 14:21:19,882] Trial 1 finished with value: 0.5972415041404588 and parameters: {'batch_size': 64, 'epochs': 30, 'n_hidden': 4, 'hidden_units': 576, 'dropout': 0.1781231467477145, 'lr': 0.00017837121041514768, 'optimizer': 'adam'}. Best is trial 1 with value: 0.5972415041404588.
[I 2025-11-27 14:21:29,414] Trial 2 finished with value: 0.6229240737732653 and parameters: {'batch_size': 32, 'epochs': 70, 'n_hidden': 4, 'hidden_units': 128, 'dropout': 0.47671295725378376, 'lr': 0.0002714935065113107, 'optimizer': 'adamw'}. Best is trial 2 with value: 0.6229240737732653.
[I 2025-11-27 14:21:33,082] Trial 3 fi

Best macro F1: 0.6229
Best params:
  batch_size: 32
  epochs: 70
  n_hidden: 4
  hidden_units: 128
  dropout: 0.47671295725378376
  lr: 0.0002714935065113107
  optimizer: adamw
Validation accuracy: 0.7415


In [7]:
best_params = study.best_trial.params.copy()

combined_X = np.vstack([X_train, X_val])
combined_y = np.concatenate([y_train, y_val])

fixed_trial = optuna.trial.FixedTrial(best_params)
best_model = build_model(fixed_trial)

best_model.fit(
    combined_X,
    combined_y,
    epochs=best_params["epochs"],
    batch_size=best_params["batch_size"],
    validation_data=(X_val, y_val),
    verbose=0,
)

val_preds = np.argmax(best_model.predict(X_val, verbose=0), axis=1)
print(classification_report(y_val, val_preds))

proba_test = best_model.predict(X_test, verbose=0)
test_labels = np.argmax(proba_test, axis=1)

submission = pd.DataFrame(
    {
        "participant_id": test_ids.squeeze(),
        "personality_cluster": [encoded_to_cluster[int(label)] for label in test_labels],
    }
)

nn_pred_path = OUTPUT_DIR / "nn_submission.csv"
submission.to_csv(nn_pred_path, index=False)
print(f"Saved submission to {nn_pred_path}")

submission.head()


              precision    recall  f1-score   support

           0       0.75      0.53      0.62        17
           1       0.75      0.55      0.63        44
           2       0.69      0.75      0.72        61
           3       0.63      0.73      0.68        66
           4       0.91      0.91      0.91       195

    accuracy                           0.80       383
   macro avg       0.75      0.69      0.71       383
weighted avg       0.80      0.80      0.79       383

Saved submission to /Users/aaryan/Desktop/ML_multi_class/nn_submission.csv


Unnamed: 0,participant_id,personality_cluster
0,1005,Cluster_E
1,197,Cluster_C
2,2343,Cluster_E
3,1709,Cluster_B
4,436,Cluster_E
