### Baseline model
hidden_layer_sizes=(256, 128),   # 2층 MLP <br> activation='relu', <br> solver='adam', <br> alpha=1e-4,         # L2 regularization <br> batch_size=128, <br> learning_rate='adaptive', <br> learning_rate_init=1e-3, <br> max_iter=400, <br> early_stopping=True

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, roc_auc_score, average_precision_score
from sklearn.preprocessing import LabelBinarizer

In [2]:
# Read the closed-world train and test splits from CSV in one pass.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../dataset/closed_world/closedworld_train.csv',
        '../../dataset/closed_world/closedworld_test.csv',
    )
)

# Quick sanity summary: row/column counts and the number of distinct labels.
n_train_labels = train_df["label"].nunique()
print("raw train shape :", train_df.shape)
print("raw test shape  :", test_df.shape)
print("raw unique labels in train:", n_train_labels)

raw train shape : (13300, 27)
raw test shape  : (5700, 27)
raw unique labels in train: 95


In [3]:
# Split features and target.
# The feature columns are taken from the training frame and the SAME column
# names (in the same order) are used to index the test frame. `.values`
# discards column labels, so without this a test CSV with a different column
# order would be fed to the model with silently misaligned features.
feature_cols = train_df.columns.drop("label")

X = train_df[feature_cols].values
y = train_df["label"].values

# Raises KeyError if the test CSV is missing any training feature column.
X_test = test_df[feature_cols].values
y_test = test_df["label"].values

# Train/validation split: hold out 20% of the training data, stratified by
# label, with a fixed seed so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(f"[INFO] Train set: {X_train.shape}, Val set: {X_val.shape}")

[INFO] Train set: (10640, 26), Val set: (2660, 26)


In [4]:
# Baseline MLP hyperparameters, grouped by concern so the configuration can be
# read (and logged) as a single mapping before the estimator is built.
mlp_params = dict(
    # --- architecture ---
    hidden_layer_sizes=(256, 128),        # two hidden layers
    activation='relu',
    # --- optimisation ---
    solver='adam',
    learning_rate_init=1e-3,
    learning_rate='adaptive',             # only consulted by solver='sgd'; no effect with adam
    batch_size=128,
    alpha=1e-4,                           # L2 regularization
    max_iter=400,
    # --- early stopping on an internal hold-out of the training data ---
    early_stopping=True,
    validation_fraction=0.15,
    n_iter_no_change=15,
    # --- reproducibility / logging ---
    random_state=42,
    verbose=True,
)

mlp = MLPClassifier(**mlp_params)

In [5]:
# Fit the MLP on the training split only; X_val and X_test are not used here.
# The estimator is configured with early_stopping=True, so scikit-learn sets
# aside validation_fraction of X_train internally. The "Validation score" lines
# in the verbose log refer to that internal hold-out, not to X_val.
print("\n[INFO] Training MLP (256-128) ...")
mlp.fit(X_train, y_train)


[INFO] Training MLP (256-128) ...
Iteration 1, loss = 4.19737907
Validation score: 0.175439
Iteration 2, loss = 3.21673406
Validation score: 0.263784
Iteration 3, loss = 2.78152775
Validation score: 0.315789
Iteration 4, loss = 2.56233111
Validation score: 0.339599
Iteration 5, loss = 2.39435241
Validation score: 0.370927
Iteration 6, loss = 2.26532798
Validation score: 0.387845
Iteration 7, loss = 2.15262626
Validation score: 0.397870
Iteration 8, loss = 2.05643832
Validation score: 0.427945
Iteration 9, loss = 1.97398602
Validation score: 0.426065
Iteration 10, loss = 1.90398910
Validation score: 0.436717
Iteration 11, loss = 1.82955477
Validation score: 0.450501
Iteration 12, loss = 1.76831408
Validation score: 0.452381
Iteration 13, loss = 1.71236957
Validation score: 0.471805
Iteration 14, loss = 1.65770675
Validation score: 0.476817
Iteration 15, loss = 1.60529269
Validation score: 0.467419
Iteration 16, loss = 1.56055179
Validation score: 0.484336
Iteration 17, loss = 1.5231782

In [6]:
# Class probabilities and hard predictions for the validation split.
val_proba = mlp.predict_proba(X_val)
val_pred = mlp.predict(X_val)

# One-hot encode the labels for the per-class ROC / PR computations.
# The binarizer is fitted on the full training label vector `y` and is reused
# by the test evaluation cell.
# NOTE(review): the metrics below pair column i of `val_proba` with
# lb.classes_[i]; both are expected to follow sorted label order -- confirm.
lb = LabelBinarizer().fit(y)
y_val_bin = lb.transform(y_val)

# Threshold-based metrics.
val_acc = accuracy_score(y_val, val_pred)
val_f1_macro, val_f1_micro, val_f1_weighted = (
    f1_score(y_val, val_pred, average=avg_mode)
    for avg_mode in ('macro', 'micro', 'weighted')
)

# Ranking-based metrics.
# ROC-AUC: macro average over the one-hot label columns.
val_roc_auc = roc_auc_score(y_val_bin, val_proba, multi_class="ovr", average="macro")

# PR-AUC: average precision per class, then an unweighted mean.
pr_auc_list = [
    average_precision_score(y_val_bin[:, class_idx], val_proba[:, class_idx])
    for class_idx in range(len(lb.classes_))
]
val_pr_auc_macro = np.mean(pr_auc_list)

# Summary table followed by the per-class report.
print("\n===== [VAL RESULTS] =====")
print(f"Accuracy       : {val_acc:.4f}")
print(f"F1 (macro)     : {val_f1_macro:.4f}")
print(f"F1 (micro)     : {val_f1_micro:.4f}")
print(f"F1 (weighted)  : {val_f1_weighted:.4f}")
print(f"ROC-AUC (OvR)  : {val_roc_auc:.4f}")
print(f"PR-AUC (macro) : {val_pr_auc_macro:.4f}")
print("\nClassification Report:")
val_report = classification_report(y_val, val_pred, digits=4)
print(val_report)


===== [VAL RESULTS] =====
Accuracy       : 0.6305
F1 (macro)     : 0.6297
F1 (micro)     : 0.6305
F1 (weighted)  : 0.6297
ROC-AUC (OvR)  : 0.9792
PR-AUC (macro) : 0.6817

Classification Report:
              precision    recall  f1-score   support

           0     0.4828    0.5000    0.4912        28
           1     0.6667    0.5714    0.6154        28
           2     0.5556    0.7143    0.6250        28
           3     0.6800    0.6071    0.6415        28
           4     0.6364    0.5000    0.5600        28
           5     0.5600    0.5000    0.5283        28
           6     0.6000    0.7500    0.6667        28
           7     0.5758    0.6786    0.6230        28
           8     0.5152    0.6071    0.5574        28
           9     0.5667    0.6071    0.5862        28
          10     0.4839    0.5357    0.5085        28
          11     0.7037    0.6786    0.6909        28
          12     0.8947    0.6071    0.7234        28
          13     0.4211    0.2857    0.3404     

In [7]:
# Class probabilities and hard predictions for the held-out test split.
test_proba = mlp.predict_proba(X_test)
test_pred = mlp.predict(X_test)

# One-hot encode the test labels with the binarizer `lb` fitted in the
# validation cell, so the class columns match those used for validation.
y_test_bin = lb.transform(y_test)

# Threshold-based metrics.
test_acc = accuracy_score(y_test, test_pred)
test_f1_macro, test_f1_micro, test_f1_weighted = (
    f1_score(y_test, test_pred, average=avg_mode)
    for avg_mode in ('macro', 'micro', 'weighted')
)

# Ranking-based metrics: macro ROC-AUC over the one-hot columns, and the
# unweighted mean of per-class average precision.
test_roc_auc = roc_auc_score(y_test_bin, test_proba, multi_class="ovr", average="macro")

pr_auc_list_test = [
    average_precision_score(y_test_bin[:, class_idx], test_proba[:, class_idx])
    for class_idx in range(len(lb.classes_))
]
test_pr_auc_macro = np.mean(pr_auc_list_test)

# Summary table followed by the per-class report.
print("\n===== [TEST RESULTS] =====")
print(f"Accuracy       : {test_acc:.4f}")
print(f"F1 (macro)     : {test_f1_macro:.4f}")
print(f"F1 (micro)     : {test_f1_micro:.4f}")
print(f"F1 (weighted)  : {test_f1_weighted:.4f}")
print(f"ROC-AUC (OvR)  : {test_roc_auc:.4f}")
print(f"PR-AUC (macro) : {test_pr_auc_macro:.4f}")

print("\nClassification Report (Test):")
test_report = classification_report(y_test, test_pred, digits=4)
print(test_report)


===== [TEST RESULTS] =====
Accuracy       : 0.6396
F1 (macro)     : 0.6376
F1 (micro)     : 0.6396
F1 (weighted)  : 0.6376
ROC-AUC (OvR)  : 0.9774
PR-AUC (macro) : 0.6825

Classification Report (Test):
              precision    recall  f1-score   support

           0     0.5862    0.5667    0.5763        60
           1     0.7705    0.7833    0.7769        60
           2     0.6875    0.7333    0.7097        60
           3     0.6545    0.6000    0.6261        60
           4     0.7255    0.6167    0.6667        60
           5     0.4762    0.5000    0.4878        60
           6     0.6164    0.7500    0.6767        60
           7     0.6939    0.5667    0.6239        60
           8     0.4923    0.5333    0.5120        60
           9     0.4737    0.4500    0.4615        60
          10     0.6415    0.5667    0.6018        60
          11     0.6290    0.6500    0.6393        60
          12     0.8235    0.7000    0.7568        60
          13     0.4035    0.3833    0.3