In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, average_precision_score, classification_report
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

In [2]:
train_df = pd.read_csv('../../dataset/closed_world/closedworld_train.csv')
test_df  = pd.read_csv('../../dataset/closed_world/closedworld_test.csv')

In [3]:
# feature/target 분리
X = train_df.drop(columns=["label"]).values
y = train_df["label"].values

X_test = test_df.drop(columns=["label"]).values
y_test = test_df["label"].values

In [4]:
# Train MLP (Best param.)
mlp_model = MLPClassifier(
    hidden_layer_sizes=(128, 320, 448),
    activation='tanh',
    solver='adam',
    learning_rate_init=0.0003541348658851275,
    alpha=3.154728434953602e-05,
    batch_size=256,
    max_iter=300,
    random_state=42
)

In [5]:
# Train XGB (Best param.)
xgb_model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=95,  # label range: 0~94

    # === Optuna Best Params ===
    learning_rate=0.19721925924769965,
    max_depth=10,
    min_child_weight=4,
    subsample=0.7577072390567685,
    colsample_bytree=0.6494394276320198,
    gamma=0.06948732401595321,
    reg_lambda=3.2137571796403774,
    reg_alpha=0.446034681615163,

    # === Recommended add-ons ===
    n_estimators=300,          # boosting rounds
    eval_metric="mlogloss",
    tree_method="hist",
    random_state=42
)

In [6]:
K = 5
kf = KFold(n_splits=K, shuffle=True, random_state=42)

n_classes = 95
oof_mlp = np.zeros((len(X), n_classes))
oof_xgb = np.zeros((len(X), n_classes))

for train_idx, val_idx in kf.split(X):
    X_tr, X_val = X[train_idx], X[val_idx]
    y_tr, y_val = y[train_idx], y[val_idx]

    # ===== MLP =====
    mlp_model.fit(X_tr, y_tr)
    oof_mlp[val_idx] = mlp_model.predict_proba(X_val)

    # ===== XGBoost =====
    xgb_model.fit(X_tr, y_tr)
    oof_xgb[val_idx] = xgb_model.predict_proba(X_val)




In [7]:
# (N, 190) = (N, 95 probs from MLP + 95 probs from XGB)
train_meta = np.hstack([oof_mlp, oof_xgb])

meta_model = LogisticRegression(
    multi_class='multinomial',
    max_iter=500,
    n_jobs=-1
)

meta_model.fit(train_meta, y)

In [8]:
# Base model train on full data
mlp_model.fit(X, y)
xgb_model.fit(X, y)

# Base predictions
proba_mlp_test = mlp_model.predict_proba(X_test)
proba_xgb_test = xgb_model.predict_proba(X_test)

# Meta input
test_meta = np.hstack([proba_mlp_test, proba_xgb_test])

In [10]:
stack_proba = meta_model.predict_proba(test_meta)
stack_pred = np.argmax(stack_proba, axis=1)

acc = accuracy_score(y_test, stack_pred)
f1_macro = f1_score(y_test, stack_pred, average='macro')
f1_micro = f1_score(y_test, stack_pred, average='micro')
f1_weighted = f1_score(y_test, stack_pred, average='weighted')

roc_auc_macro = roc_auc_score(
    y_test,
    stack_proba,         # ← probability output of meta model
    multi_class='ovr',
    average='macro'
)

pr_auc_macro = average_precision_score(
    y_test,
    stack_proba,         # ← same probability matrix
    average='macro'
)

print("\n========== STACKING TEST RESULTS ==========")
print(f"Accuracy            : {acc:.6f}")
print(f"F1 (macro)          : {f1_macro:.6f}")
print(f"F1 (micro)          : {f1_micro:.6f}")
print(f"F1 (weighted)       : {f1_weighted:.6f}")

print(f"ROC-AUC (macro)     : {roc_auc_macro:.6f}")
print(f"PR-AUC (macro)      : {pr_auc_macro:.6f}")

print("\nClassification Report:")
print(classification_report(y_test, stack_pred, digits=4))


Accuracy            : 0.798246
F1 (macro)          : 0.797702
F1 (micro)          : 0.798246
F1 (weighted)       : 0.797702
ROC-AUC (macro)     : 0.985355
PR-AUC (macro)      : 0.852970

Classification Report:
              precision    recall  f1-score   support

           0     0.8302    0.7333    0.7788        60
           1     0.8852    0.9000    0.8926        60
           2     0.8983    0.8833    0.8908        60
           3     0.7778    0.8167    0.7967        60
           4     0.9375    0.7500    0.8333        60
           5     0.8958    0.7167    0.7963        60
           6     0.8154    0.8833    0.8480        60
           7     0.8421    0.8000    0.8205        60
           8     0.7541    0.7667    0.7603        60
           9     0.7091    0.6500    0.6783        60
          10     0.8298    0.6500    0.7290        60
          11     0.8727    0.8000    0.8348        60
          12     0.9219    0.9833    0.9516        60
          13     0.5167    0.516