# MLP

In [1]:
import time
import ast
import numpy as np
import pandas as pd
from scipy.sparse import hstack

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    log_loss
)

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="whitegrid")

# Project-local helpers: evaluation/plotting utilities and data preparation.
# NOTE(review): wildcard imports hide where each name comes from. The cells
# below rely on load_and_prepare_data, prepare_tfidf_pipeline, eval_metrics,
# eval_classification_report, eval_roc_auc, eval_log_loss,
# eval_log_loss_per_class, eval_confusion_matrix, plot_confusion_matrix,
# plot_roc_curves, save_roc_to_csv and build_submission, which presumably
# come from these two modules -- confirm and switch to explicit imports.
from evaluation import *
from data_utils import *

In [2]:
# Pull the train/test frames, the target vector and the class labels from
# the project loader.
train_df, test_df, y, class_names = load_and_prepare_data()

# Classical-ML feature path: TF-IDF matrices for train/validation/test plus
# the matching train/validation targets.
# (`vecs` presumably holds the fitted vectorizers -- confirm in data_utils.)
(
    X_train, X_val, X_test,
    y_train, y_val,
    vecs,
) = prepare_tfidf_pipeline(train_df, test_df, y)

# Quick sanity check of the split sizes before any modelling.
print("TF-IDF Train shape:", X_train.shape)
print("Validation shape:", X_val.shape)

TF-IDF Train shape: (45981, 60000)
Validation shape: (11496, 60000)


In [3]:
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [4]:
# Compress the TF-IDF features into a low-rank representation.
# The decomposition is learned from the training split only; validation and
# test are merely projected, so nothing leaks from them into the components.
svd_components = 512  # tune: 256–1024 are common sweet spots
svd = TruncatedSVD(n_components=svd_components, random_state=42)
Z_train = svd.fit_transform(X_train)
Z_val, Z_test = (svd.transform(split) for split in (X_val, X_test))

In [5]:
# Bring every SVD component onto a comparable scale for the MLP.
# Scaling statistics are estimated on the training split only and then
# re-applied unchanged to validation and test.
# Note: the Z_* names are rebound here, replacing the unscaled SVD outputs.
scaler = StandardScaler()
Z_train = scaler.fit_transform(Z_train)
Z_val, Z_test = (scaler.transform(split) for split in (Z_val, Z_test))

In [6]:
# MLP params & training
# Two-hidden-layer perceptron fitted on the standardized SVD features.
mlp_model = MLPClassifier(
    hidden_layer_sizes=(512, 128),  # tune: e.g., (256,128) for smaller model
    activation="relu",
    solver="adam",
    alpha=1e-4,               # L2 regularization
    batch_size=256,
    learning_rate_init=1e-3,
    max_iter=60,              # upper bound on epochs; early stopping may end sooner
    early_stopping=True,      # scikit-learn holds out part of Z_train internally to monitor the score
    n_iter_no_change=5,       # patience (epochs without improvement) before stopping
    random_state=42,
    verbose=False
)

# Time the fit with perf_counter(): it is monotonic and high-resolution, so
# the reported duration cannot be distorted by wall-clock adjustments the
# way time.time() differences can.
t0 = time.perf_counter()

mlp_model.fit(Z_train, y_train)

t1 = time.perf_counter()
print(f"⏱️ MLP training time: {(t1 - t0):.2f} seconds ({(t1 - t0)/60:.2f} minutes)")

⏱️ MLP training time: 12.49 seconds (0.21 minutes)


In [7]:
# Predictions (MLP)
# predict_proba columns are ordered like mlp_model.classes_, so argmax gives
# a column position rather than a label. Indexing classes_ with it returns
# the actual label; the result is unchanged when labels are already 0..n-1.
y_proba_val_mlp = mlp_model.predict_proba(Z_val)
y_pred_val_mlp  = mlp_model.classes_[y_proba_val_mlp.argmax(axis=1)]

# Test-set predictions (consumed later when the submission is built)
y_proba_test_mlp = mlp_model.predict_proba(Z_test)
y_pred_test_mlp  = mlp_model.classes_[y_proba_test_mlp.argmax(axis=1)]

In [8]:
# Validation-set evaluation of the MLP. Return values are discarded via `_`;
# judging by the recorded output, each helper prints its own report section.
print("\n================ MLP EVALUATION ================\n")
# Global metrics and per-class report, both from hard predictions
_ = eval_metrics(y_val, y_pred_val_mlp)
# NOTE(review): in the recorded output the Macro/Weighted Avg rows show a
# support of 34488 (= 3 x 11496) while the per-class supports sum to 11496 --
# check how eval_classification_report totals the support column.
eval_classification_report(y_val, y_pred_val_mlp, class_names)
# ROC-AUC, computed from the predicted probabilities
_ = eval_roc_auc(y_val, y_proba_val_mlp)
# Log-loss, overall and then broken down per class
_ = eval_log_loss(y_val, y_proba_val_mlp)
_ = eval_log_loss_per_class(y_val, y_proba_val_mlp)



*** GLOBAL METRICS ***
Accuracy (Global)      : 0.4268
Precision (Macro Avg)  : 0.4250
Recall (Macro Avg)     : 0.4214
F1-Score (Macro Avg)   : 0.4170

*** PER-CLASS EVALUATION ***
Class                Precision    Recall  F1-Score   Support
------------------------------------------------------------
winner_model_a            0.43      0.52      0.47      4013
winner_model_b            0.44      0.47      0.45      3931
winner_tie                0.41      0.28      0.33      3552
------------------------------------------------------------
Macro Avg                 0.43      0.42      0.42     34488
Weighted Avg              0.43      0.43      0.42     34488

*** ROC-AUC EVALUATION ***
ROC-AUC (OvR) : 0.5986

*** LOG-LOSS EVALUATION ***
Log-loss      : 1.0651

*** LOG-LOSS PER CLASS ***
Class 0: 1.0422  (n=4013)
Class 1: 1.0288  (n=3931)
Class 2: 1.1310  (n=3552)


In [9]:
# Validation confusion matrix, followed by its plot.
# The class count is read from the width of the probability matrix.
cm_mlp = eval_confusion_matrix(
    y_val,
    y_pred_val_mlp,
    n_classes=y_proba_val_mlp.shape[1],
)
# NOTE(review): the recorded output says the plot was saved under
# "images/confusion_matrix/..." while save_path points at "results/..." --
# the output may be stale or the helper may rewrite the path; confirm.
plot_confusion_matrix(
    cm_mlp,
    class_names,
    title="Confusion Matrix — MLP",
    save_path="results/confusion_matrix/confusion_matrix_mlp.png",
)


Confusion Matrix (rows=true, cols=pred):
 [[2081 1220  712]
 [1409 1836  686]
 [1402 1161  989]]
Saved plot to: images/confusion_matrix/confusion_matrix_mlp.png


In [10]:
# ROC curves on the validation split, drawn from the predicted probabilities.
# NOTE(review): the recorded output reports "images/roc/roc_mlp.png" while
# save_path below is "results/roc/roc_mlp.png" -- the output may be stale or
# the helper may rewrite the path; confirm.
plot_roc_curves(
    y_val,
    y_proba_val_mlp,
    class_names,
    title_prefix="MLP ROC",
    save_path="results/roc/roc_mlp.png",
)

Saved plot to: images/roc/roc_mlp.png


In [11]:
# Export the validation ROC data for the MLP; per the recorded output the
# helper writes one CSV per class under results/roc/, tagged with the model
# name and fold index.
save_roc_to_csv(y_val, y_proba_val_mlp, "MLP", fold_idx=1)

Saved ROC data for class 0 (AUC=0.6042) → results/roc/MLP_fold1_class0.csv
Saved ROC data for class 1 (AUC=0.6116) → results/roc/MLP_fold1_class1.csv
Saved ROC data for class 2 (AUC=0.5799) → results/roc/MLP_fold1_class2.csv


In [12]:
# Build the submission from the test-set labels and probabilities produced
# by the MLP. Per the recorded output, the helper also saves it as
# results/submission/submission_mlp.csv.
submission_mlp = build_submission(
    test_df=test_df,
    y_pred_test=y_pred_test_mlp,
    y_proba_test=y_proba_test_mlp,
    model_name="mlp"  # presumably used to name the output file -- confirm in the helper
)


Saved: results/submission/submission_mlp.csv
