In [1]:
import pandas as pd
import numpy as np



In [2]:
# Load feature-engineered data
X_train = pd.read_csv("../data/X_train.csv")
X_test  = pd.read_csv("../data/X_test.csv")
y_train = pd.read_csv("../data/y_train.csv").values.ravel()
y_test  = pd.read_csv("../data/y_test.csv").values.ravel()

print(X_train.shape, y_train.shape)


(81412, 14) (81412,)


In [3]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [4]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(
    hidden_layer_sizes=(128, 64, 32),
    activation='relu',
    solver='adam',
    max_iter=50,
    random_state=42,
    verbose=True
)

model.fit(X_train_scaled, y_train)


Iteration 1, loss = 0.34148466
Iteration 2, loss = 0.33696392
Iteration 3, loss = 0.33638099
Iteration 4, loss = 0.33611193
Iteration 5, loss = 0.33540427
Iteration 6, loss = 0.33515794
Iteration 7, loss = 0.33488475
Iteration 8, loss = 0.33449908
Iteration 9, loss = 0.33408648
Iteration 10, loss = 0.33373767
Iteration 11, loss = 0.33345908
Iteration 12, loss = 0.33268113
Iteration 13, loss = 0.33253161
Iteration 14, loss = 0.33180318
Iteration 15, loss = 0.33159142
Iteration 16, loss = 0.33100847
Iteration 17, loss = 0.33035436
Iteration 18, loss = 0.32999281
Iteration 19, loss = 0.32964797
Iteration 20, loss = 0.32894185
Iteration 21, loss = 0.32822179
Iteration 22, loss = 0.32769501
Iteration 23, loss = 0.32704224
Iteration 24, loss = 0.32672376
Iteration 25, loss = 0.32589147
Iteration 26, loss = 0.32553367
Iteration 27, loss = 0.32468660
Iteration 28, loss = 0.32358061
Iteration 29, loss = 0.32326415
Iteration 30, loss = 0.32240600
Iteration 31, loss = 0.32136671
Iteration 32, los



In [5]:
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))
print(classification_report(y_test, y_pred))


Accuracy: 0.8837083619927287
ROC AUC: 0.5963106223850184
              precision    recall  f1-score   support

           0       0.89      0.99      0.94     18083
           1       0.30      0.03      0.06      2271

    accuracy                           0.88     20354
   macro avg       0.59      0.51      0.50     20354
weighted avg       0.82      0.88      0.84     20354



In [6]:
import joblib

joblib.dump(model, "../model/ann_model.pkl")
joblib.dump(scaler, "../model/scaler.pkl")
joblib.dump(list(X_train.columns), "../model/feature_names.pkl")

print("✅ Model artifacts saved successfully")


✅ Model artifacts saved successfully
