In [16]:
import pandas as pd
import joblib

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score


In [17]:
# Feature matrices are kept as DataFrames, read in train -> validation order.
X_train, X_val = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/X_train.csv",
        "../data/processed/X_val.csv",
    )
)

# Each target file is flattened into a 1-D array, the form in which it is
# later handed to `fit` and `roc_auc_score`.
y_train, y_val = (
    pd.read_csv(csv_path).to_numpy().ravel()
    for csv_path in (
        "../data/processed/y_train.csv",
        "../data/processed/y_val.csv",
    )
)

print("Data loaded successfully")


Data loaded successfully


In [18]:
# Depth-capped decision tree with a pinned random_state; `fit` returns the
# estimator itself, so construction and training are chained.
dt = DecisionTreeClassifier(random_state=42, max_depth=6).fit(X_train, y_train)

# Validation ROC AUC from the second column of predict_proba
# (the probability of the class at index 1).
dt_prob = dt.predict_proba(X_val)[:, 1]
dt_auc = roc_auc_score(y_true=y_val, y_score=dt_prob)

# Persist the fitted tree alongside the other model artifacts.
joblib.dump(dt, "../models/decision_tree.pkl")

dt_auc


0.7099474266740454

In [19]:
# 200-tree random forest with a pinned random_state; n_jobs=-1 asks
# scikit-learn to use all available cores. `fit` returns the estimator,
# so it is chained onto the constructor.
rf = RandomForestClassifier(n_jobs=-1, random_state=42, n_estimators=200).fit(
    X_train, y_train
)

# Validation ROC AUC from the second column of predict_proba
# (the probability of the class at index 1).
rf_prob = rf.predict_proba(X_val)[:, 1]
rf_auc = roc_auc_score(y_true=y_val, y_score=rf_prob)

# Persist the fitted forest alongside the other model artifacts.
joblib.dump(rf, "../models/random_forest.pkl")

rf_auc


0.7203323879358052

In [20]:
# Gradient boosting: 200 stages at learning rate 0.05, pinned random_state.
# `fit` returns the estimator, so it is chained onto the constructor.
gb = GradientBoostingClassifier(
    random_state=42, learning_rate=0.05, n_estimators=200
).fit(X_train, y_train)

# Validation ROC AUC from the second column of predict_proba
# (the probability of the class at index 1).
gb_prob = gb.predict_proba(X_val)[:, 1]
gb_auc = roc_auc_score(y_true=y_val, y_score=gb_prob)

# Persist the fitted booster alongside the other model artifacts.
joblib.dump(gb, "../models/gradient_boosting.pkl")

gb_auc


0.7575487686773659

In [21]:
# Multi-layer perceptron with hidden layers of 64 and 32 units, capped at
# 300 iterations, pinned random_state. `fit` returns the estimator, so it is
# chained onto the constructor.
# NOTE(review): inputs are used exactly as loaded; whether they were scaled
# upstream is not visible here - confirm.
nn = MLPClassifier(
    random_state=42, max_iter=300, hidden_layer_sizes=(64, 32)
).fit(X_train, y_train)

# Validation ROC AUC from the second column of predict_proba
# (the probability of the class at index 1).
nn_prob = nn.predict_proba(X_val)[:, 1]
nn_auc = roc_auc_score(y_true=y_val, y_score=nn_prob)

# Persist the fitted network alongside the other model artifacts.
joblib.dump(nn, "../models/neural_network.pkl")

nn_auc




0.7230734643054787

In [22]:
# Summary table: one row per model, in the order the models were trained,
# with the validation ROC AUC computed for each.
comparison = pd.DataFrame(
    {
        "Model": ["Decision Tree", "Random Forest", "Gradient Boosting", "Neural Network"],
        "ROC_AUC": [dt_auc, rf_auc, gb_auc, nn_auc],
    }
)

comparison


Unnamed: 0,Model,ROC_AUC
0,Decision Tree,0.709947
1,Random Forest,0.720332
2,Gradient Boosting,0.757549
3,Neural Network,0.723073


In [23]:
# index=False drops the positional row index, so the file holds only the
# Model and ROC_AUC columns.
comparison.to_csv(
    path_or_buf="../models/advanced_model_comparison.csv",
    index=False,
)
print("Model comparison saved successfully")


Model comparison saved successfully
