In [1]:
!pip install -r requirements.txt




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
    precision_recall_curve,
    f1_score,
    precision_score,
    recall_score,
    accuracy_score
)
from sklearn.model_selection import (StratifiedKFold, cross_validate, GridSearchCV)
warnings.filterwarnings('ignore')

### Load Data

In [3]:
X_train = np.load('artifacts/X_train.npz')['X_train']
X_test = np.load('artifacts/X_test.npz')['X_train']
Y_train = np.load('artifacts/Y_train.npz')['X_train']
Y_test = np.load('artifacts/Y_test.npz')['X_train']

### Define Multi-Models

In [4]:
lr_param_grid = {
    'max_iter': [1000, 5000, 10000],
}
dt_param_grid = {
    'max_depth': [8, 12, 16, 20],
    'criterion': ["gini", "entropy", "log_loss"],
}
rf_param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [8, 12, 16, 20],
    'criterion': ["gini", "entropy", "log_loss"],
}

param_grids = {
    'Logistic Regression': lr_param_grid,
    'Decision Tree': dt_param_grid,
    'Random Forest': rf_param_grid
}

In [5]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
}

### Configure K-Fold Cross-Validation

In [6]:
cv = StratifiedKFold(n_splits=6, shuffle=True, random_state=42)

### Multi-Model Training with Hyperparameter Tuning

In [7]:
grid_search_results = {}
for model_name, model in models.items():
    param_grid = param_grids[model_name]

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        scoring='f1',
        verbose=1,
        return_train_score=True,
    )

    print(f"Training {model_name} with hyperparameter tuning...")
    grid_search.fit(X_train, Y_train)

    grid_search_results[model_name] = grid_search
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")

Training Logistic Regression with hyperparameter tuning...
Fitting 6 folds for each of 3 candidates, totalling 18 fits
Best parameters for Logistic Regression: {'max_iter': 1000}
Training Decision Tree with hyperparameter tuning...
Fitting 6 folds for each of 12 candidates, totalling 72 fits
Best parameters for Decision Tree: {'criterion': 'entropy', 'max_depth': 20}
Training Random Forest with hyperparameter tuning...
Fitting 6 folds for each of 48 candidates, totalling 288 fits
Best parameters for Random Forest: {'criterion': 'log_loss', 'max_depth': 20, 'n_estimators': 100}
