In [1]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree   import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    StratifiedKFold,
    cross_validate,
    GridSearchCV,
)
from sklearn.metrics import (
    confusion_matrix,
    f1_score,
    precision_score,
    accuracy_score,
    recall_score
)
warnings.filterwarnings('ignore')

### 1. Loading Data

In [2]:
X_train = np.load('Artifacts/X_train.npz')['arr_0']
Y_train = np.load('Artifacts/Y_train.npz')['arr_0']
X_test = np.load('Artifacts/X_test.npz')['arr_0']
Y_test = np.load('Artifacts/Y_test.npz')['arr_0']

### 2. Define Parameter Grid

In [3]:
# Define parameter grids for hyperparameter tuning

lr_param_grid ={
    'max_iter': [1000, 5000, 10000],
}

dt_param_grid = {
    'max_depth': [8,12,16, 20],
    'criterion': ['gini', 'entropy', 'log_loss'],
}

rf_param_grid = {
    'n_estimators': [100],
    'max_depth': [8,12],
    'criterion': ['gini', 'entropy', 'log_loss'],
}

param_grids ={
    'logistic_regression': lr_param_grid,
    'decision_tree': dt_param_grid,
    'random_forest': rf_param_grid
}

### 3. Define Multiple Models

In [4]:
models ={
    'logistic_regression': LogisticRegression(),
    'decision_tree': DecisionTreeClassifier(),
    'random_forest': RandomForestClassifier()
    
}

### 4. Configure K-Fold CV

In [5]:
cv = StratifiedKFold(
    n_splits=6,
    random_state=42,
    shuffle=True

)

In [6]:
grid_search_results = {}
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")

    param_grid = param_grids[model_name]

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='f1',
        cv=cv,
        verbose=1,
        return_train_score=False
    )

    print(f"Fitting grid search for {model_name}")
    grid_search.fit(X_train, Y_train)

    grid_search_results[model_name] = grid_search

    print(f"{model_name} best parameters: {grid_search.best_params_}")
    print(f"{model_name} best score: {grid_search.best_score_:.4f}")


Training logistic_regression...
Fitting grid search for logistic_regression
Fitting 6 folds for each of 3 candidates, totalling 18 fits
logistic_regression best parameters: {'max_iter': 1000}
logistic_regression best score: 0.7977

Training decision_tree...
Fitting grid search for decision_tree
Fitting 6 folds for each of 12 candidates, totalling 72 fits
decision_tree best parameters: {'criterion': 'entropy', 'max_depth': 12}
decision_tree best score: 0.8039

Training random_forest...
Fitting grid search for random_forest
Fitting 6 folds for each of 6 candidates, totalling 36 fits
random_forest best parameters: {'criterion': 'gini', 'max_depth': 12, 'n_estimators': 100}
random_forest best score: 0.8465
