In [20]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
	StratifiedKFold,
	GridSearchCV,
)
from sklearn.metrics import (
	confusion_matrix,
)
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

#### Load the training and test data

In [21]:
def _load_npz_array(path, key="arr_0"):
    """Read a single array out of an ``.npz`` archive and close the archive.

    ``np.load`` on an ``.npz`` file returns an archive object that keeps the
    underlying file open until it is closed. Using it as a context manager
    releases the file handle as soon as the requested array has been read
    into memory, instead of leaking one open handle per load.

    Parameters
    ----------
    path : str
        Location of the ``.npz`` file.
    key : str, default "arr_0"
        Name of the array inside the archive.

    Returns
    -------
    numpy.ndarray
        The array stored under ``key``.
    """
    with np.load(path) as archive:
        return archive[key]


X_train = _load_npz_array("artifacts/X_train.npz")
Y_train = _load_npz_array("artifacts/y_train.npz")
X_test = _load_npz_array("artifacts/X_test.npz")
Y_test = _load_npz_array("artifacts/y_test.npz")

# Fail fast if features and labels are out of sync, rather than deep inside
# a model fit later on.
assert X_train.shape[0] == Y_train.shape[0], "X_train / Y_train row counts differ"
assert X_test.shape[0] == Y_test.shape[0], "X_test / Y_test row counts differ"

#### Define the hyperparameter grids

In [22]:
# Hyperparameter search spaces, one per estimator. The number of candidates
# for a grid is the product of the lengths of its value lists.

# Logistic regression: 1 variable -> 3 candidates.
lr_param_grid = dict(max_iter=[1000, 5000, 10000])

# Decision tree: 2 variables -> 4 * 3 = 12 candidates.
# NOTE(review): "entropy" and "log_loss" look like the same split criterion in
# scikit-learn — confirm whether both need to be searched.
dt_param_grid = dict(
    max_depth=[8, 12, 16, 20],
    criterion=["gini", "entropy", "log_loss"],
)

# Random forest: 3 variables -> 4 * 4 * 3 = 48 candidates.
rf_param_grid = dict(
    n_estimators=[50, 100, 150, 200],
    max_depth=[8, 12, 16, 20],
    criterion=["gini", "entropy", "log_loss"],
)

# Lookup from model display name to its search space.
param_grids = dict(
    zip(
        ("Logistic Regression", "Random Forest", "Decision Tree"),
        (lr_param_grid, rf_param_grid, dt_param_grid),
    )
)

#### Define the candidate models

In [23]:
# Base estimators to tune, keyed by display name; the keys must match those
# of `param_grids`.
# The tree-based estimators are randomised, so they are seeded (with the same
# value the CV splitter uses) to make a fresh "Restart & Run All" reproduce
# the same scores and best parameters.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
}


#### Configure stratified K-fold cross-validation

In [24]:
# Shared cross-validation splitter: 6 stratified folds, shuffled with a fixed
# seed so every model is scored on the same splits.
cv = StratifiedKFold(
    n_splits=6,
    shuffle=True,
    random_state=42,
)

#### Tune each model with grid search

In [26]:
# Run one exhaustive grid search per estimator and keep each fitted search
# object, keyed by the model's display name, for later inspection.
grid_search_results = {}

for model_name in models:
    model = models[model_name]
    print(f"\n--- Tunning {model_name} ---")
    param_grid = param_grids[model_name]

    # Candidates are ranked by the "f1" scorer on the shared CV splitter.
    # NOTE(review): presumably the target is binary — confirm.
    grid_search = GridSearchCV(
        model,
        param_grid,
        scoring="f1",
        cv=cv,
        verbose=1,
        return_train_score=False,
    )

    print(f"Fitting gridSearchCv for {model_name}")
    grid_search.fit(X_train, Y_train)
    grid_search_results[model_name] = grid_search
    print(f"{model_name} gridSearchCv completed ...")

    # Report the winning configuration and its cross-validated score.
    print(
        f"Best parameters: {grid_search.best_params_}",
        f"Best cv score: {grid_search.best_score_}",
        sep="\n",
    )



--- Tunning Logistic Regression ---
Fitting gridSearchCv for Logistic Regression
Fitting 6 folds for each of 3 candidates, totalling 18 fits
Logistic Regression gridSearchCv completed ...
Best parameters: {'max_iter': 1000}
Best cv score: 0.7318169755021501

--- Tunning Random Forest ---
Fitting gridSearchCv for Random Forest
Fitting 6 folds for each of 48 candidates, totalling 288 fits
Random Forest gridSearchCv completed ...
Best parameters: {'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 200}
Best cv score: 0.8941965818377816

--- Tunning Decision Tree ---
Fitting gridSearchCv for Decision Tree
Fitting 6 folds for each of 12 candidates, totalling 72 fits
Decision Tree gridSearchCv completed ...
Best parameters: {'criterion': 'log_loss', 'max_depth': 16}
Best cv score: 0.8332755250325262
