In [6]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# data analysis and wrangling
# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import os
from ipywidgets import interact

# machine learning 
from sklearn.feature_selection import f_classif
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.metrics import confusion_matrix,roc_curve,accuracy_score,classification_report,make_scorer,f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.utils.class_weight import compute_class_weight
import MyClassAndFun as MCF
random_state=1

In [17]:
X_train_df=pd.read_csv('X_train.csv')
X_test_df=pd.read_csv('X_test.csv')
y_train=pd.read_csv('y_train.csv')
y_train = y_train['credit_score'] 
y_test=pd.read_csv('y_test.csv')
y_test = y_test['credit_score'] 
class_labels = np.unique(y_test)
class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=y_train)
unique, counts = np.unique(y_test, return_counts=True)
class_counts = dict(zip(unique, counts))

In [6]:
models = {
    "Random Forest": RandomForestClassifier(random_state=1,class_weight=dict(enumerate(class_weights))),
    "Decision Tree": DecisionTreeClassifier(random_state=1,class_weight=dict(enumerate(class_weights))),
    "Gradient Boosting": GradientBoostingClassifier(random_state=1),
    "XGBoost": XGBClassifier(random_state=1, class_weight=dict(enumerate(class_weights))),
    "CatBoost": CatBoostClassifier(verbose=False,random_state=1, class_weights=dict(enumerate(class_weights))),
    "AdaBoost": AdaBoostClassifier(random_state=1),
    "K-Nearest Neighbors": KNeighborsClassifier(),
}

params = {
    "Random Forest": {
        'n_estimators': [x for x in range(200,2000,200)],
        'max_features': ['log2', 'sqrt'],
        'max_depth':[None,10.0,20.0,30.0,40.0,50.0,
                     60.0,70.0,80.0,
                     90.0,100.0,110.0],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'bootstrap': [True, False],
    },
    "Decision Tree": {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    },
    "Gradient Boosting": {
        'loss':['log_loss','exponential'],
        'criterion':['friedman_mse', 'squared_error'],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'learning_rate': [0.1, 0.01, 0.001],
        'n_estimators': [8, 16, 32, 64, 128, 256],
        'max_depth': [3, 4, 5],
        'subsample': [0.6, 0.7, 0.8, 0.9],
    },
    "XGBoost": {
        'learning_rate': [0.1, 0.01, 0.05, 0.001],
        'n_estimators': [8, 16, 32, 64, 128, 256],
        'max_depth': [3, 4, 5],
        'subsample': [0.6, 0.7, 0.8, 0.9],
    },
    "CatBoost": {
        'depth': [6, 8, 10],
        'learning_rate': [0.01, 0.05, 0.1],
        'iterations': [30, 50, 100],
    },
    "AdaBoost": {
        'learning_rate': [0.1, 0.01, 0.5, 0.001],
        'n_estimators': [8, 16, 32, 64, 128, 256],
    },
    "K-Nearest Neighbors": {
        'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12,15,20],
        'weights': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'p': [1, 2],
    },
}


f1_scorer = make_scorer(f1_score, average='weighted')

In [7]:
grid_searches = {}
for model_name, model in models.items():
    grid_search = GridSearchCV(
        model,
        params[model_name],
        cv=3,  # Use the number of desired cross-validation folds
        scoring=f1_scorer,
        n_jobs=-1,  # Use all available CPU cores
        verbose=2,
    )
    grid_searches[model_name] = grid_search

In [8]:
best_models = {}

for model_name, grid_search in grid_searches.items():
    grid_search.fit(X_train_df, y_train)  # X_train and y_train are your training data
    best_models[model_name] = grid_search.best_estimator_

Fitting 3 folds for each of 3888 candidates, totalling 11664 fits


10692 fits failed out of a total of 11664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
972 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidPa

Fitting 3 folds for each of 72 candidates, totalling 216 fits
Fitting 3 folds for each of 7776 candidates, totalling 23328 fits


11664 fits failed out of a total of 23328.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
11664 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 431, in fit
    self._check_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 276, in _check_params
    self._loss = loss_class(self.n_classes_

Fitting 3 folds for each of 288 candidates, totalling 864 fits
Parameters: { "class_weight" } are not used.

Fitting 3 folds for each of 27 candidates, totalling 81 fits
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Fitting 3 folds for each of 112 candidates, totalling 336 fits


ValueError: 
All the 336 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
37 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'distance', 'uniform'}, a callable or None. Got 'auto' instead.

--------------------------------------------------------------------------------
34 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'distance', 'uniform'}, a callable or None. Got 'ball_tree' instead.

--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'uniform', 'distance'}, a callable or None. Got 'ball_tree' instead.

--------------------------------------------------------------------------------
56 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'uniform', 'distance'}, a callable or None. Got 'kd_tree' instead.

--------------------------------------------------------------------------------
28 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'distance', 'uniform'}, a callable or None. Got 'brute' instead.

--------------------------------------------------------------------------------
56 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'uniform', 'distance'}, a callable or None. Got 'brute' instead.

--------------------------------------------------------------------------------
47 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'uniform', 'distance'}, a callable or None. Got 'auto' instead.

--------------------------------------------------------------------------------
28 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\elper\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'weights' parameter of KNeighborsClassifier must be a str among {'distance', 'uniform'}, a callable or None. Got 'kd_tree' instead.


In [9]:
best_models

{'Random Forest': RandomForestClassifier(bootstrap=False,
                        class_weight={0: 1.1495133726722355,
                                      1: 0.6268675428881877,
                                      2: 1.869741352446245},
                        max_features='log2', min_samples_leaf=2,
                        n_estimators=1800, random_state=1),
 'Decision Tree': DecisionTreeClassifier(class_weight={0: 1.1495133726722355,
                                      1: 0.6268675428881877,
                                      2: 1.869741352446245},
                        criterion='entropy', max_depth=30, random_state=1),
 'Gradient Boosting': GradientBoostingClassifier(criterion='squared_error', max_depth=5,
                            min_samples_split=5, n_estimators=256,
                            random_state=1, subsample=0.8),
 'XGBoost': XGBClassifier(base_score=None, booster=None, callbacks=None,
               class_weight={0: 1.1495133726722355, 1: 0.626867542888

In [13]:
model_name

'K-Nearest Neighbors'

In [14]:
best_f1_score = -1  # Initialize with a low value
best_model = None

for model_name, grid_search in grid_searches.items():
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best F1-score for {model_name}: {grid_search.best_score_}")
    print("=="*25,"\n")

    # if grid_search.best_score_ > best_f1_score:
    #     best_f1_score = grid_search.best_score_
    #     best_model = grid_search.best_estimator_

    # if best_model is not None:
    #     print("Best model based on F1-score:")
    #     print(best_model)
    #     print(f"Best F1-score: {best_f1_score}")

Best parameters for Random Forest: {'bootstrap': False, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 1800}
Best F1-score for Random Forest: 0.8143091995891129

Best parameters for Decision Tree: {'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2}
Best F1-score for Decision Tree: 0.7909916895619317

Best parameters for Gradient Boosting: {'criterion': 'squared_error', 'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 256, 'subsample': 0.8}
Best F1-score for Gradient Boosting: 0.7676861760696662

Best parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 256, 'subsample': 0.6}
Best F1-score for XGBoost: 0.7531918311131082

Best parameters for CatBoost: {'depth': 10, 'iterations': 100, 'learning_rate': 0.1}
Best F1-score for CatBoost: 0.7272042153735448

Best parameters for AdaBoost: {'learning_rat

AttributeError: 'GridSearchCV' object has no attribute 'best_params_'