In [1]:
pip install scikit-optimize

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

In [3]:
credit_card_data=pd.read_csv("creditcard.csv.zip")

In [4]:
X = credit_card_data.drop('Class', axis=1)
y = credit_card_data['Class']

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
models_params = {
    'Logistic Regression': {
        'model': LogisticRegression(max_iter=1000),
        'search_space': {
            'C': Real(1e-2, 1e+2, prior='log-uniform'),
            'solver': Categorical(['lbfgs']),
            'penalty': Categorical(['l2'])
        }
    },
    'KNN': {
        'model': KNeighborsClassifier(),
        'search_space': {
            'n_neighbors': Integer(3, 30),
            'weights': Categorical(['uniform', 'distance']),
            'metric': Categorical(['euclidean', 'manhattan'])
        }
    },
    'Decision Tree': {
        'model': DecisionTreeClassifier(),
        'search_space': {
            'max_depth': Integer(3, 30),
            'min_samples_split': Integer(2, 10),
            'criterion': Categorical(['gini', 'entropy'])
        }
    },
    'Random Forest': {
        'model': RandomForestClassifier(),
        'search_space': {
            'n_estimators': Integer(50, 200),
            'max_depth': Integer(5, 30),
            'min_samples_split': Integer(2, 10),
            'criterion': Categorical(['gini', 'entropy'])
        }
    }
}

In [7]:
for name, mp in models_params.items():
    print(f"\n{name} - Bayesian Hyperparameter Tuning")
    
    bayes_search = BayesSearchCV(
        estimator=mp['model'],
        search_spaces=mp['search_space'],
        n_iter=20,
        scoring='f1',
        cv=3,
        n_jobs=-1,
        verbose=1,
        random_state=42
    )
    
    bayes_search.fit(X_train, y_train)
    
    print("Best Parameters:", bayes_search.best_params_)
    y_pred = bayes_search.predict(X_test)
    
    print("Classification Report:")
    print(classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))


Logistic Regression - Bayesian Hyperparameter Tuning
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fi



Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best Parameters: OrderedDict([('criterion', 'entropy'), ('max_depth', 6), ('min_samples_split', 2)])
Classification Report:
              precision    recall  f1-score   support

           0     0.9997    0.9997    0.9997     85307
           1     0.8175    0.8235    0.8205       136

    accuracy                         0.9994     85443
   macro avg     0.9086    0.9116    0.9101     85443
weighted avg     0.9994    0.9994    0.9994     85443

Confusion Matrix:
[[85282    25]
 [   24   112]]

Random Forest - Bayesian Hyperparameter Tuning
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidate