In [1]:
pip install pandas scikit-learn scipy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import uniform, randint

In [3]:
credit_card_data=pd.read_csv("creditcard.csv.zip")

In [4]:
X = credit_card_data.drop('Class', axis=1)
y = credit_card_data['Class']

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
models_params = {
    'Logistic Regression': {
        'model': LogisticRegression(max_iter=1000),
        'params': {
            'C': uniform(0.01, 10),
            'penalty': ['l2'],
            'solver': ['lbfgs']
        }
    },
    'KNN': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': randint(3, 20),
            'weights': ['uniform', 'distance'],
            'metric': ['euclidean', 'manhattan']
        }
    },
    'Decision Tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini', 'entropy'],
            'max_depth': randint(5, 30),
            'min_samples_split': randint(2, 10)
        }
    },
    'Random Forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': randint(50, 200),
            'criterion': ['gini', 'entropy'],
            'max_depth': randint(5, 30),
            'min_samples_split': randint(2, 10)
        }
    }
}


In [7]:
for name, mp in models_params.items():
    print(f"\n{name} - Hyperparameter Tuning")
    rand_search = RandomizedSearchCV(
        estimator=mp['model'],
        param_distributions=mp['params'],
        n_iter=20,
        cv=3,
        scoring='f1',
        verbose=1,
        n_jobs=-1,
        random_state=42
    )
    rand_search.fit(X_train, y_train)
    
    print("Best Parameters:", rand_search.best_params_)

    y_pred = rand_search.predict(X_test)
    print("Classification Report:")
    print(classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))


Logistic Regression - Hyperparameter Tuning
Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best Parameters: {'C': 2.1333911067827613, 'penalty': 'l2', 'solver': 'lbfgs'}
Classification Report:
              precision    recall  f1-score   support

           0     0.9994    0.9998    0.9996     85307
           1     0.8384    0.6103    0.7064       136

    accuracy                         0.9992     85443
   macro avg     0.9189    0.8051    0.8530     85443
weighted avg     0.9991    0.9992    0.9991     85443

Confusion Matrix:
[[85291    16]
 [   53    83]]

KNN - Hyperparameter Tuning
Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
Classification Report:
              precision    recall  f1-score   support

           0     0.9988    1.0000    0.9994     85307
           1     1.0000    0.2647    0.4186       136

    accuracy                         0.9988     85443
   