Step 1: Import Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
from imblearn.combine import SMOTETomek


Step 2: Load Dataset

In [3]:
data = pd.read_csv("Creditcard_data.csv")
data.head()
data['Class'].value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,763
1,9


Step 3: Split Features and Target

In [4]:
X = data.drop("Class", axis=1)
y = data["Class"]


In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


Step 4: Define the Sampling Techniques

In [6]:
samplers = {
    "Sampling1_RandomUnder": RandomUnderSampler(),
    "Sampling2_RandomOver": RandomOverSampler(),
    "Sampling3_SMOTE": SMOTE(),
    "Sampling4_NearMiss": NearMiss(),
    "Sampling5_SMOTETomek": SMOTETomek()
}

Step 5: ML Models

In [7]:
models = {
    "M1_LogisticRegression": LogisticRegression(max_iter=1000),
    "M2_DecisionTree": DecisionTreeClassifier(),
    "M3_RandomForest": RandomForestClassifier(),
    "M4_KNN": KNeighborsClassifier(),
    "M5_SVM": SVC()
}


Step 6: Apply Models and Sampling

In [8]:
results = pd.DataFrame(index=models.keys(), columns=samplers.keys())

for samp_name, sampler in samplers.items():
    X_res, y_res = sampler.fit_resample(X_train, y_train)

    for model_name, model in models.items():
        model.fit(X_res, y_res)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        results.loc[model_name, samp_name] = round(acc * 100, 2)


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Step 7: Result

In [9]:
print(results)


                      Sampling1_RandomUnder Sampling2_RandomOver  \
M1_LogisticRegression                 47.41                91.81   
M2_DecisionTree                       77.16                98.71   
M3_RandomForest                       60.78                99.14   
M4_KNN                                60.78                97.84   
M5_SVM                                77.16                 87.5   

                      Sampling3_SMOTE Sampling4_NearMiss Sampling5_SMOTETomek  
M1_LogisticRegression            93.1              18.53                93.53  
M2_DecisionTree                 98.71              15.95                97.84  
M3_RandomForest                 99.14               55.6                98.71  
M4_KNN                          75.86              39.66                73.28  
M5_SVM                           44.4              38.79                45.69  
