In [6]:
!pip install imbalanced-learn




In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN


In [8]:
import pandas as pd

data = pd.read_csv("/content/Creditcard_data (1).csv")

print(data['Class'].value_counts())


Class
0    763
1      9
Name: count, dtype: int64


In [10]:
X = data.drop('Class', axis=1)
y = data['Class']

smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X, y)

print("Balanced class distribution:")
print(pd.Series(y_balanced).value_counts())


Balanced class distribution:
Class
0    763
1    763
Name: count, dtype: int64


In [11]:
scaler = StandardScaler()
X_balanced_scaled = scaler.fit_transform(X_balanced)


In [13]:
samplers = {
    "Sampling1": RandomUnderSampler(random_state=42),
    "Sampling2": RandomOverSampler(random_state=42),
    "Sampling3": SMOTE(random_state=42),
    "Sampling4": SMOTEENN(random_state=42),
    "Sampling5": None
}


In [14]:
models = {
    "M1": LogisticRegression(max_iter=1000),
    "M2": DecisionTreeClassifier(),
    "M3": RandomForestClassifier(),
    "M4": KNeighborsClassifier(),
    "M5": GaussianNB()
}


In [15]:
results = pd.DataFrame(index=models.keys(), columns=samplers.keys())

for s_name, sampler in samplers.items():

    if sampler is not None:
        X_sampled, y_sampled = sampler.fit_resample(
            X_balanced_scaled, y_balanced
        )
    else:
        X_sampled, y_sampled = X_balanced_scaled, y_balanced

    X_train, X_test, y_train, y_test = train_test_split(
        X_sampled, y_sampled, test_size=0.3, random_state=42
    )

    for m_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred) * 100
        results.loc[m_name, s_name] = round(acc, 2)


In [16]:
print("Accuracy comparison table:")
results


Accuracy comparison table:


Unnamed: 0,Sampling1,Sampling2,Sampling3,Sampling4,Sampling5
M1,91.27,90.61,90.61,95.46,90.61
M2,98.03,98.25,98.25,97.96,98.25
M3,99.78,99.34,99.13,99.09,99.34
M4,95.41,95.2,95.2,97.51,95.2
M5,86.24,84.5,84.5,88.21,84.5


In [17]:
best_combo = results.stack().astype(float).idxmax()
best_accuracy = results.stack().astype(float).max()

print("Best Model:", best_combo[0])
print("Best Sampling Technique:", best_combo[1])
print("Highest Accuracy:", best_accuracy)


Best Model: M3
Best Sampling Technique: Sampling1
Highest Accuracy: 99.78
