In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import resample

# Load data
df = pd.read_csv("/content/Creditcard_data.csv")
X = df.drop("Class", axis=1)
y = df["Class"]

scaler = StandardScaler()
X = scaler.fit_transform(X)

# Models
models = {
    "M1": LogisticRegression(max_iter=1000),
    "M2": DecisionTreeClassifier(),
    "M3": RandomForestClassifier(),
    "M4": SVC(),
    "M5": KNeighborsClassifier()
}

results = []

# Simple Random Sampling
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=42)
for m, model in models.items():
    model.fit(X_tr, y_tr)
    results.append([m, "Simple Random", accuracy_score(y_te, model.predict(X_te))])

# Stratified Sampling
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=42)
for tr, te in sss.split(X, y):
    X_tr, X_te = X[tr], X[te]
    y_tr, y_te = y.iloc[tr], y.iloc[te]

for m, model in models.items():
    model.fit(X_tr, y_tr)
    results.append([m, "Stratified", accuracy_score(y_te, model.predict(X_te))])

# Cluster Sampling
df["Cluster"] = df.index % 5
cluster_df = df[df["Cluster"].isin([0, 1, 2])]

X_c = scaler.fit_transform(cluster_df.drop(["Class", "Cluster"], axis=1))
y_c = cluster_df["Class"]

X_tr, X_te, y_tr, y_te = train_test_split(X_c, y_c, test_size=0.3, random_state=42)
for m, model in models.items():
    model.fit(X_tr, y_tr)
    results.append([m, "Cluster", accuracy_score(y_te, model.predict(X_te))])

# K-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for m, model in models.items():
    acc = []
    for tr, te in kf.split(X):
        model.fit(X[tr], y.iloc[tr])
        acc.append(accuracy_score(y.iloc[te], model.predict(X[te])))
    results.append([m, "K-Fold", np.mean(acc)])

# Bootstrap Sampling
X_b, y_b = resample(X, y, replace=True, n_samples=len(y), random_state=42)
X_tr, X_te, y_tr, y_te = train_test_split(X_b, y_b, test_size=0.3, random_state=42)

for m, model in models.items():
    model.fit(X_tr, y_tr)
    results.append([m, "Bootstrap", accuracy_score(y_te, model.predict(X_te))])

# Results table
df_results = pd.DataFrame(results, columns=["Model", "Sampling", "Accuracy"])
final_table = df_results.pivot(index="Model", columns="Sampling", values="Accuracy")
print(final_table)


Sampling  Bootstrap   Cluster    K-Fold  Simple Random  Stratified
Model                                                             
M1         0.974138  0.992857  0.984491       0.982759    0.987069
M2         0.991379  0.957143  0.971521       0.969828    0.982759
M3         0.987069  0.992857  0.988362       0.987069    0.987069
M4         0.974138  0.992857  0.988362       0.987069    0.987069
M5         0.974138  0.992857  0.988362       0.987069    0.987069
