In [None]:
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
import seaborn as sns

In [None]:
import numpy as np

In [None]:
from pathlib import Path

# Credit-card CSV stored on Google Drive (Colab mount point /content/drive).
DATA_PATH = Path(r'/content/drive/MyDrive/AmEx/creditcard.csv')

# The file is only reachable once Drive is mounted. Mount on demand so this
# cell works on a fresh kernel instead of depending on a mount cell having
# been run by hand beforehand.
if not DATA_PATH.exists():
    from google.colab import drive
    drive.mount('/content/drive')

data = pd.read_csv(DATA_PATH)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score

# Features are every column except the label; `Class` is the target.
# NOTE: `X` is left unscaled here. Scaling is applied to the hold-out split
# below; if a scale-sensitive model is cross-validated on `X`, scale inside
# each fold.
X = data.drop(columns=['Class'])
y = data['Class']

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio the
# same in both parts, which matters because the positive class is rare
# (98 positives among 56,962 test rows in the recorded run).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardise using statistics computed from the training rows only, so that
# no information about the test rows leaks into preprocessing.
train_mean = X_train_raw.mean()
train_std = X_train_raw.std().replace(0, 1)  # constant column: avoid dividing by zero
X_train = (X_train_raw - train_mean) / train_std
X_test = (X_test_raw - train_mean) / train_std


In [None]:
# Import the estimator classes before they are used, so this cell runs on a
# fresh kernel (Restart & Run All) instead of raising NameError.
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

# Base classifiers; random_state is fixed so repeated runs are reproducible.
lr = LogisticRegression(random_state=42)
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)

In [None]:
from sklearn.svm import SVC

In [None]:
# Support-vector classifier used as a member of the voting ensemble.
# probability=True makes predict_proba available, which soft voting relies on.
# NOTE(review): SVC training cost grows quickly with the number of rows and
# probability=True adds internal cross-validation - confirm the runtime is
# acceptable on the full training split.
svm = SVC(
    random_state=42,
    probability=True,
)

In [None]:
# Soft-voting ensemble over the four base classifiers: the predicted class
# comes from the averaged class probabilities of the members.
base_estimators = [
    ('lr', lr),
    ('dt', dt),
    ('rf', rf),
    ('svm', svm),
]
ensemble = VotingClassifier(estimators=base_estimators, voting='soft')

In [None]:
# Fit the logistic-regression classifier on the training split so it can be
# scored on its own in the evaluation loop.
lr.fit(X_train, y_train)

In [None]:
# Fit the decision-tree classifier on the training split so it can be scored
# on its own in the evaluation loop.
dt.fit(X_train, y_train)

In [None]:
# Fit the random-forest classifier on the training split so it can be scored
# on its own in the evaluation loop.
rf.fit(X_train, y_train)

In [None]:
# Fit the soft-voting ensemble on the training split.
# NOTE: per scikit-learn's documented behaviour, VotingClassifier fits clones
# of the estimators it was given, so this does not replace fitting lr/dt/rf
# individually for their stand-alone evaluation.
ensemble.fit(X_train, y_train)

In [None]:
# Score every fitted classifier on the held-out split: accuracy and ROC AUC
# (from the positive-class probability), then the confusion matrix and the
# per-class precision/recall report.
models = [lr, dt, rf, ensemble]
for model in models:
    model_name = type(model).__name__

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Column 1 of predict_proba is the probability of the positive class.
    positive_proba = model.predict_proba(X_test)[:, 1]
    auc_score = roc_auc_score(y_test, positive_proba)
    print(f"{model_name} accuracy: {accuracy:.3f}, AUC score: {auc_score:.3f}\n")

    conf_mat = confusion_matrix(y_test, y_pred)
    print(f"Confusion matrix:\n{conf_mat}\n")

    report = classification_report(y_test, y_pred)
    print(f"Classification report:\n{report}\n")

LogisticRegression accuracy: 0.999, AUC score: 0.975

Confusion matrix:
[[56855     9]
 [   41    57]]

Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.86      0.58      0.70        98

    accuracy                           1.00     56962
   macro avg       0.93      0.79      0.85     56962
weighted avg       1.00      1.00      1.00     56962


DecisionTreeClassifier accuracy: 0.999, AUC score: 0.898

Confusion matrix:
[[56830    34]
 [   20    78]]

Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.70      0.80      0.74        98

    accuracy                           1.00     56962
   macro avg       0.85      0.90      0.87     56962
weighted avg       1.00      1.00      1.00     56962


RandomForestClassifier accuracy: 1.000, AUC score: 0.948

Confusion matrix:
[[56862   

In [None]:
# Report the ensemble's hold-out metrics on their own: accuracy, ROC AUC,
# confusion matrix and classification report.
y_pred = ensemble.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Column 1 of predict_proba is the probability of the positive class.
ensemble_proba = ensemble.predict_proba(X_test)[:, 1]
auc_score = roc_auc_score(y_test, ensemble_proba)
print(f"Ensemble accuracy: {accuracy:.3f}, AUC score: {auc_score:.3f}\n")

ensemble_conf_mat = confusion_matrix(y_test, y_pred)
print(f"Confusion matrix:\n{ensemble_conf_mat}\n")

ensemble_report = classification_report(y_test, y_pred)
print(f"Classification report:\n{ensemble_report}\n")

In [None]:
from sklearn.model_selection import StratifiedKFold

model = RandomForestClassifier(random_state=42)

# Stratified 5-fold cross-validation of the random forest over the full
# feature matrix. Fold-local names are used on purpose: reusing X_train /
# X_test / y_train / y_test here would overwrite the hold-out split that the
# earlier evaluation cells depend on.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_accuracies = []
fold_aucs = []
for fold, (train_idx, test_idx) in enumerate(cv.split(X, y)):
    # Split the data into training and testing sets for this fold
    X_fold_train, y_fold_train = X.iloc[train_idx], y.iloc[train_idx]
    X_fold_test, y_fold_test = X.iloc[test_idx], y.iloc[test_idx]

    # Fit the model on the training set for this fold
    model.fit(X_fold_train, y_fold_train)

    # Evaluate the model on the testing set for this fold
    fold_pred = model.predict(X_fold_test)
    fold_accuracy = accuracy_score(y_fold_test, fold_pred)
    # Column 1 of predict_proba is the probability of the positive class.
    fold_auc = roc_auc_score(y_fold_test, model.predict_proba(X_fold_test)[:, 1])
    fold_accuracies.append(fold_accuracy)
    fold_aucs.append(fold_auc)
    print(f"Fold {fold+1} accuracy: {fold_accuracy:.3f}, AUC score: {fold_auc:.3f}")

# Summarise across folds so the spread is visible, not only the per-fold values.
print(
    f"Mean accuracy: {np.mean(fold_accuracies):.3f} ± {np.std(fold_accuracies):.3f}, "
    f"mean AUC score: {np.mean(fold_aucs):.3f} ± {np.std(fold_aucs):.3f}"
)

Fold 1 accuracy: 0.999, AUC score: 0.933
Fold 2 accuracy: 1.000, AUC score: 0.963
Fold 3 accuracy: 1.000, AUC score: 0.953
Fold 4 accuracy: 1.000, AUC score: 0.947
Fold 5 accuracy: 1.000, AUC score: 0.937


In [None]:
# Mount Google Drive at /content/drive, the location the dataset CSV is read
# from. NOTE: this cell appears after the data-loading cell, so it does not
# help a top-to-bottom run; Drive must already be mounted by the time the CSV
# is read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
