In [2]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix, classification_report, roc_auc_score,
    precision_recall_curve, f1_score, auc
)
from imblearn.over_sampling import SMOTE
from sklearn.pipeline import Pipeline
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import StandardScaler

In [9]:
# 2. Data Preparation
data = pd.read_csv("/content/drive/MyDrive/creditcard.csv")
X = data.drop('Class', axis=1)
y = data['Class']

In [4]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Apply SMOTE to balance the data
smote = SMOTE(random_state=42)
X_train_sm, y_train_sm = smote.fit_resample(X_train, y_train)

In [6]:
# 3. Pipeline Creation
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(random_state=42))
])
pipeline.fit(X_train_sm, y_train_sm)
y_pred = pipeline.predict(X_test)

In [7]:
# 4. Performance Evaluation
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
roc_auc = roc_auc_score(y_test, pipeline.predict_proba(X_test)[:, 1])
print("ROC-AUC Score:", roc_auc)
precision, recall, _ = precision_recall_curve(y_test, pipeline.predict_proba(X_test)[:, 1])
pr_auc = auc(recall, precision)
print("PR-AUC Score:", pr_auc)

Confusion Matrix:
 [[56852    12]
 [   16    82]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.87      0.84      0.85        98

    accuracy                           1.00     56962
   macro avg       0.94      0.92      0.93     56962
weighted avg       1.00      1.00      1.00     56962

ROC-AUC Score: 0.9848733246815891
PR-AUC Score: 0.875020278384242


In [8]:
# 5. Classifier Calibration
calibrated_clf = CalibratedClassifierCV(
    pipeline.named_steps['classifier'], method='isotonic', cv='prefit'
)
calibrated_clf.fit(X_train_sm, y_train_sm)
y_calibrated_pred = calibrated_clf.predict(X_test)

# Performance after calibration
print("Confusion Matrix After Calibration:\n", confusion_matrix(y_test, y_calibrated_pred))



Confusion Matrix After Calibration:
 [[53366  3498]
 [   53    45]]
