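Install the required packages (run this in a terminal, or prefix it with `%pip` to run it inside a notebook cell):

pip install pandas numpy scikit-learn seaborn matplotlib imbalanced-learn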


Download the dataset from Kaggle and save `creditcard.csv` in the same directory as this notebook:
https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve

from imblearn.over_sampling import SMOTE

In [None]:
# 1. Read the transactions file and report how many rows fall in each class
df = pd.read_csv("creditcard.csv")
class_counts = df['Class'].value_counts()  # Class 1 = Fraud, Class 0 = Normal
print(class_counts)

# 2. Bar chart of the row count per class
fig, ax = plt.subplots()
sns.countplot(x='Class', data=df, ax=ax)
ax.set_title("Class Distribution")
plt.show()


In [None]:
#  3. Feature Scaling
# Standardize the 'Amount' and 'Time' columns and store the results under new
# names; the raw columns are then removed so only the scaled versions remain.
# The same scaler object is refit for each column, so after this cell `scaler`
# holds the statistics of 'Time' only.
# NOTE(review): the scaler is fit on the full frame, i.e. before the train/test
# split in step 5, so test rows contribute to the scaling statistics. Consider
# fitting on the training partition only.
scaler = StandardScaler()
df['scaled_amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))
df['scaled_time'] = scaler.fit_transform(df['Time'].values.reshape(-1, 1))
df = df.drop(columns=['Amount', 'Time'])

#  4. Separate Features and Target
# 'Class' is the label column (1 = Fraud, 0 = Normal); every other column is
# used as a model input.
X = df.drop(columns=['Class'])
y = df['Class']

In [None]:

#  5. Split Data
# Hold out 20% of the rows for testing. stratify=y keeps the class proportions
# the same in both partitions; random_state fixes the shuffle so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
# 6. Oversample the training partition with SMOTE (the test partition is left untouched)
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)
resampled_counts = pd.Series(y_res).value_counts()
print("After SMOTE:", resampled_counts)

# 7. Fit a random forest on the resampled training data
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_res, y_res)

In [None]:
#  8. Evaluate Model
# Hard class predictions feed the report and confusion matrix; the second
# column of predict_proba (the score for the class listed second in
# model.classes_, presumably Class 1) is kept for the ROC analysis.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

print("\nClassification Report:\n", classification_report(y_test, y_pred))
conf_mat = confusion_matrix(y_test, y_pred)

# confusion_matrix puts true labels on rows and predicted labels on columns,
# so the axes are labelled to match; without them the heatmap is ambiguous.
fig, ax = plt.subplots()
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='YlGnBu', ax=ax)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# 9. ROC curve and its area, both computed from y_test and y_proba
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = roc_auc_score(y_test, y_proba)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.4f}")
ax.plot([0, 1], [0, 1], linestyle='--', color='gray')  # diagonal reference line
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend()
plt.show()