In [None]:
# Credit Card Fraud Detection

# 📌 Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, precision_recall_curve

# 📌 Load Dataset
# Reads creditcard.csv relative to the current working directory.
data = pd.read_csv("creditcard.csv")

# 📌 Data Exploration
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
data.info()
# Number of rows for each value of the Class label column.
print(data['Class'].value_counts())

In [None]:
# 📌 Visualize Class Distribution
# One bar per value of the Class column, drawn on an explicitly created Axes.
class_fig, class_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=data, x='Class', ax=class_ax)
class_ax.set_title('Class Distribution (0 = Non-Fraud, 1 = Fraud)')
plt.show()


In [None]:
# 📌 Transaction Amount Distribution for Fraud vs Non-Fraud
# Both classes are binned on the same 50 edges, taken from the full Amount range.
# Two independent histplot(bins=50) calls would each derive edges from their own
# subset, giving bars of different widths that cannot be compared.
amount_bin_edges = np.histogram_bin_edges(data['Amount'], bins=50)

plt.figure(figsize=(10,6))
sns.histplot(data.loc[data['Class']==0, 'Amount'], bins=amount_bin_edges, color='green', label='Non-Fraud', alpha=0.6)
sns.histplot(data.loc[data['Class']==1, 'Amount'], bins=amount_bin_edges, color='red', label='Fraud', alpha=0.6)
# Log-scaled counts keep the smaller class visible next to the larger one.
# NOTE(review): assumes fraud rows are a small minority of the data -- confirm
# against the value_counts() output before relying on this framing.
plt.yscale('log')
plt.legend()
plt.title('Transaction Amount Distribution')
plt.xlabel('Amount')
plt.ylabel('Frequency')
plt.show()

In [None]:
# 📌 Data Preprocessing
# Drop Time column.
# errors='ignore' makes the drop a no-op when Time is already gone, so re-running
# this cell on the reassigned `data` does not raise KeyError.
data = data.drop(columns=['Time'], errors='ignore')

# Scale Amount column to zero mean / unit variance.
# NOTE(review): the scaler is fit on every row of `data`, i.e. before the
# train/test split that happens further down, so held-out rows contribute to the
# mean and standard deviation used here.
scaler = StandardScaler()
data['Amount'] = scaler.fit_transform(data[['Amount']])


In [None]:
# 📌 Split Data
X = data.drop('Class', axis=1)
y = data['Class']

# Stratified 70/30 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Standardise Amount from the training rows only and apply that same transform to
# the test rows, so no test-set statistics reach the models through scaling.
# Standardisation is invariant to any earlier affine rescaling of the column, so
# the result is the same whether or not Amount was already scaled on the full
# dataset before this cell.
amount_scaler = StandardScaler()
X_train = X_train.assign(Amount=amount_scaler.fit_transform(X_train[['Amount']]).ravel())
X_test = X_test.assign(Amount=amount_scaler.transform(X_test[['Amount']]).ravel())

# 📌 Model Training: Logistic Regression
# max_iter is raised to 1000 to give the solver more iterations to converge.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
# Second predict_proba column: the score for label 1.
y_prob_lr = lr.predict_proba(X_test)[:,1]


In [None]:
# 📌 Model Training: Random Forest
# 100 trees with a fixed seed; fit() returns the estimator, so it is chained here.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Hard label predictions, then the second predict_proba column (score for label 1).
y_pred_rf = rf.predict(X_test)
y_prob_rf = rf.predict_proba(X_test)[:, 1]


In [None]:
# 📌 Evaluation Function
def evaluate_model(y_test, y_pred, y_prob, model_name):
    """Print a text report for one classifier and show its confusion matrix.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, compared against ``y_test``.
        y_prob: Scores passed to ``roc_auc_score`` together with ``y_test``.
        model_name: Label used in the printed header and in the plot title.

    Returns:
        None. Output goes to stdout and to a matplotlib figure.
    """
    header = f"---- {model_name} ----"
    print(header)
    print(classification_report(y_test, y_pred))
    auc = roc_auc_score(y_test, y_prob)
    print("ROC AUC Score:", auc)

    # Confusion-matrix heatmap with integer annotations in each cell
    matrix = confusion_matrix(y_test, y_pred)
    _, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'{model_name} Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()

# 📌 Evaluate Logistic Regression
# Keyword arguments make it explicit which array plays which role.
evaluate_model(
    y_test=y_test,
    y_pred=y_pred_lr,
    y_prob=y_prob_lr,
    model_name="Logistic Regression",
)


In [None]:
# 📌 Evaluate Random Forest
evaluate_model(
    y_test=y_test,
    y_pred=y_pred_rf,
    y_prob=y_prob_rf,
    model_name="Random Forest",
)

# 📌 ROC Curve for Both Models
# roc_curve returns (fpr, tpr, thresholds); the thresholds are discarded.
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_prob_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_prob_rf)

roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr_lr, tpr_lr, label='Logistic Regression')
roc_ax.plot(fpr_rf, tpr_rf, label='Random Forest')
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference line.
roc_ax.plot([0, 1], [0, 1], 'k--')
roc_ax.set_title('ROC Curve')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.legend()
plt.show()


In [None]:
# 📌 Precision-Recall Curve for Both Models
# precision_recall_curve returns (precision, recall, thresholds); thresholds are discarded.
precision_lr, recall_lr, _ = precision_recall_curve(y_test, y_prob_lr)
precision_rf, recall_rf, _ = precision_recall_curve(y_test, y_prob_rf)

pr_fig, pr_ax = plt.subplots(figsize=(8, 6))
# Recall on the x-axis, precision on the y-axis, one line per model.
pr_ax.plot(recall_lr, precision_lr, label='Logistic Regression')
pr_ax.plot(recall_rf, precision_rf, label='Random Forest')
pr_ax.set_title('Precision-Recall Curve')
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.legend()
plt.show()
