# 💳 Credit Card Fraud Detection Project
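A machine learning project that detects fraudulent credit card transactions. It compares Logistic Regression and Random Forest classifiers, using SMOTE oversampling to handle the class imbalance.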

In [None]:
# 📁 Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore")

In [None]:
# 📂 Step 2: Load and Explore Dataset
# Read the transactions CSV (looked up relative to the working directory).
df = pd.read_csv("creditcard.csv")

# Quick overview: dimensions, the first rows, and the count of each label
# in the 'Class' column.
for overview in (df.shape, df.head(), df['Class'].value_counts()):
    print(overview)

In [None]:
# 📊 Step 3: Data Visualization
# Bar chart of the label counts so the class imbalance is visible at a glance.
plt.figure(figsize=(6,4))
sns.countplot(data=df, x='Class', palette='Set2')
plt.title("Class Distribution")
plt.xticks([0,1], ['Genuine (0)', 'Fraud (1)'])
plt.show()

# Pairwise correlation of every column; computed once and reused for the heatmap.
corr = df.corr()

# Five features with the largest absolute correlation to 'Class'. The label is
# dropped by name so the selection does not rely on it sorting to position 0.
top_corr = corr['Class'].drop('Class').abs().sort_values(ascending=False)[:5].index

# Keep 'Class' in the plotted matrix: without it the heatmap only shows how the
# selected features correlate with each other, not with fraud as the title says.
heatmap_cols = ['Class', *top_corr]
plt.figure(figsize=(7, 5))
sns.heatmap(corr.loc[heatmap_cols, heatmap_cols], annot=True, cmap='coolwarm')
plt.title("Top Correlated Features with Fraud")
plt.show()

In [None]:
# 🧹 Step 4: Preprocessing & Handling Class Imbalance
# Separate the features from the 'Class' label.
X = df.drop('Class', axis=1)
y = df['Class']

# Split BEFORE fitting any preprocessing so the held-out rows cannot influence
# the scaler's statistics (fitting on the full data leaks test information).
# stratify=y keeps the label proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Work on explicit copies so the column assignments below neither write back
# into X nor trigger pandas chained-assignment warnings.
X_train = X_train.copy()
X_test = X_test.copy()

# Learn the 'Amount' mean/std from the training rows only, then apply that same
# transform to the test rows. X itself keeps the raw 'Amount' values.
amount_scaler = StandardScaler()
X_train['Amount'] = amount_scaler.fit_transform(
    X_train['Amount'].values.reshape(-1, 1)
).ravel()
X_test['Amount'] = amount_scaler.transform(
    X_test['Amount'].values.reshape(-1, 1)
).ravel()

# Oversample the training partition only; the test set keeps its original
# class balance so the evaluation reflects real-world proportions.
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)
print(f"Before SMOTE: {y_train.value_counts().to_dict()}")
print(f"After SMOTE: {y_res.value_counts().to_dict()}")

In [None]:
# 🤖 Step 5: Train Logistic Regression
# The default max_iter=100 is often too low for the solver to converge when some
# features are unscaled (only 'Amount' is standardized in this notebook), and the
# ConvergenceWarning would be hidden by warnings.filterwarnings("ignore") above.
# Give the optimizer more iterations.
lr = LogisticRegression(max_iter=1000)
# Fit on the SMOTE-resampled training data; predict on the untouched test split.
lr.fit(X_res, y_res)
y_pred_lr = lr.predict(X_test)

In [None]:
# 🤖 Step 5: Train Random Forest
# n_jobs=-1 builds the 100 trees in parallel on all available cores; the fixed
# random_state keeps the run reproducible.
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
# Fit on the SMOTE-resampled training data; predict on the untouched test split.
rf.fit(X_res, y_res)
y_pred_rf = rf.predict(X_test)

In [None]:
# 📈 Step 6: Evaluation Function
def evaluate_model(y_true, y_pred, model_name):
    """Print a plain-text evaluation report for one set of predictions.

    Output order: a banner containing ``model_name``, the confusion matrix,
    the classification report, and then precision, recall and F1 score
    formatted to four decimal places. All metrics are computed with the
    scikit-learn functions' default settings.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Display name inserted into the banner line.

    Returns:
        None. The report is written to standard output.
    """
    banner = f"\n--- {model_name} Evaluation ---"
    print(banner)

    # Multi-line summaries first: raw confusion counts, then the per-class table.
    for summarize in (confusion_matrix, classification_report):
        print(summarize(y_true, y_pred))

    # Headline scores; the label strings are pre-padded so the numbers line up.
    headline_metrics = (
        ("Precision: ", precision_score),
        ("Recall:    ", recall_score),
        ("F1 Score:  ", f1_score),
    )
    for prefix, metric in headline_metrics:
        print(f"{prefix}{metric(y_true, y_pred):.4f}")

# Report both classifiers against the same held-out labels, in training order.
for model_label, predictions in (
    ("Logistic Regression", y_pred_lr),
    ("Random Forest", y_pred_rf),
):
    evaluate_model(y_test, predictions, model_label)

In [None]:
# 📊 Step 7: Confusion Matrix Plot
def plot_conf_matrix(y_true, y_pred, title):
    """Draw the confusion matrix of ``y_pred`` vs ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels (plotted along the "Actual" axis).
        y_pred: Predicted labels (plotted along the "Predicted" axis).
        title: Text shown as the plot title.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    counts = confusion_matrix(y_true, y_pred)

    # Draw on the current axes explicitly and label through the Axes object.
    ax = plt.gca()
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)
    plt.show()

# Visualize the Random Forest predictions on the held-out test labels.
plot_conf_matrix(
    y_true=y_test,
    y_pred=y_pred_rf,
    title="Random Forest - Confusion Matrix",
)