# 🛡️ Credit Card Fraud Detection
This notebook analyzes 284,807 credit card transactions to identify fraudulent activity using exploratory data analysis (EDA), feature engineering, class balancing with SMOTE, and a Random Forest classifier.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the transactions from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='creditcard.csv')
# Preview the first five rows (the default for DataFrame.head).
df.head(n=5)

In [None]:
# Class balance: bar chart of the number of rows per value of 'Class'.
class_ax = sns.countplot(data=df, x='Class')
# The trailing semicolon suppresses the notebook's echo of the returned object.
class_ax.set_title('Class Distribution');

In [None]:
# Feature Engineering: Time of Day & Amount Bin
# 'Hour' = whole hours elapsed (Time / 3600, floored) wrapped to 0-23.
# NOTE(review): assumes 'Time' is expressed in seconds -- confirm against the dataset.
# Computed on the whole column at once instead of a per-row Python lambda;
# the arithmetic (floor of x / 3600, then modulo 24) is unchanged.
df['Hour'] = np.floor(df['Time'] / 3600) % 24
# 'AmountBin' = quartile bucket of 'Amount' (q=4 equal-frequency bins),
# stored as an ordered categorical with the four labels below.
df['AmountBin'] = pd.qcut(df['Amount'], q=4, labels=['Low', 'Mid-Low', 'Mid-High', 'High'])
# Preview the two engineered columns.
df[['Hour', 'AmountBin']].head()

In [None]:
# Correlation heatmap
# Only numeric columns can be correlated. The engineered 'AmountBin' column is
# a categorical with string labels, and DataFrame.corr() raises on it in
# pandas >= 2.0 (numeric_only defaults to False), so restrict the frame first.
numeric_df = df.select_dtypes(include='number')
plt.figure(figsize=(12, 6))
# center=0 puts the neutral colour of the diverging palette at zero correlation.
sns.heatmap(numeric_df.corr(), cmap='coolwarm', center=0)
plt.title('Correlation Heatmap');

In [None]:
# Data prep for model
# Features: everything except the target ('Class') and the raw 'Time' column.
X = df.drop(['Class', 'Time'], axis=1)
# Categorical columns (e.g. the engineered 'AmountBin' with string labels)
# cannot be scaled as-is; replace each with its integer category code.
# Missing values, if any, become -1.
for cat_col in X.select_dtypes(include='category').columns:
    X[cat_col] = X[cat_col].cat.codes
y = df['Class']

# Split BEFORE scaling and oversampling so that the held-out test set contains
# only real, untouched transactions at the original class ratio. stratify=y
# keeps the class proportions the same in both splits.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then reuse its statistics for the
# test split, so no information from the test data influences the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Oversample the minority class on the training split only. Resampling before
# the split would place synthetic points derived from test rows in the training
# set and make the evaluation overly optimistic.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train_scaled, y_train_raw)
X_train, y_train = X_res, y_res

In [None]:
# Random Forest model
# Train on the prepared training set and evaluate on the held-out test set.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
# Hard class labels for the confusion matrix and the per-class report.
y_pred = rf.predict(X_test)
# Scores for ROC-AUC: predict_proba columns follow rf.classes_, so with
# labels {0, 1} column 1 is the probability of class 1. ROC-AUC needs these
# continuous scores; hard 0/1 predictions reduce the curve to a single
# operating point.
y_score = rf.predict_proba(X_test)[:, 1]
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, y_score))