# Credit Card Fraud Detection using Machine Learning

## 1. Load Dataset and Inspect Class Imbalance

In [None]:

import pandas as pd

# Read the transactions table from the CSV file in the working directory.
df = pd.read_csv("creditcard.csv")

# Count the rows per label in the `Class` column to inspect the class balance.
# The bare expression on the last line is what a notebook cell echoes.
class_counts = df["Class"].value_counts()
class_counts


## 2. Feature and Target Separation

In [None]:

# Target vector: the `Class` label column.
y = df["Class"]
# Feature matrix: every remaining column (a new frame; `df` is left untouched).
X = df.drop(columns="Class")


## 3. Stratified Train-Test Split

In [None]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. `stratify=y` keeps the label
# proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


## 4. Baseline Model: Logistic Regression

In [None]:

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Baseline: logistic regression on standardized features.
#
# Logistic regression is sensitive to feature scale: the L2 penalty treats all
# coefficients alike and the default solver converges slowly (or stops at
# `max_iter`) when columns have very different magnitudes.
# NOTE(review): presumably some raw columns (e.g. time / amount style fields)
# are on a much larger scale than the rest -- confirm against the dataset.
#
# Wrapping the scaler and the classifier in one pipeline means the scaling
# statistics are learned from the training split only and then re-applied to
# the test split inside `predict`, so no test information leaks into the fit.
lr = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000),
)
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_test, lr_pred))


## 5. Random Forest Classifier

In [None]:

from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree random forest with a fixed seed; `fit` returns the
# estimator itself, so construction and training are chained.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Predict labels for the held-out rows and print the per-class report.
rf_pred = rf.predict(X_test)
rf_report = classification_report(y_test, rf_pred)
print(rf_report)


## 6. Feature Importance Plot

In [None]:

import matplotlib.pyplot as plt

# Pair each feature column name with the importance score the fitted forest
# assigned to it.
features = X.columns
importances = rf.feature_importances_

# Horizontal bar chart: one bar per feature, bar length = importance.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(features, importances)
ax.set_title("Feature Importances")
plt.show()


## 7. Model Comparison


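Based on the classification reports printed above for the held-out test set,
the Random Forest achieves higher recall and F1-score than the Logistic
Regression baseline, making it the more suitable model for this fraud
detection task. Because the classes are heavily imbalanced, the comparison
should be made on per-class recall and F1-score rather than on overall accuracy.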


## 8. Save Best Model

In [None]:

import joblib
# Serialize the fitted random forest to disk so it can be reloaded later
# without retraining.
joblib.dump(value=rf, filename="best_fraud_model.pkl")
