In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE

In [5]:
# ======================================
# Fraud Detection Jupyter Notebook
# ======================================

# Step 1: Import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
import joblib

# Step 2: Load dataset
# The location can be overridden with the FRAUD_DATA_PATH environment variable;
# when it is not set, the original default path below is used.
DATA_PATH = os.environ.get(
    "FRAUD_DATA_PATH", r"C:\Users\HP\OneDrive\frauddata\creditcard.csv"
)

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print("❌ Dataset not found! Please check the path:", DATA_PATH)
    # Stop here: every later step needs `df`, and continuing would either fail
    # with an unrelated NameError or silently reuse a stale `df` left in the kernel.
    raise
else:
    print("✅ Dataset loaded successfully")

# Step 3: Data overview
# Each heading is printed on its own line, followed by the corresponding table.
print("\n📊 First 5 rows:", df.head(), sep="\n")
print(
    "\n🔢 Class distribution (0=Normal, 1=Fraud):",
    df["Class"].value_counts(),
    sep="\n",
)

# Step 4: Features & Target
# X keeps the raw (unscaled) feature values; scaling is applied to the splits below.
X = df.drop("Class", axis=1)
y = df["Class"]

# Step 5: Train-Test Split
# Split BEFORE scaling so the held-out rows never contribute to the scaler's
# statistics (fitting the scaler on the full dataset leaks test-set information).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale Amount & Time features: fit on the training split only, then apply the
# same fitted transform to the test split.
scaled_cols = ["Amount", "Time"]
scaler = StandardScaler()
# Work on explicit copies so the column assignments below operate on independent
# frames rather than on objects derived from X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[scaled_cols] = scaler.fit_transform(X_train[scaled_cols])
X_test[scaled_cols] = scaler.transform(X_test[scaled_cols])

# Step 6: Handle Class Imbalance (SMOTE)
# Resampling is applied to the training split only; the test split is left as-is.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Class counts of the training labels before and after resampling.
counts_before = y_train.value_counts()
counts_after = y_train_res.value_counts()
print("\nBefore SMOTE:", counts_before)
print("After SMOTE:", counts_after)

# Step 7: Train Model (Logistic Regression)
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train_res, y_train_res)

# Step 8: Evaluation
# Hard class predictions and the predicted probability of class 1 on the test split.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# Compute the three metrics first, then report them in the same order as before.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
test_roc_auc = roc_auc_score(y_test, y_prob)

print("\n📌 Confusion Matrix:\n", test_confusion)
print("\n📌 Classification Report:\n", test_report)
print("\n📌 ROC-AUC Score:", test_roc_auc)

# Step 9: Demo Predictions
# Score the first five rows of the test split as a quick sanity check.
sample = X_test.head(5)
print("\n🔍 Sample Transactions Prediction:", model.predict(sample), sep="\n")
sample_fraud_prob = model.predict_proba(sample)[:, 1]  # column 1 = class 1
print("Fraud Probabilities:", sample_fraud_prob)

# Step 10: Save trained model and the fitted scaler
# The model was trained on scaled Amount/Time values, so the fitted scaler is
# persisted next to it; without it the saved model cannot score raw transactions.
os.makedirs("model", exist_ok=True)  # Ensure 'model/' folder exists
joblib.dump(model, "model/fraud_model.pkl")
joblib.dump(scaler, "model/scaler.pkl")
print("\n✅ Model saved as model/fraud_model.pkl")
print("✅ Scaler saved as model/scaler.pkl")


✅ Dataset loaded successfully

📊 First 5 rows:
   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267