In [2]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Read the transactions CSV into a DataFrame
data = pd.read_csv("C:/Users/Arjunraj Mohanraj/Downloads/creditcard.csv")

# Separate the label column from the predictors
y = data["Class"]               # target: 0 = genuine, 1 = fraud
X = data.drop(columns="Class")  # every remaining column is a feature

# Hold out 20% of the rows for testing. stratify=y splits in a stratified
# fashion on the labels, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize features: the scaler is fitted on the training rows only,
# and that same fitted transform is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------------------
# Training the Models
# -------------------------------

# Model 1 - Logistic Regression (iteration cap set to 1000)
log_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
y_pred_log = log_model.predict(X_test_scaled)

# Model 2 - Random Forest (100 trees, seeded for repeatability)
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)

# Model 3 - Support Vector Machine (RBF kernel, balanced class weights)
svm_model = SVC(
    kernel="rbf",
    class_weight="balanced",
).fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)

# -------------------------------
# Evaluation Function
# -------------------------------
def evaluate_model(y_true, y_pred, model_name):
    """Print a confusion matrix and classification report for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model.
        model_name: Display name shown in the section banner.
    """
    # Banner first, so each model's output is clearly delimited.
    print(f"\n===== {model_name} =====")

    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", matrix)

    # digits=4 reports the metrics to four decimal places.
    report = classification_report(y_true, y_pred, digits=4)
    print("Classification Report:\n", report)

# Report on each fitted model in turn, labelled by its display name
for _label, _predictions in (
    ("Logistic Regression", y_pred_log),
    ("Random Forest", y_pred_rf),
    ("SVM", y_pred_svm),
):
    evaluate_model(y_test, _predictions, _label)


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (



===== Logistic Regression =====
Confusion Matrix:
 [[56851    13]
 [   36    62]]
Classification Report:
               precision    recall  f1-score   support

           0     0.9994    0.9998    0.9996     56864
           1     0.8267    0.6327    0.7168        98

    accuracy                         0.9991     56962
   macro avg     0.9130    0.8162    0.8582     56962
weighted avg     0.9991    0.9991    0.9991     56962


===== Random Forest =====
Confusion Matrix:
 [[56859     5]
 [   18    80]]
Classification Report:
               precision    recall  f1-score   support

           0     0.9997    0.9999    0.9998     56864
           1     0.9412    0.8163    0.8743        98

    accuracy                         0.9996     56962
   macro avg     0.9704    0.9081    0.9371     56962
weighted avg     0.9996    0.9996    0.9996     56962


===== SVM =====
Confusion Matrix:
 [[56708   156]
 [   24    74]]
Classification Report:
               precision    recall  f1-score   s