In [None]:
# Train and evaluate models (Logistic Regression, Random Forest) on Fraud_Data
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score

In [2]:

# -------------------------
# Load scaled + cleaned data
# -------------------------
# Feature matrices: every column of each CSV is kept as-is.
X_train = pd.read_csv("../data/X_train_scaled.csv")
X_test = pd.read_csv("../data/X_test_scaled.csv")

# Targets: only the "class" column of each label file is used (as a Series).
# NOTE(review): file names suggest the training labels were resampled with
# SMOTE while the test labels keep the original distribution — confirm
# against the preprocessing notebook.
y_train = pd.read_csv("../data/y_train_smote.csv").loc[:, "class"]
y_test = pd.read_csv("../data/y_test_original.csv").loc[:, "class"]


In [3]:
# -------------------------
# 1. Logistic Regression
# -------------------------
# fit() returns the estimator itself, so construction and training are chained.
lr_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

# Scores for ranking metrics: column 1 of predict_proba, i.e. the probability
# of the second entry in lr_model.classes_.
lr_probs = lr_model.predict_proba(X_test)[:, 1]

# Hard class labels for the confusion matrix / classification report.
lr_preds = lr_model.predict(X_test)

In [4]:
# -------------------------
# 2. Random Forest
# -------------------------
# 100 trees, each limited to depth 10; fixed seed for repeatable results.
rf_model = RandomForestClassifier(
    max_depth=10,
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Scores for ranking metrics: column 1 of predict_proba, i.e. the probability
# of the second entry in rf_model.classes_.
rf_probs = rf_model.predict_proba(X_test)[:, 1]

# Hard class labels for the confusion matrix / classification report.
rf_preds = rf_model.predict(X_test)

In [5]:

# -------------------------
# 3. Evaluation Function
# -------------------------
def evaluate(y_true, preds, probs, model_name):
    """Print a standard evaluation summary for one classifier.

    Each metric is computed right before it is printed, so the output order is:
    header, confusion matrix, classification report (4 decimal places),
    ROC-AUC, then PR-AUC (average precision).

    Args:
        y_true: Ground-truth labels.
        preds: Predicted class labels; used for the confusion matrix and the
            classification report.
        probs: Per-sample scores; used for ROC-AUC and average precision.
        model_name: Label shown in the header line of the printed summary.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\n📊 Evaluation for {model_name}")

    # Label-based metrics (depend on the hard predictions).
    matrix = confusion_matrix(y_true, preds)
    print("Confusion Matrix:\n", matrix)

    report = classification_report(y_true, preds, digits=4)
    print("\nClassification Report:\n", report)

    # Score-based metrics (depend on the ranking induced by `probs`).
    roc_auc = roc_auc_score(y_true, probs)
    print("🔹 ROC-AUC Score:", roc_auc)

    pr_auc = average_precision_score(y_true, probs)
    print("🔸 PR-AUC Score :", pr_auc)


In [7]:

# -------------------------
# 4. Evaluate both models
# -------------------------
# Logistic Regression is reported in this cell; Random Forest has its own cell.
evaluate(
    y_true=y_test,
    preds=lr_preds,
    probs=lr_probs,
    model_name="Logistic Regression",
)



📊 Evaluation for Logistic Regression
Confusion Matrix:
 [[31132  9957]
 [ 1679  2566]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9488    0.7577    0.8425     41089
           1     0.2049    0.6045    0.3061      4245

    accuracy                         0.7433     45334
   macro avg     0.5769    0.6811    0.5743     45334
weighted avg     0.8792    0.7433    0.7923     45334

🔹 ROC-AUC Score: 0.7210578914838572
🔸 PR-AUC Score : 0.2781827264454265


In [8]:
# Same held-out labels as the Logistic Regression report, so the two
# summaries are directly comparable.
evaluate(
    y_true=y_test,
    preds=rf_preds,
    probs=rf_probs,
    model_name="Random Forest",
)


📊 Evaluation for Random Forest
Confusion Matrix:
 [[40952   137]
 [ 1974  2271]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9540    0.9967    0.9749     41089
           1     0.9431    0.5350    0.6827      4245

    accuracy                         0.9534     45334
   macro avg     0.9486    0.7658    0.8288     45334
weighted avg     0.9530    0.9534    0.9475     45334

🔹 ROC-AUC Score: 0.7712675415350648
🔸 PR-AUC Score : 0.6289248996359332
