In [1]:
import pandas as pd

# ✅ Load Data
df = pd.read_csv("data/processed_data.csv")

# ✅ Count Fraud vs Non-Fraud Transactions
class_counts = df["Class"].value_counts()
print("✅ Fraud vs Non-Fraud Class Distribution:\n", class_counts)


✅ Fraud vs Non-Fraud Class Distribution:
 0    284315
1       492
Name: Class, dtype: int64


In [2]:
# ✅ Show Only Fraud Cases
fraud_cases = df[df["Class"] == 1]
print(f"🚨 Found {len(fraud_cases)} fraud transactions!\n")
print(fraud_cases.head())  # Show first 5 fraud cases


🚨 Found 492 fraud transactions!

        Time        V1        V2        V3        V4        V5        V6  \
541    406.0 -2.312227  1.951992 -1.609851  3.997906 -0.522188 -1.426545   
623    472.0 -3.043541 -3.157307  1.088463  2.288644  1.359805 -1.064823   
4920  4462.0 -2.303350  1.759247 -0.359745  2.330243 -0.821628 -0.075788   
6108  6986.0 -4.397974  1.358367 -2.592844  2.679787 -1.128131 -1.706536   
6329  7519.0  1.234235  3.019740 -4.304597  4.732795  3.624201 -1.357746   

            V7        V8        V9  ...       V25       V26       V27  \
541  -2.537387  1.391657 -2.770089  ...  0.044519  0.177840  0.261145   
623   0.325574 -0.067794 -0.270953  ...  0.279798 -0.145362 -0.252773   
4920  0.562320 -0.399147 -0.238253  ... -0.156114 -0.542628  0.039566   
6108 -3.496197 -0.248778 -0.247768  ...  0.252405 -0.657488 -0.827136   
6329  1.713445 -0.496358 -1.282858  ...  1.488901  0.566797 -0.010016   

           V28  Class  Transaction_hour  Transaction_Day  \
541  -0.143

In [10]:
import pandas as pd
import joblib
from sklearn.metrics import classification_report, roc_auc_score

# ✅ Load Trained Model
model = joblib.load("models/fraud_detection.pkl")
print("✅ Model Loaded Successfully!")

# ✅ Load Processed Data
df = pd.read_csv("C:/Users/Amulya/OneDrive/Documents/Desktop/Assignment1/Data/processed_data.csv")

# ✅ Select Features (Same as Model Training)
features = ["Amount_Scaled", "Transaction_Hour", "Transaction_Day"] + [f"V{i}" for i in range(1, 29)]
X = df[features]
y = df["Class"]

# ✅ Print Data Sample
print("✅ Data Sample:\n", df.head())


✅ Model Loaded Successfully!
✅ Data Sample:
          V1        V2        V3        V4        V5        V6        V7  \
0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9       V10  ...       V27       V28  Class  \
0  0.098698  0.363787  0.090794  ...  0.133558 -0.021053      0   
1  0.085102 -0.255425 -0.166974  ... -0.008983  0.014724      0   
2  0.247676 -1.514654  0.207643  ... -0.055353 -0.059752      0   
3  0.377436 -1.387024 -0.054952  ...  0.062723  0.061458      0   
4 -0.270533  0.817739  0.753074  ...  0.219422  0.215153      0   

   Transaction_Hour  Transaction_Day  Time_Since_Last_Transaction  Peak_Hour  \
0               0.0  

In [11]:
# ✅ Make Predictions
y_proba = model.predict_proba(X)[:, 1]  # Get fraud probability
y_pred = (y_proba >= 0.5).astype(int)  # Convert probability to 0 or 1

# ✅ Evaluate Model
print("\n📊 Classification Report:\n", classification_report(y, y_pred))
print("🎯 ROC-AUC Score:", roc_auc_score(y, y_proba))



📊 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.60      0.86      0.71       492

    accuracy                           1.00    284807
   macro avg       0.80      0.93      0.86    284807
weighted avg       1.00      1.00      1.00    284807

🎯 ROC-AUC Score: 0.9837865192748968


In [12]:
fraud_case = df[df["Class"] == 1].sample(1, random_state=42)
fraud_prediction = model.predict(fraud_case[features])[0]

print(f"🚨 Fraud Prediction (Expected: True): {bool(fraud_prediction)}")


🚨 Fraud Prediction (Expected: True): True


In [13]:
non_fraud_case = df[df["Class"] == 0].sample(1, random_state=42)
non_fraud_prediction = model.predict(non_fraud_case[features])[0]

print(f"✅ Non-Fraud Prediction (Expected: False): {bool(non_fraud_prediction)}")


✅ Non-Fraud Prediction (Expected: False): False


In [14]:
from sklearn.metrics import precision_recall_curve
import numpy as np

# ✅ Compute Precision-Recall Curve
precision, recall, thresholds = precision_recall_curve(y, y_proba)

# ✅ Find the Best Threshold
best_threshold = thresholds[np.argmax(precision * recall)]  # Maximizes precision & recall balance
y_pred_adjusted = (y_proba >= best_threshold).astype(int)

# ✅ Evaluate Model with Adjusted Threshold
print("\n📊 Classification Report (Optimized Threshold):\n", classification_report(y, y_pred_adjusted))
print("🎯 Optimized ROC-AUC Score:", roc_auc_score(y, y_proba))



📊 Classification Report (Optimized Threshold):
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.91      0.82      0.87       492

    accuracy                           1.00    284807
   macro avg       0.96      0.91      0.93    284807
weighted avg       1.00      1.00      1.00    284807

🎯 Optimized ROC-AUC Score: 0.9837865192748968
