In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix

# Load processed data
data_path = "/Users/michealomotosho/Documents/EDUCATION DOCUMENTS/DATA SCIENCE SELF PROJECT/TransGuard-AI/data/Processed/transactions_cleaned.csv"

df = pd.read_csv(data_path)

# Separate features and target
X = df.drop("Class", axis=1)
y = df["Class"]

# Fit Isolation Forest
iso_model = IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination=0.0017,  # estimate fraction of fraud in data (adjust as needed)
    random_state=42
)

iso_model.fit(X)

# Predict anomalies
# Isolation Forest returns:
#   -1 → anomaly
#    1 → normal

y_pred = iso_model.predict(X)
# Convert to 0/1 labels
y_pred_binary = np.where(y_pred == -1, 1, 0)

# Metrics
print("Confusion Matrix:")
print(confusion_matrix(y, y_pred_binary))

print("\nClassification Report:")
print(classification_report(y, y_pred_binary, digits=4))

# Compute ROC AUC
roc_auc = roc_auc_score(y, y_pred_binary)
print(f"\nROC AUC: {roc_auc:.4f}")


Confusion Matrix:
[[283942    373]
 [   380    112]]

Classification Report:
              precision    recall  f1-score   support

           0     0.9987    0.9987    0.9987    284315
           1     0.2309    0.2276    0.2293       492

    accuracy                         0.9974    284807
   macro avg     0.6148    0.6132    0.6140    284807
weighted avg     0.9973    0.9974    0.9973    284807


ROC AUC: 0.6132
