# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix

# ==========================================
# Step 1: Create Dummy Data (Imbalanced)
# ==========================================
# We create 200 normal transactions (two clusters of 100) and 20 fraud ones
import numpy as np
rng = np.random.RandomState(42)  # fixed seed: every run draws the same points

# Normal transactions: a single tight Gaussian cloud around the origin
# (std 0.3), reused below as two clusters shifted to (+2, +2) and (-2, -2).
X_normal = rng.randn(100, 2) * 0.3
X_train = np.vstack((X_normal + 2, X_normal - 2))  # 2 x 100 = 200 normal points

# Fraud transactions: 20 points drawn uniformly over the square [-4, 4) x [-4, 4),
# so they can land anywhere -- including, by chance, inside a normal cluster.
X_outliers = rng.uniform(-4, 4, (20, 2))

# Full dataset: normal rows first, outlier rows last.
X = np.concatenate((X_train, X_outliers), axis=0)
n_outliers = X_outliers.shape[0]

# ==========================================
# Step 2: Define & Train Model
# ==========================================
# contamination is the share of points the forest is told to treat as
# anomalous; it may be 'auto' or a float. 0.1 means "flag about 10%"
# (the dataset built above holds 20 outliers in 220 points, roughly 9%).
# fit() returns the fitted estimator, so construction and training chain.
model = IsolationForest(contamination=0.1, random_state=42).fit(X)

# ==========================================
# Step 3: Prediction
# ==========================================
# predict() labels every row:
#    1 -> Normal
#   -1 -> Anomaly (Fraud)
# Note that the model is scoring the same data it was fitted on.
y_pred = model.predict(X)

# ==========================================
# Step 4: Visualization
# ==========================================
plt.figure(figsize=(8, 6))

# Split the rows once by predicted label, then draw each group in its own style.
normal_pts = X[y_pred == 1]   # predicted  1 -> normal
fraud_pts = X[y_pred == -1]   # predicted -1 -> fraud

# Normal points are drawn first so the fraud markers sit on top of them.
plt.scatter(normal_pts[:, 0], normal_pts[:, 1], c='blue', label='Normal Transaction')
plt.scatter(fraud_pts[:, 0], fraud_pts[:, 1], c='red', marker='x', s=100, label='Fraud Detection')

plt.title("Isolation Forest - Fraud Detection")
plt.legend()
plt.show()

# ==========================================
# Step 5: Evaluation
# ==========================================
# Ground truth is known by construction: X holds the normal rows first and the
# n_outliers injected rows last, so label them 1 and -1 respectively to match
# the model's output convention.
y_actual = np.ones(len(X))
# Slice from an explicit start index rather than y_actual[-n_outliers:]:
# with n_outliers == 0 the negative form turns into y_actual[0:] and would
# relabel every single row as fraud.
y_actual[len(X) - n_outliers:] = -1

# Pin the label order so index 0 is always Fraud (-1) and index 1 is always
# Normal (1); target_names below then cannot drift out of sync with whichever
# labels happen to be present in the data.
label_order = [-1, 1]

# Rows are actual classes, columns are predicted classes.
print("Confusion Matrix:")
print(confusion_matrix(y_actual, y_pred, labels=label_order))

print("\nClassification Report:")
print(classification_report(y_actual, y_pred, labels=label_order, target_names=['Fraud', 'Normal']))