In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.covariance import EllipticEnvelope
from sklearn.datasets import load_iris
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
# Reached only if every import in this cell succeeded.
print("Setup complete")

In [None]:
# Generate data with outliers
# A 2-D standard-normal cluster plus a handful of points scattered uniformly
# over [-5, 5), which serve as the known anomalies.
N_NORMAL = 200
N_OUTLIERS = 10

# The legacy global seed is kept deliberately so the draws below stay identical
# to earlier runs of this notebook.
np.random.seed(42)
normal = np.random.normal(0, 1, (N_NORMAL, 2))
outliers = np.random.uniform(-5, 5, (N_OUTLIERS, 2))
data = np.vstack([normal, outliers])

# Ground truth aligned with the rows of `data`: 0 = normal cluster, 1 = injected
# outlier. Integer dtype so the count below prints as "10" instead of "10.0".
labels = np.hstack([np.zeros(N_NORMAL, dtype=int), np.ones(N_OUTLIERS, dtype=int)])
print(f"Data shape: {data.shape}, Outliers: {labels.sum()}")

In [None]:
# Isolation Forest
# Fit and predict as two explicit steps on the same array; this is what the
# inherited fit_predict does for this estimator.
iso = IsolationForest(random_state=42, contamination=0.05)
iso.fit(data)
iso_pred = iso.predict(data)  # -1 marks a flagged point, +1 everything else
iso_scores = iso.score_samples(data)
iso_flagged = np.count_nonzero(iso_pred == -1)
print(f"Isolation Forest - Anomalies detected: {iso_flagged}")

In [None]:
# LOF (Local Outlier Factor)
# fit_predict is kept as a single call: in its default (non-novelty) mode LOF
# only labels the data it was fitted on.
lof = LocalOutlierFactor(contamination=0.05, n_neighbors=20)
lof_pred = lof.fit_predict(data)  # -1 marks a flagged point, +1 everything else
# Fitted attribute, read after fit_predict has run.
lof_scores = lof.negative_outlier_factor_
lof_flagged = np.count_nonzero(lof_pred == -1)
print(f"LOF - Anomalies detected: {lof_flagged}")

In [None]:
# Elliptic Envelope (Gaussian)
# Fit and predict as two explicit steps on the same array; this is what the
# inherited fit_predict does for this estimator.
ee = EllipticEnvelope(random_state=42, contamination=0.05)
ee.fit(data)
ee_pred = ee.predict(data)  # -1 marks a flagged point, +1 everything else
ee_scores = ee.decision_function(data)
ee_flagged = np.count_nonzero(ee_pred == -1)
print(f"Elliptic Envelope - Anomalies detected: {ee_flagged}")

In [None]:
# Statistical method: Z-score
# A point is flagged when ANY of its coordinates lies more than Z_THRESHOLD
# standard deviations from that column's mean.
# `stats` (scipy) comes from the import cell at the top of the notebook.
Z_THRESHOLD = 3
z_scores = np.abs(stats.zscore(data))
# Use the same label convention as the sklearn detectors in the cells above
# (-1 = anomaly, +1 = inlier) so the plotting and comparison cells can treat
# every method uniformly.
z_pred = np.where((z_scores > Z_THRESHOLD).any(axis=1), -1, 1)
print(f"Z-Score - Anomalies detected: {(z_pred == -1).sum()}")

In [None]:
# Visualization
# One scatter panel per detector on a 2x2 grid: points predicted +1 are drawn
# as translucent blue dots, points predicted -1 as large red crosses.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# (title, predictions) pairs in row-major panel order, i.e. the order in which
# axes.flat walks the grid: [0, 0], [0, 1], [1, 0], [1, 1].
panels = [
    ('Isolation Forest', iso_pred),
    ('LOF', lof_pred),
    ('Elliptic Envelope', ee_pred),
    ('Z-Score', z_pred),
]

for panel_ax, (panel_title, panel_pred) in zip(axes.flat, panels):
    is_inlier = panel_pred == 1
    is_anomaly = panel_pred == -1
    panel_ax.scatter(data[is_inlier, 0], data[is_inlier, 1],
                     c='blue', alpha=0.5, label='Inlier')
    panel_ax.scatter(data[is_anomaly, 0], data[is_anomaly, 1],
                     c='red', s=100, marker='x', label='Anomaly')
    panel_ax.set_title(panel_title)
    # Label axes by column index of `data` so each panel is readable on its own.
    panel_ax.set_xlabel('Feature 0')
    panel_ax.set_ylabel('Feature 1')
    panel_ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Comparison
# Collect each detector's predictions under a display name, then count how many
# points each one labelled -1.
methods = {
    'Isolation Forest': iso_pred,
    'LOF': lof_pred,
    'Elliptic Envelope': ee_pred,
    'Z-Score': z_pred,
}
anomalies_count = dict(
    (method_name, (method_pred == -1).sum())
    for method_name, method_pred in methods.items()
)

# Bar chart of the counts, drawn through the explicit Figure/Axes interface.
comparison_fig = plt.figure(figsize=(10, 5))
comparison_ax = comparison_fig.add_subplot()
comparison_ax.bar(anomalies_count.keys(), anomalies_count.values())
comparison_ax.set_ylabel('Anomalies Detected')
comparison_ax.set_title('Anomaly Detection Methods Comparison')
comparison_ax.tick_params(axis='x', labelrotation=45)
comparison_ax.grid(alpha=0.3, axis='y')
comparison_fig.tight_layout()
plt.show()

In [None]:
# Smoke tests: each detector must flag at least one point, and the comparison
# dict must contain all four methods.
test1 = (iso_pred == -1).sum() > 0
test2 = (lof_pred == -1).sum() > 0
test3 = (ee_pred == -1).sum() > 0
test4 = (z_pred == -1).sum() > 0
test5 = len(methods) == 4

test_results = [
    ("Test 1: Isolation Forest detects anomalies", test1),
    ("Test 2: LOF detects anomalies", test2),
    ("Test 3: Elliptic Envelope detects anomalies", test3),
    ("Test 4: Z-Score detects anomalies", test4),
    ("Test 5: All methods implemented", test5),
]

# Report every check by name so a failing one can be identified, not merely
# counted in the summary line.
for description, outcome in test_results:
    print(f"[{'PASS' if outcome else 'FAIL'}] {description}")

passed = sum(1 for _, outcome in test_results if outcome)
print(f"\nPASSED: {passed}/{len(test_results)}")