# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import seaborn as sns

# Build a reproducible toy dataset: two tight clusters plus scattered points.
rng = np.random.RandomState(42)  # fixed seed so every run draws the same values

# One batch of 100 Gaussian points (scaled by 0.3) is reused for both clusters:
# a copy shifted by +2 on each axis and a copy shifted by -2, 200 rows in total.
cluster_noise = 0.3 * rng.randn(100, 2)
normal_data = np.vstack((cluster_noise + 2, cluster_noise - 2))

# 20 points drawn uniformly from [-6, 6) on each axis serve as the outliers.
outliers = rng.uniform(low=-6, high=6, size=(20, 2))

# Row order matters to anyone indexing by position: normal rows first,
# outlier rows last.
X = np.vstack((normal_data, outliers))

# Same array, exposed with named feature columns.
df = pd.DataFrame(X, columns=['Feature1', 'Feature2'])

# Show the unlabeled points as a scatter plot, before any detection is run.
raw_fig, raw_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x='Feature1', y='Feature2', data=df, ax=raw_ax)
raw_ax.set_title("Raw Data")
raw_ax.grid(True)
plt.show()

# Train an Isolation Forest on the two feature columns and store its verdict
# for every row in a new 'anomaly' column.
feature_columns = ['Feature1', 'Feature2']
iso_forest = IsolationForest(contamination=0.1, random_state=42)
df['anomaly'] = iso_forest.fit_predict(df[feature_columns])

# fit_predict labels each row -1 (outlier) or 1 (normal); split on that label.
flagged_mask = df['anomaly'] == -1
normal_mask = df['anomaly'] == 1
outliers_df = df[flagged_mask]
inliers_df = df[normal_mask]

# Draw both groups on one set of axes: inliers in the default colour,
# detected anomalies in red on top of them.
result_fig, result_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=inliers_df, x='Feature1', y='Feature2', label='Normal', ax=result_ax
)
sns.scatterplot(
    data=outliers_df, x='Feature1', y='Feature2', label='Anomaly', color='r',
    ax=result_ax,
)
result_ax.set_title("Anomaly Detection with Isolation Forest")
result_ax.legend()
result_ax.grid(True)
plt.show()
