# Anomaly Detection for IIoT Sensor Data

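This notebook demonstrates anomaly detection on semiconductor fabrication sensor data: it trains an Isolation Forest on temperature, vibration, and pressure readings, evaluates it against the `is_anomaly` labels, and saves the fitted model and scaler.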

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib

%matplotlib inline

## 1. Load and Explore Data

In [None]:
# Read the training set from CSV and preview its first rows
training_csv = '../data/training_data.csv'
df = pd.read_csv(training_csv)
df.head()

In [None]:
# Data statistics: summary table (count, mean, std, min, quartiles, max).
# By default pandas summarizes the numeric columns of the frame.
df.describe()

In [None]:
# Anomaly distribution: number of rows for each value of the 'is_anomaly'
# label column, which shows how imbalanced the two classes are.
df['is_anomaly'].value_counts()

## 2. Visualize Data

In [None]:
# Pairwise scatter plots of the frame's columns, with points colored by the
# 'is_anomaly' label
sns.pairplot(data=df, hue='is_anomaly')
plt.show()

## 3. Train Isolation Forest Model

In [None]:
# Select the sensor readings used as model inputs and the ground-truth label
feature_columns = ['temperature', 'vibration', 'pressure']
X = df[feature_columns]
y = df['is_anomaly']

# Hold out 20% of the rows for evaluation; stratifying on the label keeps the
# anomaly ratio the same in the train and test partitions
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize the features with statistics learned from the training split
# only, then apply that same transform to the held-out split
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Configure the Isolation Forest and fit it on the scaled training features.
# Only the features are passed to fit(); the labels are not used here.
# NOTE(review): contamination is fixed at 0.05 — confirm it is in line with
# the anomaly share reported by the 'is_anomaly' value counts.
forest_settings = dict(
    n_estimators=100,
    max_samples='auto',
    contamination=0.05,
    random_state=42,
)
model = IsolationForest(**forest_settings)
model.fit(X_train_scaled)

## 4. Evaluate Model

In [None]:
# Isolation Forest's predict() returns -1 for outliers and +1 for inliers;
# remap that to 1 / 0 so it can be compared with the 'is_anomaly' labels
# (presumably 1 marks an anomaly there — verify against the dataset).
predictions = (model.predict(X_test_scaled) == -1).astype(int)

# Share of test rows whose predicted label equals the ground-truth label
accuracy = (y_test == predictions).mean()
print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
# Confusion matrix and per-class metrics
from sklearn.metrics import confusion_matrix, classification_report

# Pin the label order so the matrix is always 2x2 with rows/columns in the
# order (0, 1), even if one class happens to be absent from the predictions.
# Assumes 'is_anomaly' encodes normal as 0 and anomaly as 1, matching the
# 0/1 remapping applied to the model output — TODO confirm.
class_ids = [0, 1]
class_names = ['Normal', 'Anomaly']

cm = confusion_matrix(y_test, predictions, labels=class_ids)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Accuracy alone is dominated by the majority class on imbalanced data, so
# also report precision, recall and F1 for each class.
print(
    classification_report(
        y_test,
        predictions,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,  # report 0 instead of warning when a class is never predicted
    )
)

## 5. Save Model

In [None]:
# Save model and scaler
import os

# joblib.dump does not create missing parent directories, so make sure the
# output directory exists before writing (no-op if it is already there).
os.makedirs('../models', exist_ok=True)

# Persist both artifacts: the scaler must be reloaded together with the model
# so new data can be transformed the same way as the training data.
joblib.dump(model, '../models/isolation_forest.pkl')
joblib.dump(scaler, '../models/scaler.pkl')
print("Model saved!")