# Anomaly Detection & Grid State Estimation

From the [Sisyphean Gridworks ML Playground](https://sgridworks.com/ml-playground/guides/08-anomaly-detection.html)

## Setup

Clone the repository and install dependencies. Run this cell first.

In [None]:
!git clone https://github.com/SGridworks/Dynamic-Network-Model.git 2>/dev/null || echo 'Already cloned'
%cd Dynamic-Network-Model
!pip install -q pandas numpy matplotlib seaborn scikit-learn xgboost lightgbm pyarrow

## Load and Explore AMI Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from demo_data.load_demo_data import load_customer_interval_data

# Pull the AMI interval data through the repository's demo loader.
ami = load_customer_interval_data()

# One-shot summary (row count, schema, distinct customers), then a peek
# at the first rows.
print(
    f"AMI records: {len(ami):,}",
    f"Columns: {list(ami.columns)}",
    f"Customers: {ami['customer_id'].nunique()}",
    sep="\n",
)
print(ami.head())

## Focus on Voltage Readings

In [None]:
# Pick one customer to study in detail: the first ID that appears in the data.
first_customer = ami["customer_id"].unique()[0]
meter = ami[ami["customer_id"] == first_customer].copy()
meter["timestamp"] = pd.to_datetime(meter["timestamp"])
meter = meter.sort_values("timestamp")

# Plot voltage over a month. The window is half-open,
# [2024-06-01, 2024-07-01), so only June readings are kept.
one_month = meter[(meter["timestamp"] >= "2024-06-01") &
                  (meter["timestamp"] < "2024-07-01")]

fig, ax = plt.subplots(figsize=(14, 4))
ax.plot(one_month["timestamp"], one_month["voltage_v"], linewidth=0.5, color="#2D6A7A")
# Dashed reference lines at the two limits named in the legend labels.
ax.axhline(y=126, color="red", linestyle="--", alpha=0.5, label="ANSI upper (126V)")
ax.axhline(y=114, color="red", linestyle="--", alpha=0.5, label="ANSI lower (114V)")
ax.set_title("AMI Voltage Readings — June 2024")
ax.set_ylabel("Voltage (V)")
ax.legend()
plt.tight_layout()
plt.show()

## Engineer Features for Anomaly Detection

In [None]:
# Build per-customer, per-hour features from the raw readings.
ami["timestamp"] = pd.to_datetime(ami["timestamp"])
ami["hour"] = ami["timestamp"].dt.hour

# One chained pipeline:
#   1. group by customer and by the timestamp floored to the hour,
#   2. summarise voltage (mean/std/min/max) and total energy,
#   3. add the max-min spread as an extra feature,
#   4. replace any remaining NaN with 0.
hourly = (
    ami.groupby(["customer_id", ami["timestamp"].dt.floor("h")])
    .agg(
        voltage_mean=("voltage_v", "mean"),
        voltage_std=("voltage_v", "std"),
        voltage_min=("voltage_v", "min"),
        voltage_max=("voltage_v", "max"),
        energy_kwh=("energy_kwh", "sum"),
    )
    .reset_index()
    .assign(voltage_range=lambda df: df["voltage_max"] - df["voltage_min"])
    .fillna(0)
)

print(f"Hourly feature rows: {len(hourly):,}")
print(hourly.describe())

## Train the Isolation Forest

In [None]:
# Columns fed to the detectors. The order matters: it fixes the column
# order of the scaled matrix.
feature_cols = [
    "voltage_mean", "voltage_std", "voltage_range",
    "voltage_min", "voltage_max", "energy_kwh",
]
X = hourly[feature_cols]

# Standardize each feature. The fitted `scaler` and `X_scaled` are kept as
# notebook globals; the autoencoder cells reuse `scaler`.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit the Isolation Forest. `contamination` is the expected share of
# anomalies (1% as a starting point); `random_state` pins the tree building.
iso_forest = IsolationForest(
    n_estimators=200,
    contamination=0.01,
    random_state=42,
).fit(X_scaled)
print("Isolation Forest training complete.")

# Label every hourly row: predict() gives -1 for anomaly and 1 for normal;
# the raw decision_function output is stored alongside it.
hourly["anomaly"] = iso_forest.predict(X_scaled)
hourly["anomaly_score"] = iso_forest.decision_function(X_scaled)

n_anomalies = hourly["anomaly"].eq(-1).sum()
anomaly_pct = n_anomalies / len(hourly) * 100
print(f"Anomalies detected: {n_anomalies} ({anomaly_pct:.2f}%)")

## Visualize the Anomalies

In [None]:
# Scatter the hourly rows in (mean voltage, voltage std) space, with the
# Isolation Forest anomalies drawn on top of the normal points.
is_anomaly = hourly["anomaly"] == -1
anomalies = hourly[is_anomaly]
normal = hourly[hourly["anomaly"] == 1]

fig, ax = plt.subplots(figsize=(14, 5))
# Normal points first: small and translucent so the dense cloud stays readable.
ax.scatter(
    normal["voltage_mean"], normal["voltage_std"],
    c="#5FCCDB", s=5, alpha=0.3, label="Normal",
)
# Anomalies second: larger red crosses.
ax.scatter(
    anomalies["voltage_mean"], anomalies["voltage_std"],
    c="red", s=30, marker="x", label="Anomaly",
)
ax.set(
    xlabel="Mean Voltage (V)",
    ylabel="Voltage Std Dev",
    title="Isolation Forest: Anomaly Detection in AMI Voltage Data",
)
ax.legend()
plt.tight_layout()
plt.show()

## Build the Autoencoder

An autoencoder is a neural network that learns to compress data into a small representation and then reconstruct it. If the network is trained on normal data, it will reconstruct normal patterns well but struggle with anomalies—producing high reconstruction error.

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Define the autoencoder architecture
class VoltageAutoencoder(nn.Module):
    """Fully connected autoencoder for the hourly voltage feature vectors.

    The encoder narrows the input through 16 and 8 hidden units down to a
    ``latent_dim``-wide bottleneck; the decoder mirrors those widths back up
    to ``input_dim``. ReLU follows every layer except the bottleneck and
    the final output layer.

    Args:
        input_dim: Number of features per sample (size of the last tensor
            dimension fed to ``forward``).
        latent_dim: Width of the bottleneck layer. Defaults to 3, the value
            that was previously hard-coded.
    """

    def __init__(self, input_dim, latent_dim=3):
        super().__init__()
        # Encoder: compress from input_dim down to latent_dim
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, latent_dim),  # bottleneck layer (no activation)
        )
        # Decoder: reconstruct from latent_dim back to input_dim
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim),  # linear output, no activation
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and return its reconstruction.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor of the same shape as ``x``.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Instantiate the autoencoder with one input unit per feature column,
# then print its layer layout.
input_dim = len(feature_cols)
model = VoltageAutoencoder(input_dim=input_dim)
print(model)

## Train the Autoencoder on Normal Data

In [None]:
# Use only normal data for training (filter out Isolation Forest anomalies).
normal_data = hourly[hourly["anomaly"] == 1][feature_cols]
# NOTE: this re-fits the shared `scaler` on the normal rows only; the
# reconstruction-error cell reuses this fitted scaler for all rows.
normal_scaled = scaler.fit_transform(normal_data)

# Split into train (80%) and validation (20%). The split is positional,
# not shuffled: the last 20% of rows form the validation set.
split = int(len(normal_scaled) * 0.8)
train_data = torch.FloatTensor(normal_scaled[:split])
val_data   = torch.FloatTensor(normal_scaled[split:])

# Create data loaders. Input and target are the same tensor because the
# network is trained to reproduce its input.
train_loader = DataLoader(TensorDataset(train_data, train_data),
                          batch_size=64, shuffle=True)

# Training setup
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train for 50 epochs, tracking both the training loss and the loss on the
# held-out validation split (previously created but never evaluated).
n_epochs = 50
losses = []
val_losses = []
for epoch in range(n_epochs):
    model.train()
    epoch_loss = 0.0
    for batch_x, batch_y in train_loader:
        output = model(batch_x)
        loss = criterion(output, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Mean of the per-batch mean losses.
    avg_loss = epoch_loss / len(train_loader)
    losses.append(avg_loss)

    # Validation pass: no gradients, whole split in one forward pass.
    # An empty split is reported as NaN instead of being evaluated.
    model.eval()
    with torch.no_grad():
        if len(val_data) > 0:
            val_loss = criterion(model(val_data), val_data).item()
        else:
            val_loss = float("nan")
    val_losses.append(val_loss)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:>3}/{n_epochs}  Loss: {avg_loss:.6f}  "
              f"Val loss: {val_loss:.6f}")

# Plot training and validation loss; a validation curve drifting above the
# training curve is the usual sign of overfitting.
plt.figure(figsize=(8, 4))
plt.plot(losses, color="#5FCCDB", label="Train")
plt.plot(val_losses, color="#2D6A7A", linestyle="--", label="Validation")
plt.title("Autoencoder Training Loss")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.legend()
plt.tight_layout()
plt.show()

## Detect Anomalies by Reconstruction Error

In [None]:
# Score every hourly row (normal and anomalous alike) with the trained
# autoencoder, using the scaler as it was last fitted.
model.eval()
all_scaled = scaler.transform(hourly[feature_cols])
all_tensor = torch.FloatTensor(all_scaled)

# Per-row reconstruction error: mean squared difference across features.
with torch.no_grad():
    reconstructed = model(all_tensor)
    recon_error = (all_tensor - reconstructed).pow(2).mean(dim=1)

hourly["recon_error"] = recon_error.numpy()

# Flag rows whose error is above the 99th percentile of all rows
# (1 = flagged, 0 = not flagged).
threshold = hourly["recon_error"].quantile(0.99)
hourly["ae_anomaly"] = hourly["recon_error"].gt(threshold).astype(int)

print(f"Reconstruction error threshold: {threshold:.4f}")
print(f"Autoencoder anomalies: {hourly['ae_anomaly'].sum()}")

## Compare Both Methods

In [None]:
# Compare Isolation Forest vs Autoencoder detections.
# Store the Isolation Forest verdict as a 0/1 column, matching `ae_anomaly`.
hourly["iso_anomaly"] = (hourly["anomaly"] == -1).astype(int)

# Do the set logic on real boolean masks. Applying `~` to the 0/1 integer
# columns is a bitwise NOT (0 -> -1, 1 -> -2), which only yields the right
# counts by bit-pattern coincidence; on booleans `~` is a logical NOT.
iso_flag = hourly["iso_anomaly"].astype(bool)
ae_flag = hourly["ae_anomaly"].astype(bool)

both = (iso_flag & ae_flag).sum()
iso_only = (iso_flag & ~ae_flag).sum()
ae_only = (~iso_flag & ae_flag).sum()

print(f"Flagged by both methods:       {both}")
print(f"Isolation Forest only:         {iso_only}")
print(f"Autoencoder only:              {ae_only}")

# Reconstruction error distribution, with the detection threshold marked.
# The y-axis is logarithmic so the sparse high-error tail stays visible.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(hourly["recon_error"], bins=100, color="#5FCCDB", edgecolor="white")
ax.axvline(x=threshold, color="red", linestyle="--",
           label=f"Threshold ({threshold:.4f})")
ax.set_xlabel("Reconstruction Error")
ax.set_ylabel("Frequency")
ax.set_title("Autoencoder Reconstruction Error Distribution")
ax.set_yscale("log")
ax.legend()
plt.tight_layout()
plt.show()

## Investigate the Anomalies

In [None]:
# Rows flagged by BOTH detectors, ordered from largest to smallest
# reconstruction error.
flagged_by_both = (hourly["iso_anomaly"] == 1) & (hourly["ae_anomaly"] == 1)
high_confidence = hourly[flagged_by_both].sort_values("recon_error", ascending=False)

# Columns shown in the report, in display order.
report_cols = ["customer_id", "timestamp", "voltage_mean",
               "voltage_std", "voltage_range", "recon_error"]

print("Top 10 highest-confidence anomalies:\n")
print(high_confidence[report_cols].head(10).to_string(index=False))

## What You Built and Next Steps