# SAIMon - Anomaly Detection Experiments

This notebook demonstrates various anomaly detection algorithms for time series data.

## Contents
1. Data Collection from Prometheus
2. Data Exploration and Visualization
3. Feature Engineering
4. Anomaly Detection Algorithms
   - Z-Score
   - Isolation Forest
   - LSTM Autoencoder (planned; not implemented in this notebook yet)
5. Model Evaluation
6. Results Visualization

In [None]:
# Import libraries
from datetime import datetime, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Prometheus
from prometheus_api_client import PrometheusConnect

# ML libraries
from scipy import stats
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Plot appearance shared by every figure in this notebook
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.figsize': (15, 6)})  # default size; cells may override per figure

print("✅ Libraries imported successfully")

## 1. Connect to Prometheus

In [None]:
# Connect to Prometheus
PROMETHEUS_URL = 'http://localhost:9090'
# NOTE(review): per the prometheus-api-client docs, disable_ssl=True turns off
# certificate verification; it has no effect for a plain-http URL like this one.
prom = PrometheusConnect(url=PROMETHEUS_URL, disable_ssl=True)

# List available metrics. This is the first call in the cell that queries the
# server, so a successful connection is only reported once it has returned;
# if the server is unreachable the cell fails here without a misleading
# "Connected" line above the traceback.
metrics = prom.all_metrics()
print(f"Connected to Prometheus at {PROMETHEUS_URL}")

print(f"\nAvailable metrics: {len(metrics)}")
print("\nSample metrics:")
for metric in metrics[:10]:
    print(f"  - {metric}")

## 2. Fetch Time Series Data

In [None]:
# Define time range
end_time = datetime.now()
start_time = end_time - timedelta(hours=24)  # Last 24 hours

# Choose a metric (change this to your metric)
metric_name = 'node_cpu_seconds_total'

# Fetch data
print(f"Fetching data for: {metric_name}")
print(f"Time range: {start_time} to {end_time}")

result = prom.custom_query_range(
    query=metric_name,
    start_time=start_time,
    end_time=end_time,
    step='1m'
)

# Fail fast with an explicit message: an empty result would otherwise produce a
# column-less DataFrame and surface later as an opaque KeyError on df['value'].
if not result:
    raise ValueError(
        f"Prometheus returned no data for '{metric_name}' "
        f"between {start_time} and {end_time}"
    )

# Convert to DataFrame: one row per (series, sample). Each element of `result`
# is one labelled series; its samples are appended in the order returned, so the
# frame is the series stacked one after another.
data_points = []
for metric_result in result:
    labels = metric_result['metric']
    values = metric_result['values']

    for timestamp, value in values:
        data_points.append({
            'timestamp': datetime.fromtimestamp(float(timestamp)),  # naive local time
            'value': float(value),
            'cpu': labels.get('cpu', 'unknown'),
            # Keep `mode` as well: without it, series that differ only by mode
            # cannot be told apart once they are stacked in one frame.
            'mode': labels.get('mode', 'unknown'),
            'instance': labels.get('instance', 'unknown')
        })

df = pd.DataFrame(data_points)
print(f"\n✅ Loaded {len(df)} data points")
df.head()

## 3. Data Exploration

In [None]:
# Summary statistics of the raw metric values
print("Data Statistics:")
print(df['value'].describe())

# Time series view of every loaded sample
fig_ts, ax_ts = plt.subplots(figsize=(15, 6))
ax_ts.plot(df['timestamp'], df['value'], alpha=0.7)
ax_ts.set_xlabel('Time')
ax_ts.set_ylabel('Value')
ax_ts.set_title(f'Time Series: {metric_name}')
ax_ts.tick_params(axis='x', labelrotation=45)
fig_ts.tight_layout()
plt.show()

# Value distribution: histogram (left) next to a box plot (right)
fig_dist, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(10, 5))

ax_hist.hist(df['value'], bins=50, edgecolor='black')
ax_hist.set_xlabel('Value')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Value Distribution')

ax_box.boxplot(df['value'])
ax_box.set_ylabel('Value')
ax_box.set_title('Box Plot')

fig_dist.tight_layout()
plt.show()

## 4. Feature Engineering

In [None]:
# Create features
df_features = df.copy()

# `df` holds several Prometheus series stacked one after another (one per label
# combination). Rolling windows are therefore computed per series, so a window
# never mixes the tail of one series with the head of the next.
series_keys = [col for col in ('instance', 'cpu', 'mode') if col in df_features.columns]


def rolling_stat(frame, keys, window, stat):
    """Rolling statistic of ``frame['value']``, computed separately per series.

    Args:
        frame: DataFrame with a ``value`` column and the label columns in ``keys``.
        keys: Label columns identifying one series; if empty, the whole column
            is treated as a single series.
        window: Rolling window length in rows.
        stat: Name of the rolling aggregation to apply, e.g. ``'mean'`` or ``'std'``.

    Returns:
        A Series aligned with ``frame``'s index (row order is unchanged).
    """
    def _roll(values):
        return getattr(values.rolling(window=window, min_periods=1), stat)()

    if keys:
        return frame.groupby(keys, sort=False)['value'].transform(_roll)
    return _roll(frame['value'])


# Rolling statistics
for window in [5, 10, 30]:
    df_features[f'rolling_mean_{window}'] = rolling_stat(df_features, series_keys, window, 'mean')
    # The std of a single sample is NaN; treat it as zero spread.
    df_features[f'rolling_std_{window}'] = rolling_stat(df_features, series_keys, window, 'std').fillna(0)

# Time-based features
df_features['hour'] = df_features['timestamp'].dt.hour
df_features['day_of_week'] = df_features['timestamp'].dt.dayofweek  # Monday=0 ... Sunday=6
df_features['is_weekend'] = df_features['day_of_week'].isin([5, 6]).astype(int)

print("Features created:")
print(df_features.columns.tolist())
df_features.head()

## 5. Anomaly Detection - Z-Score Method

In [None]:
# Z-Score anomaly detection: flag points more than `threshold` standard
# deviations away from the mean of the whole `value` column.
threshold = 3.0

mean = df_features['value'].mean()
std = df_features['value'].std()

# A constant column has std == 0 and a single sample has std == NaN. In both
# cases no point deviates from the mean, so score everything 0 instead of
# dividing by zero and filling the column with NaN. (`std > 0` is False for NaN.)
if std > 0:
    df_features['z_score'] = np.abs((df_features['value'] - mean) / std)
else:
    df_features['z_score'] = 0.0
df_features['is_anomaly_zscore'] = df_features['z_score'] > threshold

anomalies_zscore = df_features[df_features['is_anomaly_zscore']]
print(f"Z-Score detected {len(anomalies_zscore)} anomalies ({len(anomalies_zscore)/len(df_features)*100:.2f}%)")

# Visualize: full series as a line, flagged points overlaid in red
plt.figure(figsize=(15, 6))
plt.plot(df_features['timestamp'], df_features['value'], label='Normal', alpha=0.7)
plt.scatter(anomalies_zscore['timestamp'], anomalies_zscore['value'], 
            color='red', label='Anomaly', s=50, zorder=5)
plt.xlabel('Time')
plt.ylabel('Value')
plt.title(f'Z-Score Anomaly Detection (threshold={threshold})')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 6. Anomaly Detection - Isolation Forest

In [None]:
# Prepare features for Isolation Forest
feature_cols = ['value', 'rolling_mean_10', 'rolling_std_10', 'hour']
X = df_features[feature_cols].fillna(0)

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train Isolation Forest
iso_forest = IsolationForest(
    contamination=0.1,  # Expected proportion of anomalies
    n_estimators=100,
    random_state=42  # fixed seed so re-running the cell gives the same flags
)

# Predict (-1 for anomaly, 1 for normal)
predictions = iso_forest.fit_predict(X_scaled)
df_features['is_anomaly_iforest'] = predictions == -1

# Anomaly scores. The raw decision_function is lower for more anomalous points;
# it is min-max rescaled to [0, 1] and inverted, so in the stored column
# 1 = most anomalous and 0 = least anomalous.
scores = iso_forest.decision_function(X_scaled)
score_range = scores.max() - scores.min()
if score_range > 0:
    df_features['anomaly_score_iforest'] = 1 - (scores - scores.min()) / score_range
else:
    # All points received the same score (e.g. identical rows): nothing stands
    # out, so avoid the 0/0 division and report a uniform score of 0.
    df_features['anomaly_score_iforest'] = 0.0

anomalies_iforest = df_features[df_features['is_anomaly_iforest']]
print(f"Isolation Forest detected {len(anomalies_iforest)} anomalies ({len(anomalies_iforest)/len(df_features)*100:.2f}%)")

# Visualize: full series as a line, flagged points overlaid in red
plt.figure(figsize=(15, 6))
plt.plot(df_features['timestamp'], df_features['value'], label='Normal', alpha=0.7)
plt.scatter(anomalies_iforest['timestamp'], anomalies_iforest['value'], 
            color='red', label='Anomaly', s=50, zorder=5)
plt.xlabel('Time')
plt.ylabel('Value')
plt.title('Isolation Forest Anomaly Detection')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 7. Compare Algorithms

In [None]:
# Stacked panels showing what each detector flagged on the same series
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
ax_zscore, ax_iforest = axes

panels = [
    (ax_zscore, anomalies_zscore, f'Z-Score Method ({len(anomalies_zscore)} anomalies)'),
    (ax_iforest, anomalies_iforest, f'Isolation Forest ({len(anomalies_iforest)} anomalies)'),
]
for ax, flagged, panel_title in panels:
    ax.plot(df_features['timestamp'], df_features['value'], alpha=0.7)
    ax.scatter(flagged['timestamp'], flagged['value'], color='red', s=50, zorder=5)
    ax.set_title(panel_title)
    ax.set_ylabel('Value')

# Only the bottom panel carries the x-axis label
ax_iforest.set_xlabel('Time')

fig.tight_layout()
plt.show()

# Counts and shares per method
total_points = len(df_features)
n_zscore = len(anomalies_zscore)
n_iforest = len(anomalies_iforest)

print("\nComparison Summary:")
print(f"Total data points: {total_points}")
print(f"Z-Score anomalies: {n_zscore} ({n_zscore/total_points*100:.2f}%)")
print(f"Isolation Forest anomalies: {n_iforest} ({n_iforest/total_points*100:.2f}%)")

# Rows flagged by both detectors
flagged_by_both = df_features['is_anomaly_zscore'] & df_features['is_anomaly_iforest']
both_methods = df_features[flagged_by_both]
print(f"Detected by both methods: {len(both_methods)}")

## 8. Export Results

In [None]:
# Save anomalies to CSV: every row flagged by at least one detector, with both
# flags and both scores so the methods can be compared offline.
output_file = f'../data/processed/anomalies_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
anomalies_combined = df_features[
    df_features['is_anomaly_zscore'] | df_features['is_anomaly_iforest']
][['timestamp', 'value', 'is_anomaly_zscore', 'is_anomaly_iforest', 'z_score', 'anomaly_score_iforest']]

# to_csv does not create missing directories, so make sure the target folder
# exists; otherwise the export fails on a fresh checkout.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

anomalies_combined.to_csv(output_file, index=False)
print(f"✅ Anomalies exported to: {output_file}")
print(f"Total anomalies: {len(anomalies_combined)}")