# DensityAD Tuning Guide

Learn how to configure detection parameters and filtering options.

**Topics:**
- Detection algorithm parameters
- Post-processing filters
- Sensitivity analysis
- Production configuration

In [None]:
import itertools
import os
import sys
sys.path.append('../..')

from gradientcast import GradientCastDenseAD
from utils.synthetic_data import generate_ad_payload_data
import matplotlib.pyplot as plt
import numpy as np

# Read the API key from the GRADIENTCAST_API_KEY environment variable so the
# secret does not have to be saved inside the notebook. If the variable is not
# set, the placeholder below is used -- replace it with your API key.
GRADIENTCAST_API_KEY = os.environ.get("GRADIENTCAST_API_KEY", "your-api-key-here")

# Detector client shared by every cell below.
dense_ad = GradientCastDenseAD(api_key=GRADIENTCAST_API_KEY)

# Generate test data: 150 synthetic points with anomalies injected.
data = generate_ad_payload_data(n_points=150, inject_anomalies=True)

---
## Detection Parameters

### Contamination

The expected proportion of anomalies in the dataset.

In [None]:
# Sweep the contamination parameter. For each value, record how many timeline
# points carry the raw_anomaly flag and how many anomalies the detector
# reports in result.anomalies (the "confirmed" count).
contamination_values = [0.01, 0.05, 0.10, 0.15, 0.20]
results = {}

for cont in contamination_values:
    result = dense_ad.detect(data, contamination=cont)
    results[cont] = {
        'raw': sum(1 for p in result.timeline if p.raw_anomaly),
        'confirmed': len(result.anomalies)
    }

print("Contamination vs Detection:")
# "Contamination" is 13 characters long, so the first column must be 13 wide
# in both the header and the data rows; a narrower width lets the header
# overflow and pushes every following column out of alignment.
print(f"{'Contamination':>13} | {'Raw':>6} | {'Confirmed':>10}")
print("-" * 35)
for cont, counts in results.items():
    print(f"{cont:>13.0%} | {counts['raw']:>6} | {counts['confirmed']:>10}")

In [None]:
# Plot raw vs. confirmed anomaly counts against the contamination setting.
# Build the three series in one pass over the swept values.
x, raw, confirmed = [], [], []
for c in contamination_values:
    x.append(c * 100)  # fraction -> percent for the x axis
    raw.append(results[c]['raw'])
    confirmed.append(results[c]['confirmed'])

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x, raw, 'b-o', label='Raw (initial detection)', markersize=8)
ax.plot(x, confirmed, 'r-o', label='Confirmed (after filtering)', markersize=8)

ax.set_xlabel('Contamination (%)')
ax.set_ylabel('Anomalies Detected')
ax.set_title('Impact of Contamination Parameter')
ax.legend()
ax.grid(alpha=0.3)
plt.show()

print("\nRecommendation: Start with 5% (0.05) and adjust based on false positive rate")

### N Neighbors

Number of neighbors used for the density calculation. Fewer neighbors make detection more sensitive; more neighbors reduce noise (see the Configuration Examples below).

In [None]:
# Sweep n_neighbors and map each setting to its confirmed-anomaly count.
neighbor_values = [5, 10, 15, 20, 30, 40]
neighbor_results = {
    k: len(dense_ad.detect(data, n_neighbors=k).anomalies)
    for k in neighbor_values
}

print("N Neighbors vs Confirmed Anomalies:")
for n, count in neighbor_results.items():
    print(f"  n_neighbors={n:2d}: {count} anomalies")

---
## Post-Processing Filters

### Valley Threshold

Minimum value to consider for anomaly detection. Filters out noise on low-volume metrics.

In [None]:
# Sweep valley_threshold and map each setting to its confirmed-anomaly count.
thresholds = [0, 100000, 500000, 1000000, 2000000]


def _confirmed_at_threshold(threshold):
    """Return the number of confirmed anomalies for one valley threshold."""
    detection = dense_ad.detect(data, valley_threshold=threshold)
    return len(detection.anomalies)


valley_results = {t: _confirmed_at_threshold(t) for t in thresholds}

print("Valley Threshold vs Confirmed Anomalies:")
for thresh, count in valley_results.items():
    print(f"  threshold={thresh:>10,}: {count} anomalies")

print("\nNote: Higher threshold = more points filtered as 'below meaningful volume'")

### Minimum Contiguous Anomalies

Requires N consecutive anomalous points before an anomaly is confirmed. Reduces isolated false positives.

In [None]:
# Sweep min_contiguous_anomalies and map each setting to its
# confirmed-anomaly count.
contiguous_values = [1, 2, 3, 4, 5]
confirmed_counts = [
    len(dense_ad.detect(data, min_contiguous_anomalies=run_length).anomalies)
    for run_length in contiguous_values
]
contiguous_results = dict(zip(contiguous_values, confirmed_counts))

print("Min Contiguous vs Confirmed Anomalies:")
for n, count in contiguous_results.items():
    print(f"  min_contiguous={n}: {count} anomalies")

print("\nRecommendation: 2-3 for hourly data, reduces noise while catching real issues")

---
## Combined Parameter Sweep

In [None]:
# Grid search: run the detector for every combination of contamination,
# n_neighbors and min_contiguous_anomalies, print the confirmed-anomaly count
# of each, and suggest the combination with the most confirmed anomalies
# among those whose count falls inside a "moderate" range.
contamination_opts = [0.03, 0.05, 0.10]
neighbor_opts = [10, 20, 30]
contiguous_opts = [2, 3]

# Inclusive range of confirmed-anomaly counts treated as "moderate detection".
MIN_CONFIRMED, MAX_CONFIRMED = 3, 10

print("Parameter Combinations:")
# "Contamination" is 13 characters long, so the first column is 13 wide in
# both the header and the rows to keep the table aligned.
print(f"{'Contamination':>13} | {'Neighbors':>9} | {'Contiguous':>10} | {'Anomalies':>9}")
print("-" * 50)

best_config = None
best_score = 0

# itertools.product iterates in the same order as the equivalent nested loops
# (the last option list varies fastest).
for cont, neigh, contig in itertools.product(
    contamination_opts, neighbor_opts, contiguous_opts
):
    result = dense_ad.detect(
        data,
        contamination=cont,
        n_neighbors=neigh,
        min_contiguous_anomalies=contig
    )
    count = len(result.anomalies)
    print(f"{cont:>13.0%} | {neigh:>9} | {contig:>10} | {count:>9}")

    # Keep the in-range configuration with the highest count; on ties the
    # first one encountered wins because the comparison is strict.
    if MIN_CONFIRMED <= count <= MAX_CONFIRMED and count > best_score:
        best_score = count
        best_config = (cont, neigh, contig)

if best_config is not None:
    print(f"\nSuggested config: contamination={best_config[0]}, n_neighbors={best_config[1]}, min_contiguous={best_config[2]}")
else:
    # Say so explicitly instead of ending the cell with no suggestion at all.
    print(f"\nNo combination produced between {MIN_CONFIRMED} and {MAX_CONFIRMED} confirmed anomalies; try widening the option lists")

---
## Severity Thresholds

Severity is determined by normalized score and z-score.

In [None]:
# Run detection with default parameters and summarise the confirmed anomalies
# per severity level: count, mean normalized score and mean 24h z-score.
result = dense_ad.detect(data)

if result.anomalies:
    print("Severity Analysis:")
    print(f"{'Severity':>10} | {'Count':>6} | {'Avg Norm Score':>14} | {'Avg Z-Score':>12}")
    print("-" * 51)  # matches the header width: 10 + 3 + 6 + 3 + 14 + 3 + 12

    for sev in ['low', 'medium', 'high', 'critical']:
        sev_points = [p for p in result.anomalies if p.magnitude.severity == sev]
        if not sev_points:
            continue

        avg_norm = np.mean([p.magnitude.normalized_score for p in sev_points])

        # Skip only missing z-scores (presumably None when unavailable --
        # confirm against the client). A plain truthiness test would also
        # discard legitimate 0.0 values and bias the average.
        zscores = [
            p.magnitude.zscore_24h
            for p in sev_points
            if p.magnitude.zscore_24h is not None
        ]
        # Show "n/a" rather than a fabricated 0.00 when no z-score exists.
        avg_z_text = f"{np.mean(zscores):.2f}" if zscores else "n/a"

        print(f"{sev:>10} | {len(sev_points):>6} | {avg_norm:>14.1f} | {avg_z_text:>12}")
else:
    print("No anomalies detected with current parameters")

---
## Configuration Examples

### High Sensitivity (Catch More)

```python
result = dense_ad.detect(
    data,
    contamination=0.10,           # Higher contamination
    n_neighbors=15,               # Fewer neighbors
    min_contiguous_anomalies=1,   # No contiguous requirement
    valley_threshold=100000       # Lower volume threshold
)
```

### Balanced (Default-like)

```python
result = dense_ad.detect(
    data,
    contamination=0.05,
    n_neighbors=20,
    min_contiguous_anomalies=2
)
```

### Low Sensitivity (Reduce Noise)

```python
result = dense_ad.detect(
    data,
    contamination=0.02,           # Lower contamination
    n_neighbors=30,               # More neighbors
    min_contiguous_anomalies=3,   # Require 3 consecutive
    valley_threshold=2000000      # Higher volume threshold
)
```

**Next:** [Real-time Simulation](03_simulation.ipynb)