# Load water level data from DMI

In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook

In [None]:
from anomalydetection.detectors import AnomalyDetectionPipeline, RangeDetector, DiffRangeDetector, RollingStandardDeviationDetector, HampelDetector

In [None]:
# Directory holding the test data, relative to this notebook's location.
data_dir = os.path.join("..", "tests", "data")
file_path = os.path.join(data_dir, "Ballen_20150218-20201222.csv")

# The first CSV column becomes the index and is parsed as dates.
df = pd.read_csv(file_path, parse_dates=True, index_col=0)

# Only the water level column is used from here on.
data = df["water_level"]

# Detect anomalies outside manually set range

In [None]:
# Detector configured with the manually chosen bounds -1 and 1.
range_detector = RangeDetector(-1, 1)
range_anomalies = range_detector.detect(data)

In [None]:
# Two columns: the full series, plus only the flagged samples (the remaining
# rows are NaN after index alignment) so they are drawn as markers on the line.
range_mask = range_anomalies.values
detected = data.to_frame().assign(anomalies=data[range_mask])
detected.plot(figsize=(8, 3), style=['-', 'o'])

# Detect anomalies outside automatically set range

In [None]:
# Number of leading samples used as the reference ("normal") data for fitting.
N = 1000

# Positional split: the first N samples for fitting, everything after for detection.
normal_data = data.iloc[:N]
test_data = data.iloc[N:]

In [None]:
# Both detectors are created without explicit limits; the pipeline is then
# fitted on the reference data before being applied to the remaining samples.
pipeline_steps = [RangeDetector(), DiffRangeDetector()]
anomaly_detector = AnomalyDetectionPipeline(pipeline_steps)
anomaly_detector.fit(normal_data)

detected_anomalies = anomaly_detector.detect_detailed(test_data)

In [None]:
# Show the first rows of the detailed detection result.
detected_anomalies.head()

In [None]:
# Plot the test series as a line and overlay the samples the pipeline flagged
# (rows that were not flagged are NaN in the "anomalies" column).
anomaly_mask = detected_anomalies.is_anomaly
detected = test_data.to_frame().assign(anomalies=test_data[anomaly_mask])
detected.plot(figsize=(8, 3), style=['-', 'o'])

# Detect peaks

In [None]:
# NOTE(review): the two positional arguments are presumably the rolling window
# size and the threshold — confirm against RollingStandardDeviationDetector.
detector = RollingStandardDeviationDetector(10, 0.1)
std_anomalies = detector.detect(data)

# Clear the flag on the very first sample.
# Use .iloc so that 0 is unambiguously a position: assuming the result carries
# the same date index as `data`, an integer key through plain [] is deprecated
# as a positional lookup in recent pandas and will be treated as a label.
std_anomalies.iloc[0] = False

In [None]:
# Plot the full series as a line and overlay the samples flagged by the
# rolling standard deviation detector as markers.
std_mask = std_anomalies.values
detected = data.to_frame().assign(anomalies=data[std_mask])
detected.plot(figsize=(8, 3), style=['-', 'o'])

# Hampel filter

The default threshold of the HampelDetector is 3, which means that a sample that deviates by more than three times the rolling window's standard deviation is marked as an anomaly. **Increasing** the threshold marks **fewer** samples as anomalies; **decreasing** the threshold marks **more**.

In [None]:
# Hampel detector settings for this run, one option per line for easy tuning.
detector = HampelDetector(
    window_size=20,
    threshold=3,
    use_numba=True,
)

In [None]:
# Run the Hampel detector over the complete water level series.
anomalies = detector.detect(data)

In [None]:
# Count the flagged samples with a vectorized reduction instead of the builtin
# sum(), which iterates over the mask element by element in Python.
# int() keeps the number rendered in the title identical to the previous output.
anomaly_count = int(np.sum(anomalies))

# Full series as a line, flagged samples overlaid as markers.
detected = data.to_frame()
detected["anomalies"] = data[anomalies]
detected.plot(style=['-', 'o'], figsize=(8, 3), title=f'Detected anomalies: {anomaly_count}')