In [None]:
import datetime as dt
import sys

from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
from river import utils, proba

sys.path.insert(1, str(Path().resolve().parent))
from server import GaussianScorer
from functions.plot import plot_limits

In [None]:
# Dataset option: inverter temperature.
# The first CSV column becomes the index and is parsed as UTC timestamps.
col = 'Inverter Temperature'
df = pd.read_csv('data/input/inverter_temperature.csv', index_col=0)
df = df.set_index(pd.to_datetime(df.index, utc=True))

In [None]:
# Dataset option: average cell temperature.
# Running this cell rebinds both `df` and `col`; the first CSV column becomes
# the index and is parsed as UTC timestamps.
col = 'Average Cell Temperature'
df = pd.read_csv('data/input/average_temperature.csv', index_col=0)
df = df.set_index(pd.to_datetime(df.index, utc=True))

In [None]:
# CONSTANTS
# NOTE(review): THRESHOLD is defined here but never handed to GaussianScorer
# below -- confirm that the scorer's own default is the intended threshold.
THRESHOLD = 0.99735
GRACE_PERIOD = 60 * 24
WINDOW = dt.timedelta(hours=24 * 7)

# Gaussian distribution estimated over a time-based rolling window of length
# WINDOW, wrapped in the project's GaussianScorer.
model = GaussianScorer(
    utils.TimeRolling(proba.Gaussian(), period=WINDOW),
    grace_period=GRACE_PERIOD,
)

# Per-sample traces collected while streaming through the series.
anomaly_samples = []
list_thresh_pos = []
list_thresh_neg = []
mus = []
sigmas = []   # values read from `_var`; presumably variances, hence the **0.5 below
samples = []

# Stream the selected column one observation at a time.
for i, (t, x) in enumerate(df[col].items()):
    # The model is fed timezone-naive timestamps.
    t = t.tz_localize(None)
    if i == 0:
        # Seed the distribution state from the very first observation.
        # NOTE(review): relies on the private `_from_state` helper -- confirm
        # the argument order against the installed river version.
        model.gaussian.obj = model.gaussian._from_state(0, x, 1e-5, 1)

    is_anomaly = model.predict_one(x)
    anomaly_samples.append(is_anomaly)

    thresh_high, thresh_low = model.limit_one()
    list_thresh_pos.append(thresh_high)
    list_thresh_neg.append(thresh_low)

    sigmas.append(model.gaussian._var.get())
    mus.append(model.gaussian.mu)
    samples.append(model.gaussian.n_samples)

    # Learn from every normal sample. An anomalous sample is only learned when
    # (almost) all of the preceding samples in the look-back slice were flagged
    # too, so the model can adapt to a persistent change.
    if is_anomaly:
        recent = anomaly_samples[-300:-1]
        # `recent` is empty on the first sample; guard the division.
        should_learn = bool(recent) and sum(recent) / len(recent) > 0.9973
    else:
        should_learn = True

    if should_learn:
        # NOTE(review): assumes GaussianScorer.learn_one returns the model
        # itself -- verify against the `server` module.
        model = model.learn_one(x, t=t)


s_mean = pd.Series(mus, index=df.index)
s_std = pd.Series(sigmas, index=df.index)

# Envelope of mean +/- 3 * sqrt(s_std) around the running mean.
s_env_pos = s_mean + 3 * s_std**0.5
s_env_neg = s_mean - 3 * s_std**0.5

df_out = pd.DataFrame({"level_high": list_thresh_pos,
                       "level_low": list_thresh_neg,
                       "anomaly": anomaly_samples},
                      index=df.index)

In [None]:
# Summary of the detector output: share of flagged samples and number of
# separate anomalous events (0 -> 1 transitions of the flag).
# The flags are cast to int before diff(): on a boolean Series diff() is an
# XOR, so `diff() == 1` would count both the start and the end of each event.
text = (f"Sliding window: {WINDOW}\n"
        f"Proportion of anomalous samples: "
        f"{sum(anomaly_samples)/len(anomaly_samples)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{sum(pd.Series(anomaly_samples).astype(int).diff().dropna() == 1)}")
print(text)

In [None]:
# Figure name encodes the sliding-window length in whole hours.
window_hours = int(WINDOW.total_seconds()/60/60)
file_name = f"Dataw_{window_hours}_hours_sliding"

# Plot the signal with its anomaly flags and limits, plus the running mean
# and its upper/lower envelope.
plot_limits(
    df[col],
    df_out.anomaly,
    df_out.level_high,
    df_out.level_low,
    file_name=file_name,
    save=False,
    ser_mean=s_mean,
    ser_pos=s_env_pos,
    ser_neg=s_env_neg,
)

In [None]:
from river import anomaly
from river import metrics
import numpy as np
from river import feature_extraction as fx


class QuantileFilter(anomaly.QuantileFilter):
    """river ``anomaly.QuantileFilter`` extended with a ``predict_one`` method.

    The constructor forwards its arguments unchanged to the parent class.
    """

    def __init__(self, anomaly_detector, q: float, protect_anomaly_detector=True):
        super().__init__(
            anomaly_detector=anomaly_detector,
            q=q,
            protect_anomaly_detector=protect_anomaly_detector,
        )

    def predict_one(self, *args):
        """Return whether the score of the given sample reaches the quantile.

        The positional arguments are forwarded to ``score_one``. When
        ``self.quantile.get()`` is falsy (for instance ``None`` or ``0``) the
        score is compared against ``np.inf`` instead.
        """
        score = self.score_one(*args)
        threshold = self.quantile.get() or np.inf
        return score >= threshold
    
    
# One-Class SVM scores filtered by a running quantile of the scores.
model = QuantileFilter(
    anomaly.OneClassSVM(nu=0.0001),
    q=0.9975,
)

# Left un-updated: no ground-truth labels are available in this cell.
auc = metrics.ROCAUC()

anomaly_samples = []
scores = []
quantiles = []

# Stream the selected column (`col`), so this detector sees the same signal
# as the Gaussian scorer and the comparison plot.
for i, (t, x) in enumerate(df[col].items()):
    score = model.score_one({'data': x})
    scores.append(score)
    is_anomaly = model.predict_one({'data': x})
    # The very first sample is never reported as anomalous.
    anomaly_samples.append(is_anomaly if i > 0 else False)
    # learn_one updates the model in place; its return value is ignored on
    # purpose because newer river releases return None instead of the model.
    model.learn_one({'data': x})
    quantiles.append(model.quantile.get())

anomalies_svm = pd.Series(anomaly_samples, index=df.index)

# Flags are cast to int before diff(): on a boolean Series diff() is an XOR,
# so `diff() == 1` would count both the start and the end of each event.
text = (f"Proportion of anomalous samples: "
        f"{sum(anomaly_samples)/len(anomaly_samples)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{sum(pd.Series(anomaly_samples).astype(int).diff().dropna() == 1)}")
print(text)

# Raw scores and the running quantile used as threshold.
plt.plot(df.index, scores)
plt.plot(df.index, quantiles)

In [None]:
from river import anomaly, preprocessing
from river import metrics

# NOTE(review): `scaler` is created but not used anywhere in this cell.
scaler = preprocessing.MinMaxScaler()

# Half-Space Trees scores filtered by a running quantile of the scores.
# The trees are built randomly, so a fixed seed keeps reruns reproducible.
model = QuantileFilter(
    anomaly.HalfSpaceTrees(
        window_size=50,
        limits={'data': (0, 110)},
        seed=42,
    ),
    q=0.9975,
)

# Left un-updated: no ground-truth labels are available in this cell.
auc = metrics.ROCAUC()

anomaly_samples = []
scores = []
quantiles = []

# Stream the selected column (`col`), so this detector sees the same signal
# as the Gaussian scorer and the comparison plot.
for i, (t, x) in enumerate(df[col].items()):
    score = model.score_one({'data': x})
    scores.append(score)
    is_anomaly = model.predict_one({'data': x})
    # Nothing is reported as anomalous during the first GRACE_PERIOD samples.
    anomaly_samples.append(is_anomaly if i > GRACE_PERIOD else False)
    # learn_one updates the model in place; its return value is ignored on
    # purpose because newer river releases return None instead of the model.
    model.learn_one({'data': x})
    quantiles.append(model.quantile.get())

anomalies_tree = pd.Series(anomaly_samples, index=df.index)

# Flags are cast to int before diff(): on a boolean Series diff() is an XOR,
# so `diff() == 1` would count both the start and the end of each event.
text = (f"Proportion of anomalous samples: "
        f"{sum(anomaly_samples)/len(anomaly_samples)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{sum(pd.Series(anomaly_samples).astype(int).diff().dropna() == 1)}")
print(text)

# Raw scores and the running quantile used as threshold.
plt.plot(df.index, scores)
plt.plot(df.index, quantiles)

In [None]:
from functions.plot import plot_compare_anomalies

# Collect the anomaly flags of the three detectors, one column each, on the
# index of the Gaussian-scorer output, and plot them against the signal.
detector_flags = {
    "Trees": anomalies_tree,
    "OSVM": anomalies_svm,
    "ICDF": df_out.anomaly,
}
df_anomalies = pd.DataFrame(detector_flags, index=df_out.index)
fig = plot_compare_anomalies(df[col], df_anomalies, save=False)