In [None]:
import datetime as dt
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm

from dynamic_signal_limits_service import GaussianScorer
from functions.plot import plot_limits

In [None]:
# Dataset option: inverter temperature. `col` names the signal column used by
# the cells below.
col = 'Inverter Temperature'
df = pd.read_csv(filepath_or_buffer='data/input/inverter_temperature.csv',
                 index_col=0)
# Parse the first CSV column (now the index) as timezone-aware UTC timestamps.
df.index = pd.to_datetime(df.index, utc=True)

In [None]:
# Dataset option: average cell temperature.
# NOTE: this cell rebinds `df` and `col`; when the notebook is run top to
# bottom it replaces whatever the previous loader cell assigned.
col = 'Average Cell Temperature'
df = pd.read_csv(filepath_or_buffer='data/input/average_temperature.csv',
                 index_col=0)
# Parse the first CSV column (now the index) as timezone-aware UTC timestamps.
df.index = pd.to_datetime(df.index, utc=True)

In [None]:
# CONSTANTS
THRESHOLD = 0.99735                  # probability mass enclosed by the two limits
GRACE_PERIOD = 60 * 24               # Number of samples
WINDOW = dt.timedelta(hours=24 * 7)
ADAPT_WINDOW = 300                   # look-back (in samples) of the adaptation gate
ADAPT_RATIO = 0.9973                 # share of recent anomalies that forces learning


def _mostly_anomalous(history, window=ADAPT_WINDOW, ratio=ADAPT_RATIO):
    """Tell whether the recent past of `history` is almost entirely anomalous.

    Inspects ``history[-window:-1]`` -- up to ``window - 1`` flags that precede
    the most recent entry -- and returns True when their mean exceeds `ratio`.
    An empty slice (e.g. on the very first sample) yields False instead of
    raising ZeroDivisionError.
    """
    recent = history[-window:-1]
    return bool(recent) and sum(recent) / len(recent) > ratio


# `model` scores x and provides the upper limit; `model_inv` is fed -x and its
# negated upper limit is used as the lower limit.
model = GaussianScorer(grace_period=GRACE_PERIOD, period=WINDOW)
model_inv = GaussianScorer(grace_period=GRACE_PERIOD, period=WINDOW)

anomaly_samples = []
anomaly_samples_ = []
list_thresh_pos = []
list_thresh_neg = []
mus = []
mus_ = []
sigmas = []
sigmas_ = []
samples = []
times = []

# Upper-tail probability of a central interval holding THRESHOLD of the mass.
upper_q = THRESHOLD / 2 + 0.5

for i, (t, x) in enumerate(df.iterrows()):
    start = time.perf_counter_ns()
    t = t.tz_localize(None)
    x = x[col]
    if i == 0:
        # Seed both distributions from the first observation.
        # NOTE(review): relies on the private `_from_state` helper; the meaning
        # of the positional arguments is not visible here -- confirm.
        model.gaussian.obj = model.gaussian._from_state(0, x, 1e-5, 1)
        model_inv.gaussian.obj = model_inv.gaussian._from_state(0, -x, 1e-5, 1)

    is_anomaly = model.predict_one(x)
    anomaly_samples.append(is_anomaly)

    # NOTE(review): the returned score is unused (the decision comes from
    # predict_one below). The call is kept only in case score_one has side
    # effects in GaussianScorer -- confirm and drop.
    model_inv.score_one(-x)
    is_anomaly_ = model_inv.predict_one(-x)
    anomaly_samples_.append(is_anomaly_)

    pause = time.perf_counter_ns()

    # Bookkeeping between `pause` and `step` is excluded from the timing.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        thresh_high = norm.ppf(upper_q,
                               loc=model.gaussian.mu,
                               scale=model.gaussian.sigma)
        thresh_low = -norm.ppf(upper_q,
                               loc=model_inv.gaussian.mu,
                               scale=model_inv.gaussian.sigma)
    list_thresh_pos.append(thresh_high)
    list_thresh_neg.append(thresh_low)

    # NOTE: despite the list names, these values are read from `gaussian._var`
    # (presumably variances), hence the square root taken after the loop.
    sigmas.append(model.gaussian._var.get())
    mus.append(model.gaussian.mu)

    sigmas_.append(model_inv.gaussian._var.get())
    mus_.append(model_inv.gaussian.mu)

    samples.append(model.gaussian.n_samples)
    step = time.perf_counter_ns()

    # Learn from normal samples; also learn when nearly all recent samples were
    # flagged, so each model can adapt to a changed operating point. Each model
    # is gated on its OWN anomaly history.
    if not is_anomaly or _mostly_anomalous(anomaly_samples):
        model = model.learn_one(x, t=t)
    if not is_anomaly_ or _mostly_anomalous(anomaly_samples_):
        model_inv = model_inv.learn_one(-x, t=t)
    # Per-sample cost = prediction time + learning time.
    times.append((time.perf_counter_ns() - step) + (pause - start))

s_mean = pd.Series(mus, index=df.index)
s_std = pd.Series(sigmas, index=df.index)  # holds the `_var` values, see above

s_env_pos = s_mean + 3 * s_std**0.5
s_env_neg = s_mean - 3 * s_std**0.5

df_out = pd.DataFrame({"level_high": list_thresh_pos,
                       "level_low": list_thresh_neg,
                       "anomaly": anomaly_samples},
                      index=df.index)

In [None]:
# Summarise the detector output. An "event" is a 0 -> 1 transition of the flag.
# The flags are cast to int first: on a bool Series, `diff()` is an XOR, so
# `== 1` would count both the start and the end of every event.
flags = pd.Series(anomaly_samples).astype(int)
n_events = (flags.diff().dropna() == 1).sum()
text = (f"Sliding window: {WINDOW}\n"
        f"Proportion of anomalous samples: "
        f"{sum(anomaly_samples)/len(anomaly_samples)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{n_events}")
print(text)

In [None]:
# Persist the per-sample limits and anomaly flags held in `df_out`.
df_out.to_csv(
    path_or_buf="publications/ilustrate/pc2023/inverter/inverter_output.csv"
)

In [None]:
# Draw the signal together with its anomaly flags and the high/low limits.
plot_limits(
    df[col],
    df_out["anomaly"],
    df_out["level_high"],
    df_out["level_low"],
    save=False,
)

In [None]:
from river import anomaly
from river import metrics
import numpy as np
from river import feature_extraction as fx


class QuantileFilter(anomaly.QuantileFilter):
    """Quantile filter with a warm-up phase and a `predict_one` convenience.

    Parameters
    ----------
    anomaly_detector
        Detector handed to the parent filter.
    q
        Quantile level handed to the parent filter.
    grace_period
        While fewer than this many `learn_one` calls have been made, the
        wrapped detector is updated regardless of the score.
    protect_anomaly_detector
        Handed to the parent filter; when falsy the wrapped detector is
        always updated.
    """

    def __init__(self, anomaly_detector, q: float, grace_period: int = 0,
               protect_anomaly_detector=True):
        super().__init__(
            anomaly_detector=anomaly_detector,
            protect_anomaly_detector=protect_anomaly_detector,
            q=q,
        )
        self.grace_period = grace_period
        # Number of `learn_one` calls seen so far.
        self.n = 0

    def predict_one(self, *args):
        """Return whether the score of the sample reaches the running quantile.

        A falsy quantile (e.g. nothing tracked yet) is replaced by infinity,
        so nothing is flagged in that case.
        """
        sample_score = self.score_one(*args)
        cutoff = self.quantile.get() or np.inf
        return sample_score >= cutoff

    def learn_one(self, *args):
        """Update the quantile with the sample's score and return `self`.

        The wrapped detector learns the sample during the grace period, when
        protection is disabled, or when the score is not classified as
        anomalous.
        """
        self.n += 1
        score = self.score_one(*args)
        warming_up = self.n < self.grace_period
        if warming_up or not (self.protect_anomaly_detector
                              and self.classify(score)):
            self.anomaly_detector.learn_one(*args)
        self.quantile.update(score)
        return self

    
# One-Class SVM baseline wrapped in the quantile filter defined in this notebook.
model = QuantileFilter(
    anomaly.OneClassSVM(nu=0.2),
    q=0.995,
    grace_period=0,
)

# NOTE(review): never updated -- there are no ground-truth labels in this cell.
auc = metrics.ROCAUC()

anomaly_samples = []
scores = []
quantiles = []
times = []
# Iterate over the configured signal column (`col`), as the Gaussian cell and
# the final comparison plot do, instead of whichever column happens to be first.
for i, x in enumerate(df[col].values):
    start = time.perf_counter_ns()
    score = model.score_one({'data': x})
    scores.append(score)
    is_anomaly = model.predict_one({'data': x})
    # The first sample is never reported as anomalous.
    anomaly_samples.append(is_anomaly if i > 0 else False)
    model = model.learn_one({'data': x})
    quantiles.append(model.quantile.get())
    times.append(time.perf_counter_ns() - start)

anomalies_svm = pd.Series(anomaly_samples, index=df.index)

# An "event" is a False -> True transition. The flags are booleans, and
# `diff()` on a bool Series is an XOR, so they are cast to int first; otherwise
# `== 1` would count both the start and the end of every event.
n_events = (anomalies_svm.astype(int).diff().dropna() == 1).sum()
text = (f"Proportion of anomalous samples: "
        f"{sum(anomaly_samples)/len(anomaly_samples)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{n_events}")
print(text)

plt.plot(df.index, scores, label="score")
plt.plot(df.index, quantiles, label="quantile threshold")
plt.legend()
plt.show()

In [None]:
from river import anomaly, preprocessing
from river import metrics

# NOTE(review): `scaler` is not used anywhere in this cell; kept in case later
# code expects the name -- confirm and remove.
scaler = preprocessing.MinMaxScaler()

# Half-Space Trees baseline wrapped in the quantile filter defined in this
# notebook. `limits` hard-codes the value range of the 'data' feature to (0, 110).
model = QuantileFilter(
    anomaly.HalfSpaceTrees(window_size=50, limits={'data': (0, 110)}, seed=42),
    q=0.99,
)

# NOTE(review): never updated -- there are no ground-truth labels in this cell.
auc = metrics.ROCAUC()

anomaly_samples = []
scores = []
quantiles = []
times = []
# Iterate over the configured signal column (`col`), as the Gaussian cell and
# the final comparison plot do, instead of whichever column happens to be first.
for i, x in enumerate(df[col].values):
    start = time.perf_counter_ns()
    score = model.score_one({'data': x})
    scores.append(score)
    is_anomaly = model.predict_one({'data': x})
    # Nothing is reported as anomalous for the first GRACE_PERIOD + 1 samples.
    anomaly_samples.append(is_anomaly if i > GRACE_PERIOD else False)
    model = model.learn_one({'data': x})
    quantiles.append(model.quantile.get())
    times.append(time.perf_counter_ns() - start)

anomalies_tree = pd.Series(anomaly_samples, index=df.index)

# An "event" is a False -> True transition. The flags are booleans, and
# `diff()` on a bool Series is an XOR, so they are cast to int first; otherwise
# `== 1` would count both the start and the end of every event.
n_events = (anomalies_tree.astype(int).diff().dropna() == 1).sum()
text = (f"Proportion of anomalous samples: "
        f"{sum(anomaly_samples)/len(anomaly_samples)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{n_events}")
print(text)

plt.plot(df.index, scores, label="score")
plt.plot(df.index, quantiles, label="quantile threshold")
plt.legend()
plt.show()

In [None]:
# Gather the anomaly flags of the three detectors side by side (one column per
# method) on the index of `df_out`, then export them.
df_anomalies = pd.DataFrame(
    dict(HST=anomalies_tree, OSVM=anomalies_svm, ICDF=df_out["anomaly"]),
    index=df_out.index,
)
df_anomalies.to_csv(
    path_or_buf="publications/ilustrate/pc2023/inverter/comparison_output.csv"
)

In [None]:
from functions.plot import plot_compare_anomalies

# Plot the signal against the per-method anomaly flags.
# NOTE(review): `save=True` presumably makes the helper write the figure to
# disk -- confirm in functions.plot.
plot_compare_anomalies(
    df[col],
    df_anomalies,
    save=True,
)