# Eco-Pack Presov

In [None]:
%load_ext autoreload
%autoreload 2

### Import

In [None]:
import datetime as dt
import sys

from pathlib import Path

import numpy as np
import pandas as pd

from river import metrics
from river import utils, compose
from river.proba import Gaussian
from scipy.stats import kstest

sys.path.insert(1, str(Path().resolve().parent))
from functions.anomaly import (GaussianScorer, ConditionalGaussianScorer,
                               ThresholdChangeFilter)
from functions.proba import MultivariateGaussian
from functions.plot import plot_limits_3d, plot_limits_grid

from models.bess_model import BESS

### Load

In [None]:
# Load the raw export; the first CSV column becomes a UTC datetime index.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
df = pd.read_csv("/Users/marekwadinger/PyProjects/online_outlier_detection/examples/data/kokam/20230908_eco_pack_presov.csv", index_col=0)
df.index = pd.to_datetime(df.index, utc=True)
# Strip the "Kokam_String:" prefix from the column names.
df.columns = df.columns.str.replace("Kokam_String:", "")
# Fan-fault flag with missing entries replaced by 0.
# fillna() returns a new Series, so the result no longer depends on whether a
# write into a column selected from ``df`` propagates back to ``df`` (that
# differs between pandas versions / copy-on-write modes).
ser_gt = df['F_Module Fan Fault'].fillna(0)

In [None]:
# Fill missing values in all columns using DataFrame.interpolate() defaults.
df = df.interpolate()

In [None]:
# Load the model-input CSV, index it by UTC timestamps, strip the
# "Kokam_String:" column prefix and keep only the average cell temperature
# (as a one-column DataFrame).
df_avg = pd.read_csv("/Users/marekwadinger/PyProjects/online_outlier_detection/examples/data/kokam/20230908_model_input.csv", index_col=0)
df_avg = df_avg.set_axis(pd.to_datetime(df_avg.index, utc=True), axis="index")
df_avg = df_avg.rename(columns=lambda label: label.replace("Kokam_String:", ""))
df_avg = df_avg.loc[:, ['Avg. Cell Temperature']]

### Preprocess

In [None]:
# Keep only the columns whose name contains "Temperature".
df = df.loc[:, df.columns.str.contains("Temperature")]

In [None]:
# Restrict the data to 2023-08-20 .. 2023-08-24 (label-based slice on the
# datetime index; both end dates are included).
df = df.loc['2023-08-20':'2023-08-24']

### Feature Engineering

In [None]:
# As-of join on the indexes: attach to each row of ``df`` the matching
# "Avg. Cell Temperature" row of ``df_avg`` (merge_asof defaults).
df_merged = pd.merge_asof(df, df_avg, left_index=True, right_index=True)

In [None]:
# Remove the average-temperature column from ``df_merged`` and keep it as a
# Series; ``df_avg`` is rebound from the one-column DataFrame to that Series.
df_avg = df_merged.pop("Avg. Cell Temperature")

In [None]:
# Subtract the average temperature from every column, row by row
# (axis=0 aligns the Series with the DataFrame index).
df = df.sub(df_avg, axis=0)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each column with a MinMaxScaler fitted on the whole frame, then
# rebuild the DataFrame with the original index and column labels.
scaled_values = MinMaxScaler().fit_transform(df)
df = pd.DataFrame(scaled_values, index=df.index, columns=df.columns)

In [None]:
# Show the first rows of the preprocessed frame.
df.head()

### Train and Evaluate Detection

In [None]:
# Detector configuration
days = 7
WINDOW = dt.timedelta(days=days)             # rolling window of the model
minutes = WINDOW // dt.timedelta(minutes=1)  # window length in whole minutes
GRACE_PERIOD = 48 * 60
THRESHOLD = 0.99977

# System-level detector: multivariate Gaussian over a time-based rolling window.
rolling_gaussian = utils.TimeRolling(MultivariateGaussian(), period=WINDOW)
model = ConditionalGaussianScorer(
    rolling_gaussian,
    grace_period=GRACE_PERIOD,
    t_a=minutes,
    threshold=THRESHOLD,
)

# Univariate detector, used for the time gap between consecutive samples.
sampling_model = GaussianScorer(Gaussian(), grace_period=GRACE_PERIOD)

# Per-sample outputs collected while streaming over ``df`` (one entry per row).
system_anomaly = []
signal_anomaly = []
sampling_anomaly = []
change_point = []
list_thresh_pos = []
list_thresh_neg = []
mus = []
covars = []
sigmas = []
samples = []
scores = []

# Number of most recent samples inspected by the change-point rule.
recent_span = int(minutes / days)

for i, (t, x) in enumerate(df.iterrows()):
    t = t.tz_localize(None)
    x = x.to_dict()

    # Check anomaly in system (prediction and score are taken before learning)
    is_anomaly = model.predict_one(x)
    system_anomaly.append(is_anomaly)
    score = model.score_one(x)
    scores.append(score)

    # Get signal thresholds (indexed by signal name below)
    thresh_high, thresh_low = model.limit_one(x)
    list_thresh_pos.append(thresh_high)
    list_thresh_neg.append(thresh_low)

    # Check anomaly in signals: a signal is anomalous when it is not strictly
    # inside its (low, high) limits. The first sample is never flagged.
    if i != 0:
        signal_anomaly.append({k: not (thresh_low[k] < v < thresh_high[k])
                               for k, v in x.items()})
    else:
        signal_anomaly.append({k: False for k in x})

    # Check anomaly in sampling, based on the gap to the previous sample.
    if i != 0:
        # total_seconds() is the full gap length; the ``.seconds`` attribute is
        # only the seconds component and wraps around for gaps of a day or more.
        gap_s = (t - t_prev).total_seconds()
        score_ = sampling_model.score_one(gap_s)
        if sampling_model.gaussian.n_samples > sampling_model.grace_period:
            sample_a = 1 if ((1-0.99735) > score_) or (score_ > 0.99735) else 0
        else:
            # No sampling anomalies are raised during the grace period.
            sample_a = 0
        if sample_a:
            sampling_anomaly.append(1)
            # Anomalous gaps are learned with weight 1 - score_.
            w = 1-score_
        else:
            sampling_anomaly.append(0)
            w = 1
        sampling_model.learn_one(gap_s, w=w)
    else:
        sampling_anomaly.append(0)
    t_prev = t

    # Snapshot the model's distribution parameters before it learns ``x``.
    mus.append(model.gaussian.mu)
    covars.append(model.gaussian.var)
    sigmas.append(model.gaussian.sigma)
    samples.append(model.gaussian.n_samples)

    # Change point: more than 5 % of the recent system-level predictions
    # (the slice excludes the current sample) were anomalous.
    if i != 0:
        recent = system_anomaly[-recent_span:-1]
        is_change = sum(recent) / len(recent) > (1 - 0.95)
    else:
        is_change = 0
    change_point.append(is_change)

    model = model.learn_one(x, t=t)

# Also flag the sample that precedes every sampling anomaly.
# Start at index 1: for i == 0 the target ``i - 1`` would be -1 and wrap
# around to the last sample of the list.
for i in range(1, len(sampling_anomaly)):
    if sampling_anomaly[i] == 1:
        sampling_anomaly[i - 1] = 1

# Distribution parameters recorded at every step, indexed like ``df``.
stats_columns = {
    "mus": mus,
    "covars": covars,
    "sigmas": sigmas,
}
df_stats = pd.DataFrame(stats_columns, index=df.index)

# Detection outputs recorded at every step, indexed like ``df``.
out_columns = {
    "level_high": list_thresh_pos,
    "level_low": list_thresh_neg,
    "System Anomaly": system_anomaly,
    "Signal Anomaly": signal_anomaly,
    "Sampling Anomaly": sampling_anomaly,
    "Changepoint": change_point,
    "scores": scores,
}
df_out = pd.DataFrame(out_columns, index=df.index)

In [None]:
# Summary of the detection run.
# Anomalous events are counted as 0 -> 1 transitions of the system flag.
# The flags are cast to int first: pandas differences boolean data with XOR,
# which would also count the 1 -> 0 transitions.
anomaly_flags = pd.Series(system_anomaly).astype(int)
n_anomalous_events = int(anomaly_flags.diff().eq(1).sum())

text = (f"Sliding window: {WINDOW}\n"
        f"Proportion of anomalous samples: "
        f"{sum(system_anomaly)/len(system_anomaly)*100:.02f}%\n"
        f"Total number of anomalous events: "
        f"{n_anomalous_events}\n"
        f"Total number of change points: "
        f"{sum(change_point)}\n"
        f"Total number of sampling anomalies: "
        f"{sum(sampling_anomaly)}")

print(text)

In [None]:
from publications.ilustrate.pc2023.plot_matplotlib import *

def plot_covariance_ellipse(mean, sigma, ax=None, probability_interval=0.99735):
    """Draw the probability ellipse of a 2-D Gaussian and its bounding box.

    Relies on ``plt`` and ``colors`` being available at module level; they are
    not defined in this function.

    Args:
        mean: Centre of the ellipse; ``mean[0]`` and ``mean[1]`` are used as
            the x and y coordinates.
        sigma: Matrix whose eigen-decomposition defines the ellipse axes
            (NOTE(review): treated as a 2x2 covariance matrix -- confirm that
            callers pass a covariance and not a standard-deviation matrix).
        ax: Matplotlib axes to draw on. A new 8x8 inch figure is created when
            ``None``.
        probability_interval: Probability mass the ellipse should enclose.
    """
    from matplotlib.patches import Ellipse, Rectangle
    from scipy.stats import chi
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 8))

    # Eigenvalue decomposition of the covariance matrix
    # NOTE(review): np.linalg.eigh is the usual choice for symmetric matrices.
    eigvals, eigvecs = np.linalg.eig(sigma)
    # Orientation of the first eigenvector, arctan2(y, x), in degrees.
    angle = np.degrees(np.arctan2(*eigvecs[:, 0][::-1]))
    print(angle)
    # Semi-axis lengths for the desired probability_interval: the quantile of
    # the chi distribution with 2 degrees of freedom (i.e. the square root of
    # the chi-squared quantile) scaled by the standard deviation along each
    # principal axis.
    chi_val = chi.ppf(probability_interval, 2)
    radius_width = np.sqrt(eigvals[0]) * chi_val
    radius_height = np.sqrt(eigvals[1]) * chi_val
    print(radius_width, radius_height, angle)
    # Ellipse expects the full axis lengths (diameters), not the radii.
    ellipse = Ellipse(xy=mean, width=2 * radius_width,
                      height=2 * radius_height,
                      angle=angle, alpha=0.25, color=colors[0])
    ax.add_patch(ellipse)

    # Plot the mean point
    ax.plot(mean[0], mean[1], 'ro', markersize=5)

    # Set axis labels
    ax.set_xlabel('X-axis')
    ax.set_ylabel('Y-axis')

    # Set title
    ax.set_title('Covariance Ellipse')

    # Axis-aligned bounding box of the ellipse, converted from display
    # coordinates back to data coordinates and drawn as an unfilled rectangle.
    bbox_display = ellipse.get_window_extent()
    bbox_data = bbox_display.transformed(ax.transData.inverted())
    x_min, y_min = bbox_data.min
    x_max, y_max = bbox_data.max
    rectangle = Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, alpha=0.25, color=(1, 0, 0, 0.5), fill=False)
    ax.add_patch(rectangle)
    print(bbox_data)
    print(ellipse.get_corners())

def on_click(event):
    """Mouse-click callback that redraws ``axs[3]`` for the clicked time.

    For a click inside ``axs[1]`` the clicked x position is converted to a
    date, the samples of the preceding ``WINDOW`` are scattered on ``axs[3]``
    (first two columns of ``df``) and the ellipse for the stored mean/sigma at
    that time is drawn on top.

    Uses the module-level names ``axs``, ``fig``, ``df``, ``anomalies``,
    ``mus``, ``sigmas``, ``colors``, ``WINDOW`` and ``THRESHOLD``.
    NOTE(review): ``anomalies`` is not defined in the visible part of the
    file -- presumably a Series of system-anomaly flags indexed like ``df``.

    Args:
        event: Matplotlib mouse event.
    """
    # Import the submodule explicitly; a bare ``import matplotlib`` does not
    # guarantee that ``matplotlib.dates`` is loaded.
    from matplotlib.dates import num2date

    # React only to clicks inside axs[1].
    if event.inaxes != axs[1]:
        return

    x_date = num2date(event.xdata)
    # Positions of the samples nearest to the window start and the click.
    pos = df.index.get_indexer(
        [x_date - WINDOW, x_date], method='nearest')
    points = df.iloc[pos[0]:pos[1]]
    a = anomalies.iloc[pos[0]:pos[1]]
    if points.empty:
        # Nothing to draw (e.g. click on the very first sample).
        return
    mean = mus[pos[1]]
    sigma = sigmas[pos[1]]

    axs[3].clear()
    # Samples of the window that are not flagged as anomalous.
    regular = points[a != 1]
    axs[3].scatter(regular.iloc[:, 0], regular.iloc[:, 1], s=1)
    # Latest sample of the window, coloured by its anomaly flag
    # (positional access: ``a`` is label-indexed).
    axs[3].scatter(points.iloc[-1, 0], points.iloc[-1, 1], marker='x', s=30,
                   color=colors[5] if a.iloc[-1] else colors[0])
    plot_covariance_ellipse(mean, sigma, axs[3],
                            THRESHOLD**len(df.columns))
    axs[3].set_xlim(df.iloc[:, 0].min(), df.iloc[:, 0].max())
    # Redraw the figure so the updated axs[3] becomes visible.
    fig.canvas.draw()

# fig.canvas.mpl_connect('button_press_event', on_click)

# plt.show()


In [None]:
import PyQt6.QtCore
%matplotlib qt
# Plot the signals with their limits and all detected events, saving the
# figure under the name "Kokam". The limit series skip the first sample.
# NOTE(review): the explicit import above provides ``plot_limits_grid`` (no
# trailing underscore) -- confirm ``plot_limits_grid_`` comes from the star import.
plot_limits_grid_(
    df,
    df_out["System Anomaly"],
    df_out["Changepoint"],
    df_out["Sampling Anomaly"],
    ser_gt.rename("Fan Fault"),
    ser_high=df_out['level_high'].iloc[1:],
    ser_low=df_out['level_low'].iloc[1:],
    signal_anomaly=df_out["Signal Anomaly"],
    file_name="Kokam",
    save=True,
    resample='30s',
    grace_period=GRACE_PERIOD,
)