In [None]:
# Base
import requests
import pandas as pd
import datetime as dt

import plotly.express as px
import plotly.graph_objects as go

# Data
import collections
from river import datasets

from river import *

from streamz import Stream
from streamz.river import RiverTrain, RiverPredict

In [None]:
# Load the raw measurements; the first CSV column becomes the frame's index.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
csv_path = "/Users/marekwadinger/Documents/PhD/Teach/2022-2023/batch_data_processing/exams/data/data_BESS.csv"
df = pd.read_csv(csv_path, index_col=0)

In [None]:
# Display the frame as loaded (before the time column is parsed and indexed).
df

In [None]:
# Convert the 'time' column into pandas datetimes.
df['time'] = pd.to_datetime(df['time'])

In [None]:
# Use the timestamps as the index.
# Re-running this cell alone fails because 'time' is no longer a column afterwards.
df = df.set_index('time')

In [None]:
# Timestamps at which the outside temperature reading is missing.
missing_temp = df['Outside Temperature'].isna()
na_dates = df.index[missing_temp]

Get outside temperature

In [None]:
# Fill gaps in 'Outside Temperature' with hourly archive data from Open-Meteo.
# Best effort: if anything goes wrong the column is dropped so later cells still run,
# but the reason is reported instead of being silently swallowed.
try:
    # Nothing to fetch when there are no gaps; the column is then left untouched.
    if len(na_dates) > 0:
        url = "https://archive-api.open-meteo.com/v1/archive?hourly=temperature_2m&timezone=Europe%2FBerlin"
        # The two coordinate values were previously assigned to the opposite keys,
        # which placed the site far outside the Europe/Berlin timezone requested above.
        # NOTE(review): confirm the site coordinates (49.04 N, 19.72 E).
        pos = {'latitude': '49.04', 'longitude': '19.72'}
        # Span from the first to the last missing timestamp (index 0, not 1,
        # so the first gap is always covered).
        date_span = {'start_date': na_dates[0].strftime('%Y-%m-%d'),
                     'end_date': na_dates[-1].strftime('%Y-%m-%d')}

        params = {**pos, **date_span}

        response = requests.get(url, params=params, timeout=30)
        # Surface HTTP errors here rather than as a KeyError on the JSON payload.
        response.raise_for_status()
        df_out_temp = pd.DataFrame(response.json()['hourly'])
        # NOTE(review): timestamps are requested in Europe/Berlin local time but are
        # labelled as UTC here — confirm this matches the timezone of df's index.
        df_out_temp.time = pd.to_datetime(df_out_temp.time, utc=True)
        df_out_temp = df_out_temp.set_index('time')['temperature_2m']
        # Scale to the [0, 1] range using fixed temperature bounds
        min_temp = -15
        max_temp = 50
        range_temp = max_temp - min_temp

        df_out_temp = (df_out_temp - min_temp) / range_temp
        # Resample to a 1-minute grid ('1min' replaces the deprecated '1t' alias)
        df_out_temp = df_out_temp.resample('1min').interpolate()
        # Combine: keep measured values, fill the gaps from the archive series
        df['Outside Temperature'] = df['Outside Temperature'].combine_first(df_out_temp)
except Exception as exc:
    print(f"Could not fetch outside temperature ({exc!r}); dropping the column.")
    df = df.drop(columns='Outside Temperature', errors='ignore')

We need to interpolate the data onto a regular 1-minute grid so that a rolling window of 90 samples spans exactly 90 minutes and no longer.

In [None]:
# Disabled pandas-only variant of the rolling envelope (1-minute resampling,
# 90-sample rolling mean/std, +/- 3 std band). It is wrapped in a string literal,
# so none of it executes; the cell merely evaluates to that string.
'''
df = df.resample('1t').interpolate()
df_mov_mean = df.rolling(window=90).mean().dropna()
df_mov_std = df.rolling(window=90).std().dropna()
df = df.loc[df_mov_mean.index]
df_env_pos = df_mov_mean + 3 * df_mov_std
df_env_neg = df_mov_mean - 3 * df_mov_std
'''

In [None]:
# Place lagged copies of the frame (lags 0..9) side by side, keyed by lag under
# a 'Time Shift' column level; rows containing any missing value are dropped.
lagged_frames = {lag: df.shift(lag) for lag in range(10)}
df_shift = pd.concat(lagged_frames, names=['Time Shift'], axis=1).dropna()

In [None]:
# Time-based rolling mean and standard deviation of SOC over a 90-minute window.
window = dt.timedelta(hours=1, minutes=30)
rmean = utils.TimeRolling(stats.Mean(), period=window)
rvar = utils.TimeRolling(stats.Var(), period=window)

mean_ = []
std_ = []
# Only SOC is needed, so iterate over that column instead of materialising
# every row with iterrows().
for t, soc in df['SOC'].items():
    t_naive = t.tz_localize(None)
    # update() and get() are separate statements on purpose: chaining them
    # relies on update() returning the statistic, which newer river releases
    # no longer do.
    rmean.update(soc, t=t_naive)
    rvar.update(soc, t=t_naive)
    mean_.append(rmean.get())
    # Standard deviation is the square root of the rolling variance.
    std_.append(rvar.get() ** (1/2))

s_mean = pd.Series(mean_, index=df.index)
s_std = pd.Series(std_, index=df.index)

In [None]:
# Upper and lower bounds of the 3-sigma band around the rolling mean.
s_env_pos = s_mean.add(3 * s_std)
s_env_neg = s_mean.sub(3 * s_std)

In [None]:
# Plot the rolling mean of SOC together with its 3-sigma envelope.
fig = go.Figure()

# The envelope is one closed polygon: the upper bound left-to-right followed by
# the lower bound right-to-left, filled and drawn with an invisible outline.
envelope_x = s_env_pos.index.append(s_env_pos.index[::-1])
envelope_y = pd.concat([s_env_pos, s_env_neg[::-1]])
envelope_trace = go.Scatter(
    x=envelope_x,
    y=envelope_y,
    fill='toself',
    fillcolor='rgba(100,0,80,0.2)',
    line_color='rgba(255,255,255,0)',
    showlegend=False,
    name='river Mov Mean SOC',
)
fig.add_trace(envelope_trace)

mean_trace = go.Scatter(
    x=s_mean.index,
    y=s_mean,
    line_color='rgb(100,0,80)',
    name='river Mov Mean SOC',
)
fig.add_trace(mean_trace)

In [None]:
import numpy as np

class HalfSpaceTrees(anomaly.HalfSpaceTrees):
    """Half-space trees that also expose their score in a two-class probability form."""

    def predict_proba_one(self, x):
        """Return ``{False: 1 - p, True: p}`` where ``p`` is the parent's ``score_one(x)``."""
        score = super().score_one(x)
        return {False: 1.0 - score, True: score}


class QuantileFilter(anomaly.QuantileFilter):
    """Quantile filter that additionally offers a boolean ``predict_one``."""

    def __init__(self, anomaly_detector, q: float, protect_anomaly_detector=True):
        # All arguments are forwarded unchanged to the parent constructor.
        super().__init__(
            anomaly_detector=anomaly_detector,
            q=q,
            protect_anomaly_detector=protect_anomaly_detector,
        )

    def predict_one(self, *args):
        """Return whether the sample's score reaches the tracked quantile.

        While ``self.quantile.get()`` is falsy the threshold is infinity.
        """
        score = self.score_one(*args)
        threshold = self.quantile.get() or np.inf
        return score >= threshold

In [None]:
# Fresh 90-minute time-rolling statistics shared by the helper functions below.
window = dt.timedelta(hours=1, minutes=30)
rmean = utils.TimeRolling(stats.Mean(), period=window)
# Despite its name, rstd tracks the variance (stats.Var).
rstd = utils.TimeRolling(stats.Var(), period=window)
# Every column except the SOC signal.
to_discard = [col for col in df.columns if col != 'SOC']

def get_rmean(x):
    """Feed row ``x`` into the global rolling mean and return it with 'mean_'-prefixed keys.

    ``x`` must offer ``drop('time')`` and a ``'time'`` entry with ``tz_localize``.
    """
    # update() and get() are separate calls because newer river releases
    # return None from update(), which breaks chaining.
    rmean.update(x.drop('time'), t=x['time'].tz_localize(None))
    return compose.Prefixer('mean_').transform_one(rmean.get())
def get_rstd(x):
    """Feed row ``x`` into the global ``rstd`` statistic and return it with 'std_'-prefixed keys.

    ``rstd`` wraps ``stats.Var``, so the returned values are variances.
    """
    rstd.update(x.drop('time'), t=x['time'].tz_localize(None))
    return compose.Prefixer('std_').transform_one(rstd.get())
def get_stat(x):
    """Return the entries of ``x`` extended with the rolling mean and variance features."""
    return {**x , **get_rmean(x), **get_rstd(x)}
def get_rdev(x):
    """Add 'dev', the deviation of SOC from its rolling mean, to the features."""
    # The rolling mean of SOC is stored under 'mean_SOC' (prefix + column name);
    # a plain 'mean' key is never produced by get_stat.
    return {**x, 'dev': x['SOC'] - x['mean_SOC']}

In [None]:
# Chain the feature helpers: rolling statistics first, then the SOC deviation.
model = get_stat | compose.FuncTransformer(get_rdev)

In [None]:
# Fresh running-mean statistic used by the scratch checks in the next cells.
mean = stats.Mean()

In [None]:
# Value of the statistic before any update is applied.
mean_old = mean.get()

In [None]:
# NOTE(review): `row` is not defined anywhere in the visible notebook — confirm where it comes from.
# update() and get() are separate calls because newer river releases return
# None from update(), which breaks chaining.
mean.update(row.drop('time'))
mean_new = mean.get()

In [None]:
# Show the statistic before and after the update side by side.
mean_old, mean_new

In [None]:
# Feed one row into the rolling mean and show the result.
# NOTE(review): `row` is not defined anywhere in the visible notebook — confirm where it comes from.
# update() and get() are separate calls because newer river releases return
# None from update(), which breaks chaining.
rmean.update(row.drop('time'), t=row['time'].tz_localize(None))
rmean.get()

In [None]:
# Feed one row into the rolling variance and show the result.
# NOTE(review): `row` is not defined anywhere in the visible notebook — confirm where it comes from.
# update() and get() are separate calls because newer river releases return
# None from update(), which breaks chaining.
rstd.update(row.drop('time'), t=row['time'].tz_localize(None))
rstd.get()

In [None]:
# Try the helper on a single row; the call also feeds `row` into the global rstd statistic.
# NOTE(review): `row` is not defined anywhere in the visible notebook — confirm where it comes from.
get_rstd(row)

In [None]:
# Try the model on a single row.
# NOTE(review): at this point `model` was assembled from the two feature helpers only
# (no detector step yet) — confirm that predict_one is meaningful here.
model.predict_one(row)

In [None]:
# Re-create the 90-minute rolling statistics so the run below starts from a clean state.
window = dt.timedelta(hours=1, minutes=30)
rmean = utils.TimeRolling(stats.Mean(), period=window)
# Despite its name, rstd tracks the variance (stats.Var).
rstd = utils.TimeRolling(stats.Var(), period=window)
# Every column except the SOC signal.
to_discard = [col for col in df.columns if col != 'SOC']

def get_rmean(x):
    """Feed row ``x`` into the global rolling mean and return it with 'mean_'-prefixed keys.

    ``x`` must offer ``drop('time')`` and a ``'time'`` entry with ``tz_localize``.
    """
    # update() and get() are separate calls because newer river releases
    # return None from update(), which breaks chaining.
    rmean.update(x.drop('time'), t=x['time'].tz_localize(None))
    return {**compose.Prefixer('mean_').transform_one(rmean.get())}
def get_rstd(x):
    """Feed row ``x`` into the global ``rstd`` statistic and return it with 'std_'-prefixed keys.

    ``rstd`` wraps ``stats.Var``, so the returned values are variances.
    """
    rstd.update(x.drop('time'), t=x['time'].tz_localize(None))
    return {**compose.Prefixer('std_').transform_one(rstd.get())}
def get_stat(x):
    """Return the entries of ``x`` extended with the rolling mean and variance features."""
    return {**x , **get_rmean(x), **get_rstd(x)}
def get_rdev(x):
    """Add 'dev', the deviation of SOC from its rolling mean, to the features."""
    # The rolling mean of SOC is stored under 'mean_SOC' (prefix + column name);
    # a plain 'mean' key is never produced by get_stat.
    return {**x, 'dev': x['SOC'] - x['mean_SOC']}

# Feature helpers followed by a quantile-thresholded half-space-trees detector.
detector = QuantileFilter(HalfSpaceTrees(seed=42), q=0.997)
model = get_stat | compose.FuncTransformer(get_rdev) | detector

# Stream the rows through the model: predict first, then learn (test-then-train),
# while recording the 3-sigma envelope of SOC from the rolling statistics.
anomaly_samples = []
anomaly_env_pos = []
anomaly_env_neg = []
list_env_pos = []
list_env_neg = []

for i, x in df.reset_index().iterrows():

    anomaly_samples.append(model.predict_one(x))

    # The rolling statistics hold one entry per column; the envelope is built
    # for SOC only, as scalars, so the resulting series can be plotted directly.
    x_mean = rmean.get()
    x_std = rstd.get()  # variance, hence the square root below
    x_env_pos = {"SOC": x_mean['SOC'] + 3 * x_std['SOC']**(1/2)}
    x_env_neg = {"SOC": x_mean['SOC'] - 3 * x_std['SOC']**(1/2)}
    list_env_pos.append(x_env_pos["SOC"])
    list_env_neg.append(x_env_neg["SOC"])
    # NOTE(review): both calls score the same sample `x` again rather than the
    # envelope values, and each call runs get_stat (updating the rolling
    # statistics once more) — confirm the intended input.
    anomaly_env_pos.append(model.predict_one(x))
    anomaly_env_neg.append(model.predict_one(x))

    # Do not rebind `model` to the return value: newer river releases return
    # None from learn_one, which would break the next iteration.
    model.learn_one(x)

s_env_pos = pd.Series(list_env_pos, index=df.index)
s_env_neg = pd.Series(list_env_neg, index=df.index)

In [None]:
# Number of processed samples and how many of them were flagged.
len(anomaly_samples), sum(anomaly_samples)

In [None]:
# Number of anomaly events, i.e. transitions from "normal" to "anomalous".
# The flags are cast to int first: diff() on a boolean series is an XOR, so
# comparing it with 1 would count falling edges as well.
anomaly_edges = pd.Series(anomaly_samples).astype(int).diff()
int((anomaly_edges == 1).sum())

In [None]:
# Inspect the upper envelope series built in the streaming loop.
s_env_pos

In [None]:
# Plot SOC, its rolling mean with the 3-sigma envelope, and the flagged anomaly intervals.
fig = go.Figure()

# Envelope as one closed polygon: upper bound forwards, lower bound backwards.
fig.add_trace(go.Scatter(
    x=s_env_pos.index.append(s_env_pos.index[::-1]),
    y= pd.concat([s_env_pos, s_env_neg[::-1]]),
    fill='toself',
    fillcolor='rgba(100,0,80,0.2)',
    line_color='rgba(255,255,255,0)',
    showlegend=False,
    name='Mov Mean SOC',
))

fig.add_trace(go.Scatter(
    x=s_mean.index, y=s_mean,
    line_color='rgb(100,0,80)',
    name='Mov Mean SOC',
))

fig.add_trace(go.Scatter(
    x=df.index, y=df.SOC,
    line_color='rgb(0,140,120)',
    name='SOC',
))

# Shade every anomalous interval: +1 in the diff marks its start, -1 its end.
anomaly_flags = pd.Series(anomaly_samples, index=df.index).astype(int)
a = anomaly_flags.diff()
interval_starts = list(a[a == 1].index)
interval_ends = list(a[a == -1].index)
# The diff has no +1 for data that already starts anomalous ...
if len(anomaly_flags) > 0 and anomaly_flags.iloc[0] == 1:
    interval_starts.insert(0, anomaly_flags.index[0])
# ... and no -1 for an anomaly still open at the end; close it at the last
# sample so it is not silently dropped by zip().
if len(interval_starts) > len(interval_ends):
    interval_ends.append(anomaly_flags.index[-1])
for x0, x1 in zip(interval_starts, interval_ends):
    fig.add_vrect(x0=x0, x1=x1, fillcolor="red", opacity=0.25)

In [None]:
# Render the figure assembled in the previous cell.
fig.show()

In [None]:
# Overlay the 0/1 anomaly flags recorded for both envelope lists on the existing figure.
env_flag_traces = (
    ('Anomaly Env Pos', anomaly_env_pos),
    ('Anomaly Env Neg', anomaly_env_neg),
)
for trace_name, env_flags in env_flag_traces:
    fig.add_trace(go.Scatter(
        x=df.index,
        y=pd.Series(env_flags).astype(int),
        line_color='rgb(160,0,0)',
        name=trace_name,
    ))

# Display the updated figure as the cell output.
fig

Explore the relationship between the SOC value and the anomaly score

In [None]:
# Score a sweep of SOC values (0.200 to 0.449 in steps of 0.001) and plot score against value.
# NOTE(review): the samples are plain dicts without a 'time' entry, while the
# get_rmean/get_rstd helpers call x.drop('time') — confirm the model accepts this input.
soc_grid = [step / 1000 for step in range(200, 450)]
l = [model.score_one({"SOC": soc}) for soc in soc_grid]
px.line(x=soc_grid, y=l)