In [None]:
import datetime as dt

import pandas as pd
import plotly.graph_objects as go
from river import anomaly
from scipy.stats import norm

In [None]:
# Dataset option: average cell temperature.
# `col` names the column that the analysis cells below read from `df`.
# NOTE: the following cell assigns the same `df`/`col` names from another
# file, so on a top-to-bottom run this selection gets replaced.
col = 'Average Cell Temperature'
df = pd.read_csv('data/average_temperature.csv', index_col=0)
# The first CSV column is the index; turn it into pandas datetimes.
df.index = pd.to_datetime(df.index)

In [None]:
# Dataset option: inverter temperature.
# Reuses the names `df` and `col`, replacing whatever an earlier cell
# stored under them.
col = 'Inverter Temperature'
df = pd.read_csv('data/inverter_temperature.csv', index_col=0)
# The first CSV column is the index; turn it into pandas datetimes.
df.index = pd.to_datetime(df.index)

In [None]:
class GaussianScorer(anomaly.GaussianScorer):
    """Gaussian anomaly scorer extended with a decision threshold.

    The score of a value is ``2 * |cdf(x) - 0.5|`` under the running
    Gaussian held in ``self.gaussian``; ``predict_one`` turns that score
    into a 0/1 flag by comparing it with ``threshold``.

    Parameters
    ----------
    threshold : float
        Score above which ``predict_one`` reports an anomaly. The default
        0.99735 is approximately the two-sided mass of a +/-3 sigma band.
    window_size, period, grace_period
        Forwarded positionally to the parent constructor.
        NOTE(review): assumes a river build whose
        ``anomaly.GaussianScorer.__init__`` accepts
        ``(window_size, period, grace_period)`` and which sets
        ``self.gaussian`` and ``self.grace_period`` - confirm.
    """

    def __init__(self, 
                 threshold=0.99735, 
                 window_size=None, 
                 period=None, 
                 grace_period=100):
        super().__init__(window_size, period, grace_period)
        self.threshold = threshold

    def learn_one(self, x, **kwargs):
        """Update the running Gaussian with ``x`` and return ``self``.

        Extra keyword arguments (the notebook passes ``t=<timestamp>``)
        are handed unchanged to ``self.gaussian.update``.
        """
        self.gaussian.update(x, **kwargs)
        return self

    def score_one(self, x, t=None):
        """Return the anomaly score of ``x``.

        Returns 0 while fewer than ``grace_period`` samples have been
        seen. ``t`` is accepted but not used.
        """
        if self.gaussian.n_samples < self.grace_period:
            return 0
        return 2 * abs(self.gaussian.cdf(x) - 0.5)

    def predict_one(self, x, t=None):
        """Return 1 if ``x`` is flagged as anomalous, otherwise 0.

        A flag is only raised once strictly more than ``grace_period``
        samples have been seen. ``t`` is accepted but not used.
        """
        score = self.score_one(x)
        # Read the sample count through the same path as score_one. The
        # previous ``self.gaussian.obj.n_samples`` only resolves when the
        # distribution is wrapped in a rolling container and raises
        # AttributeError for a plain (non-windowed) distribution.
        if self.gaussian.n_samples > self.grace_period:
            return 1 if score > self.threshold else 0
        return 0
    
# Decision level on the score scale. It is applied directly to the inverted
# model's score in the loop and converted into data-unit thresholds with
# norm.ppf; `model` itself relies on the identical default threshold of
# GaussianScorer because none is passed below.
threshold = 0.99735
# Number of initial loop iterations during which the inverted model never
# flags an anomaly (compared against the loop counter `i` below).
# NOTE(review): 60*24 presumably means one day of 1-minute samples - confirm.
grace_period=60*24
#to_discard = [i for i in df.columns if i != 'SOC']
# Temperature
# Passed as `period` to both scorers and reported as "Sliding window" in the
# summary; 24*7 hours = 7 days.
window = dt.timedelta(hours=24*7)
# Scorer fed with the raw signal.
model = GaussianScorer(
                grace_period=0, # Number of samples
                period=window,
                #window_size=60*24*7
                
            )

# Scorer fed with the negated signal (see `-x` in the loop).
model_inv = GaussianScorer(
                grace_period=0,
                period=window
                
            )

# Per-sample records collected by the loop. Names with a trailing underscore
# belong to the inverted model.
anomaly_samples = []
anomaly_samples_ = []
scores = []
scores_ = []
list_thresh_pos = []
list_thresh_neg = []
mus = []
mus_ = []
sigmas = []
sigmas_ = []
samples = []

# Stream the selected column one row at a time: score first, record the
# current model state, and only then decide whether to learn from the sample.
for i, (t, x) in enumerate(df.iterrows()):
    if i == 0:
        # Seed both running means with the first observation by writing to
        # private attributes of the underlying estimator.
        # NOTE(review): relies on river internals (_var.mean._mean) - confirm
        # this still matches the installed version.
        model.gaussian._var.mean._mean = x[col]
        model_inv.gaussian._var.mean._mean = -x[col]
    # Make the timestamp timezone-naive before it is handed to learn_one.
    t = t.tz_localize(None)
    x = x[col]
    score = model.score_one(x); scores.append(score)
    samples.append(model.gaussian.n_samples)
    is_anomaly = model.predict_one(x)
    anomaly_samples.append(is_anomaly)    
    score_ = model_inv.score_one(-x); scores_.append(score_)
    #anomaly_samples.append(model_inv.classify(score_))
    # The inverted model is flagged here with the module-level `threshold`
    # and `grace_period` rather than through predict_one.
    if i > grace_period:
        is_anomaly_ = 1 if score_ > threshold else 0
    else:
        is_anomaly_ = 0
    anomaly_samples_.append(is_anomaly_)
    
    # Upper threshold in data units: the quantile of N(mu, sigma) at
    # threshold/2 + 0.5, taken from the model state before learning `x`.
    kwargs = {'loc': model.gaussian.mu, 
              'scale': model.gaussian.sigma}
    # NOTE(review): `sigmas` stores `_var.get()`, presumably the variance
    # (the envelope below takes its square root) - confirm.
    sigmas.append(model.gaussian._var.get())
    mus.append(model.gaussian.mu)
    real_thresh = norm.ppf((threshold/2 + 0.5), **kwargs)
    # Cap at 1; any value for which `< 1` is False (including NaN) becomes 1.
    real_thresh = real_thresh if real_thresh < 1 else 1
    list_thresh_pos.append(real_thresh)
    
    # Lower threshold: the same quantile of the inverted model, negated back
    # to the sign of the original signal.
    kwargs_inv = {'loc': model_inv.gaussian.mu, 
              'scale': model_inv.gaussian.sigma}
    sigmas_.append(model_inv.gaussian._var.get())
    mus_.append(model_inv.gaussian.mu)
    real_thresh = -norm.ppf((threshold/2 + 0.5), **kwargs_inv)
    # Floor at 0; any value for which `> 0` is False (including NaN) becomes 0.
    real_thresh = real_thresh if real_thresh > 0 else 0
    list_thresh_neg.append(real_thresh)
    # Learn from the sample when it is not flagged, or when more than 99.73 %
    # of the up to 299 flags preceding the current one are anomalous (with a
    # full 299-flag window that means every one of them). The slice [-300:-1]
    # excludes the flag just appended and is empty on the first iteration, so
    # the division is only safe there because the first flag is 0 and the
    # `or` short-circuits.
    if not is_anomaly or (sum(anomaly_samples[-300:-1]) / len(anomaly_samples[-300:-1]) > 0.9973):
        model = model.learn_one(x, **{'t': t})
    if not is_anomaly_ or (sum(anomaly_samples_[-300:-1]) / len(anomaly_samples_[-300:-1]) > 0.9973):
        model_inv = model_inv.learn_one(-x, **{'t': t})
    
# Align the per-sample records with the timestamps of the input frame.
s_thresh_pos = pd.Series(list_thresh_pos, index=df.index)
s_thresh_neg = pd.Series(list_thresh_neg, index=df.index)

s_mean = pd.Series(mus, index=df.index)
# Holds the `_var.get()` values recorded above despite the `std` name.
s_std = pd.Series(sigmas, index=df.index)

# Envelope of the running mean +/- 3 * sqrt(s_std).
s_env_pos = s_mean + 3 * s_std**0.5
s_env_neg = s_mean - 3 * s_std**0.5

# For anomaly plotting
# Differencing the 0/1 flags gives +1 where an anomalous run starts and -1
# on the first non-anomalous sample after it.
a = pd.Series(anomaly_samples, index=df.index).astype(int).diff()
# Show dates of anomalous events
# Number of run starts per calendar day.
b = a[a == 1].resample('1d').sum()

In [None]:
# Share of samples flagged by the main model, expressed in percent.
anomalous_pct = sum(anomaly_samples) / len(anomaly_samples) * 100
# An event begins wherever the 0/1 flag steps up, i.e. its diff equals 1.
event_starts = pd.Series(anomaly_samples).diff().dropna() == 1

# Human-readable run summary; also reused as the figure annotation later on.
text = (f"Sliding window: {window}\n"
        f"Proportion of anomalous samples: "
        f"{anomalous_pct:.02f}%\n"
        f"Total number of anomalous events: "
        f"{sum(event_starts)}")
print(text)

In [None]:
# Base name shared by all exported files:
# "<column with underscores>_<window length in hours>_hours_sliding".
file_name = (f"{col.replace(' ', '_')}_"
             f"{int(window.total_seconds()/60/60)}_hours_sliding")
# When True, a PDF snapshot is written after each group of traces is added,
# so every export shows the figure as built up to that point.
save = True

fig = go.Figure()

# Compact layout used for the PDF snapshots; the figure is enlarged again
# just before the HTML export at the end of the cell.
fig.update_layout(
    height=90*3,
    width=120*3,
    
    yaxis_title="Normalized Temperature",
    yaxis_title_standoff = 0,
    yaxis_range=[0,1],
    xaxis_tickangle=60,
    xaxis_tickfont_size=9,
    # Put x ticks only on days on which at least one anomalous run starts.
    xaxis_tickvals=b[b > 0].index,
    
    font_family="Times New Roman",
    font_size=9,
    
    autosize=True,
    
    margin=dict(l=40, r=15, t=0, b=0),
    bargap=0,
        
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

# To show discontinuation of the signal stream: put the signal on a regular
# 2-minute grid so that bins without data become NaN, which the trace below
# renders as gaps (connectgaps=False).
# "min" is used instead of the "T" alias, which is deprecated since
# pandas 2.2; both denote minutes.
d = df.copy()
d.index = d.index.round("1min")
d = d.resample('2min').min()

fig.add_trace(go.Scatter(
    x=d.index, y=abs(d[col]),
    connectgaps=False,
    line_color='rgb(0,140,120)',
    name=col, showlegend=True,
    line_width=0.7
))

if save:
    fig.write_image(f"{file_name}_signal.pdf")

# Shaded band between the upper and lower envelope, drawn as one closed
# polygon: upper bound left-to-right, then lower bound right-to-left.
fig.add_trace(go.Scatter(
    x=s_env_pos.index.append(s_env_pos.index[::-1]),
    y= pd.concat([s_env_pos, s_env_neg[::-1]]),
    fill='toself',
    fillcolor='rgba(100,0,80,0.2)',
    line_color='rgba(255,255,255,0)',
    showlegend=False,
    name=f'Mov Mean {col}',
    line_width=0.7
))

fig.add_trace(go.Scatter(
    x=s_mean.index, y=s_mean,
    line_color='rgb(100,0,80)',
    name=f'Mov Mean {col}',
    line_width=0.7
))

if save:
    fig.write_image(f"{file_name}_mean.pdf")
# Hide the two traces added last (mean line and envelope band) so they do
# not appear in the exports that follow.
for trace in (-1, -2):
    fig.data[trace].visible = False

# One red rectangle per anomalous run. Runs start where the flag diff is +1
# and end where it is -1.
anomaly_starts = a[a == 1].index
anomaly_ends = a[a == -1].index
if len(anomaly_starts) > len(anomaly_ends):
    # The series ends while still anomalous, so the last run has no closing
    # -1. Close it at the final timestamp; otherwise zip() would silently
    # drop that run from the plot.
    anomaly_ends = anomaly_ends.append(a.index[-1:])
for x0, x1 in zip(anomaly_starts, anomaly_ends):
    fig.add_vrect(x0=x0, x1=x1, line_color="red", fillcolor="red", 
                  opacity=0.25)
    
if save:
    fig.write_image(f"{file_name}_anomalies.pdf")

# Invisible horizontal line at the top of the threshold region (1, or the
# largest upper threshold if that is not below 1). It only serves as the
# reference that the next trace fills up to via fill="tonexty".
thresh_max = s_thresh_pos.max(skipna=True)
thresh_ceiling = 1 if thresh_max < 1 else thresh_max
fig.add_trace(go.Scatter(
    x=df.index, y=[thresh_ceiling]*len(df),
    line_color='rgba(100,100,100, 0)', 
    name='Threshold', legendgroup='thresh', showlegend=False,
    line_width=0.7
))

# Upper threshold; the area between it and the ceiling line is shaded.
fig.add_trace(go.Scatter(
    x=s_thresh_pos.index, y=s_thresh_pos,
    line_color='rgba(100,0,0,0.25)',
    fillcolor='rgba(100,0,0, 0.1)', fill="tonexty",
    name='Threshold', legendgroup='thresh', showlegend=True,
    line_width=0.7
))

# Lower threshold; the area between it and y=0 is shaded.
fig.add_trace(go.Scatter(
    x=s_thresh_neg.index, y=s_thresh_neg,
    line_color='rgba(100,0,0,0.25)', 
    fillcolor='rgba(100,0,0, 0.1)', fill="tozeroy",
    name='Threshold', legendgroup='thresh', showlegend=False,
    line_width=0.7
))

if save:
    fig.write_image(f"{file_name}_thresh.pdf")

# The annotation is rendered as HTML, so line breaks must be <br> tags.
text = text.replace('\n', '<br>')
fig.add_annotation(text=text, align='left',
                  xref="paper", yref="paper",
                  x=0, y=1.2, showarrow=False)

# Enlarge the figure for the interactive HTML export and on-screen display.
fig.update_layout(
    height=90*10,
    width=120*10,
)
fig.write_html(f"{file_name}_all.html")

fig.show()