In [None]:
import os
# Enter the project directory; the relative paths used below (e.g. the
# savefig targets) are resolved against it. The guard makes the cell safe to
# re-run: a second execution would otherwise try to enter "pc2023/pc2023".
if os.path.basename(os.getcwd()) != "pc2023":
    os.chdir("pc2023")

In [None]:
from typing import Union
from plot_matplotlib import *

In [None]:
# Figure width handed to set_size() by every plotting cell: 75 % of 398.3386.
# NOTE(review): 398.3386 looks like a LaTeX \textwidth in points — confirm.
PLOT_WIDTH = 0.75*398.3386

# Simulation

In [None]:
# Default subplot margins and vertical spacing for every figure created below.
plt.rcParams.update({
    "figure.subplot.left": 0.12,
    "figure.subplot.bottom": 0.12,
    "figure.subplot.right": 0.95,
    "figure.subplot.top": 0.9,
    "figure.subplot.hspace": 0.25,
})

In [None]:
import numpy as np
import control
import scipy.signal as sig
import matplotlib.pyplot as plt

# Model parameters shared with the ODE functions defined further down
# (`system`, `system_perturbed`): F1 and F2 divide the flow terms, k11 and k22
# scale the square-root outflow terms.
F1 = 0.8
F2 = 0.1
k11 = 1
k22 = 1.3

# Steady-state inflow chosen as the linearization point.
q0s = 0.2

# NOTE(review): Q0s, XData and YData are not referenced again in the visible
# part of the notebook — presumably leftovers; confirm before removing.
Q0s = np.arange(0.1,10,0.1)
XData = []
YData = []

# Steady-state levels that belong to the inflow q0s.
h2s = (q0s/k22)**2
h1s = (q0s/k11)**2 + h2s

# Local gains of the square-root outflow terms at the steady state.
# NOTE(review): the ODEs below use sqrt(h1-h2) for the first outflow, while K1
# is evaluated at sqrt(h1s) and the first row of A has no h2 term — confirm
# that this linearization matches the simulated model.
K1 = k11*(1/(2*np.sqrt(h1s)))
K2 = k22*(1/(2*np.sqrt(h2s)))

# Continuous-time linear state-space model; C is the identity, so both states
# are outputs.
# NOTE(review): B feeds the input into both states, whereas in `system` the
# inflow q0 only appears in dh1/dt — confirm.
A = np.array([[-K1/F1, 0], [K1/F2, -K2/F2]])
B = np.array([[1/F1], [1/F2]])
C = np.array([[1, 0], [0, 1]])
D = np.array([[0], [0]])

# Discretize with sampling period Ts (cont2discrete's default method).
Ts = 0.5
A_d, B_d, C_d, D_d, _ = sig.cont2discrete((A,B,C,D), Ts)
# LQR weights: identity state cost, small input cost.
Q = np.array([[1,0],[0,1]])
R = np.array([[0.001]])

# Discrete-time LQR design on the discretized model.
# NOTE(review): simulate() defines its own local gain K, so the gain computed
# here is not used by the simulations in the visible part of the notebook.
K, S, E = control.dlqr(A_d,B_d,Q,R)

In [None]:
def simulate(system, sim_time=300, faults=False):
    """Simulate the closed loop for ``sim_time`` steps with a fixed-gain controller.

    The setpoint follows a built-in schedule: the steady state for inflow 5 at
    the start, then for inflows 4, 0.5 and 2.5 from steps 50, 100 and 200. In
    every step the controller computes the inflow from the level error, the
    inflow is clipped to ``[0, 10]`` and the model is integrated with
    ``solve_ivp`` over ``[t, t + Ts]``.

    Relies on the module-level names ``k11``, ``k22``, ``Ts``, ``np``,
    ``random`` and ``solve_ivp`` being defined when the function is called.

    Args:
        system: ODE right-hand side ``f(t, x, u)`` handed to ``solve_ivp``;
            ``u`` receives the applied inflow.
        sim_time: Number of simulation steps.
        faults: When true, a sensor fault and an actuator fault are each drawn
            once per step via ``random.randint(1, 50) == 1``. A sensor fault
            zeroes the measurement (for the controller and in ``y_out``); an
            actuator fault halves the applied inflow.

    Returns:
        Tuple ``(t_out, y_ref, y_out, u_out, labels)``. The first four are
        arrays with one row/entry per step (reference levels, recorded levels,
        applied inflow). ``labels`` is a list holding the index of the active
        setpoint (0-3), or ``-1`` for steps on which a fault was injected.
    """
    q0_max = 10
    q0_min = 0
    # Fixed proportional gain on the two level errors (constant, so it is
    # built once instead of on every step).
    K = np.array([0.5, 0.1])
    # Step index -> (steady-state inflow, setpoint label) taking effect there.
    schedule = {50: (4, 1), 100: (0.5, 2), 200: (2.5, 3)}

    def _steady_state(q0s):
        """Return the steady-state levels ``[h1s, h2s]`` for inflow ``q0s``."""
        h2s = (q0s/k22)**2
        h1s = (q0s/k11)**2 + h2s
        return [h1s, h2s]

    t_out = []
    y_ref = []
    y_out = []
    u_out = []
    labels = []

    q0s = 5
    state = 0
    h_ref = _steady_state(q0s)
    # Start at the first setpoint. An array (not the list itself) is required
    # so that ``h_ode*0`` below yields zeros even on the very first step;
    # with a list it evaluates to ``[]`` and the subtraction fails.
    h_ode = np.array(h_ref, dtype=float)

    for t in range(sim_time):
        # NOTE(review): the integration window has length Ts while t advances
        # by 1 per step — confirm this is intended.
        tspan = [t, t+Ts]
        if t in schedule:
            q0s, state = schedule[t]
            h_ref = _steady_state(q0s)
        labels.append(state)

        # The order of the two random draws (sensor first, actuator second) is
        # kept so that seeded runs stay reproducible.
        fault_sensor = random.randint(1, 50) if faults else 0
        measured = h_ode*0 if fault_sensor == 1 else h_ode
        u = -K @ (np.array(measured) - np.array(h_ref))
        q0 = u + q0s
        q0 = max(min(q0, q0_max), q0_min)

        fault_actuator = random.randint(1, 50) if faults else 0
        if fault_actuator == 1:
            q0 = q0*0.5

        x_ode = solve_ivp(system, tspan, h_ode, args=(q0,), method="RK45")
        h_ode = x_ode.y[:,-1]

        if fault_sensor == 1 or fault_actuator == 1:
            labels[-1] = -1
        y_out.append(h_ode if fault_sensor != 1 else h_ode*0)
        y_ref.append(h_ref)
        t_out.append(t)
        u_out.append(q0)

    y_ref = np.array(y_ref)
    y_out = np.array(y_out)
    t_out = np.array(t_out)
    u_out = np.array(u_out)

    return t_out, y_ref, y_out, u_out, labels
    

In [None]:
from scipy.integrate import solve_ivp
import random
random.seed(40)  # seed the global RNG (simulate() draws its faults from it)
def system(t, x, u):
  """Right-hand side of the nominal (noise-free) two-level model.

  Args:
    t: Time; not used, present because solve_ivp passes it.
    x: Current levels ``(h1, h2)``.
    u: Inflow ``q0``.

  Returns:
    ``[dh1/dt, dh2/dt]``.
  """
  level_1, level_2 = x
  root_12 = np.sqrt(level_1 - level_2)  # appears in both equations

  rate_1 = u/F1 - k11/F1 * root_12
  rate_2 = k11/F2 * root_12 - k22/F2 * np.sqrt(level_2)

  return [rate_1, rate_2]

# Limits of the control action, drawn as guide lines in the lower panel.
q0_max = 10
q0_min = 0
t_out, y_ref, y_out_s, u_out_s, labels = simulate(system, sim_time=300, faults=False)

fig, axs = plt.subplots(nrows=2, ncols=1, figsize=set_size(PLOT_WIDTH, subplots=(1,1)), sharex=True)
# Panel a): first output against its reference.
axs[0].plot(t_out, y_out_s[:,0], label=r'$y_{\mathrm{s}}$')
axs[0].plot(t_out, y_ref[:,0], label=r'$y_{\mathrm{ref}}$', linestyle=":")
axs[0].set_ylabel('$y$')
axs[0].set_title('a) Response of a System')
axs[0].legend()

# Panel b): applied control action and its limits.
axs[1].plot(t_out, u_out_s, label=r'$u_{\mathrm{s}}$')
# The limit lines use the second colour of the property cycle. The public
# "C1" colour spec replaces the private `axs[1]._get_lines.prop_cycler`
# attribute, which recent Matplotlib releases no longer provide.
color = {"color": "C1"}
axs[1].axhline(q0_max, **color, linestyle=":")
axs[1].axhline(q0_min, **color, linestyle=":")
axs[1].set_xlabel('$t$')
axs[1].set_ylabel('$u$')
axs[1].set_title('b) Control Action')
axs[1].legend()

plt.savefig("sample/simulation.pdf")
plt.show()

In [None]:
from scipy.integrate import solve_ivp
import random
random.seed(40)  # seed the global RNG used for the noise and the fault draws
def system_perturbed(t, x, u):
  """Right-hand side of the two-level model with additive random disturbance.

  Each derivative gets its own ``random.normalvariate(0, 5)`` draw plus the
  term ``h1/50``. The noise is drawn anew on every call, i.e. on every
  right-hand-side evaluation the integrator performs.
  NOTE(review): the second equation also adds ``h1/50`` — confirm ``h2/50``
  was not intended.

  Args:
    t: Time; not used, present because solve_ivp passes it.
    x: Current levels ``(h1, h2)``.
    u: Inflow ``q0``.

  Returns:
    ``[dh1/dt, dh2/dt]``.
  """
  level_1, level_2 = x
  root_12 = np.sqrt(level_1 - level_2)
  drift = level_1/50

  # The draw for the first equation must precede the one for the second so
  # that seeded runs produce the same sequence.
  noise_1 = random.normalvariate(0,5)*1
  rate_1 = u/F1 - k11/F1 * root_12 + noise_1 + drift
  noise_2 = random.normalvariate(0,5)*1
  rate_2 = k11/F2 * root_12 - k22/F2 * np.sqrt(level_2) + noise_2 + drift

  return [rate_1, rate_2]

# Limits of the control action, drawn as guide lines in the lower panel.
q0_max = 10
q0_min = 0
# Perturbed model with fault injection; `labels` marks faulty steps with -1.
t_out, y_ref, y_out_r, u_out_r, labels = simulate(system_perturbed, sim_time=300, faults=True)

fig, axs = plt.subplots(nrows=2, ncols=1, figsize=set_size(PLOT_WIDTH, subplots=(1,1)), sharex=True)
# Panel a): first output against its reference.
axs[0].plot(t_out, y_out_r[:,0], label=r'$y_{\mathrm{r}}$')
axs[0].plot(t_out, y_ref[:,0], label=r'$y_{\mathrm{ref}}$', linestyle=":")
axs[0].set_ylabel('$y$')
axs[0].set_title('a) Response of a System')
axs[0].legend()

# Panel b): applied control action and its limits.
axs[1].plot(t_out, u_out_r, label=r'$u_{\mathrm{r}}$')
# The limit lines use the second colour of the property cycle. The public
# "C1" colour spec replaces the private `axs[1]._get_lines.prop_cycler`
# attribute, which recent Matplotlib releases no longer provide.
color = {"color": "C1"}
axs[1].axhline(q0_max, **color, linestyle=":")
axs[1].axhline(q0_min, **color, linestyle=":")
axs[1].set_xlabel('$t$')
axs[1].set_ylabel('$u$')
axs[1].set_title('b) Control Action')
axs[1].legend()

plt.savefig("sample/real.pdf")
plt.show()

In [None]:
from scipy.integrate import solve_ivp
import random
random.seed(40)  # re-seed so this run sees the same random sequence from its start
# NOTE(review): this definition duplicates the one in the preceding cell.
def system_perturbed(t, x, u):
  """Right-hand side of the two-level model with additive random disturbance.

  Each derivative gets its own ``random.normalvariate(0, 5)`` draw plus the
  term ``h1/50``; the noise is drawn anew on every call.

  Args:
    t: Time; not used, present because solve_ivp passes it.
    x: Current levels ``(h1, h2)``.
    u: Inflow ``q0``.

  Returns:
    ``[dh1/dt, dh2/dt]``.
  """
  level_1, level_2 = x
  root_12 = np.sqrt(level_1 - level_2)
  drift = level_1/50

  # Keep the draw order (first equation, then second) for reproducibility.
  noise_1 = random.normalvariate(0,5)*1
  rate_1 = u/F1 - k11/F1 * root_12 + noise_1 + drift
  noise_2 = random.normalvariate(0,5)*1
  rate_2 = k11/F2 * root_12 - k22/F2 * np.sqrt(level_2) + noise_2 + drift

  return [rate_1, rate_2]
# Limits of the control action, drawn as guide lines in the lower panel.
q0_max = 10
q0_min = 0
# Fault-free run of the perturbed model. Its labels are discarded on purpose:
# the anomaly markers below use `labels`, `y_out_r` and `u_out_r` from the
# earlier run with faults enabled.
t_out, y_ref, y_out, u_out, _ = simulate(system_perturbed, sim_time=300, faults=False)

fig, axs = plt.subplots(nrows=2, ncols=1, figsize=set_size(PLOT_WIDTH, subplots=(1,1)), sharex=True)
# Public "CN" colour specs replace the private `_get_lines.prop_cycler`
# attribute, which recent Matplotlib releases no longer provide. The
# fault-free and the faulty trace share the first cycle colour; the reference
# gets the second one.
color = {"color": "C0"}
axs[0].plot(t_out, y_out[:,0], **color, label='$y^*$')
axs[0].plot(t_out, y_out_r[:,0], label=r'$y_{\mathrm{r}}$',
            **color, alpha=0.25)
axs[0].plot(t_out, y_ref[:,0], label=r'$y_{\mathrm{ref}}$', linestyle=":",
            color="C1")
# Mark the steps of the faulty run that were labelled as anomalies (-1).
is_anomaly = np.array(labels) == -1
axs[0].scatter(t_out[is_anomaly],
               y_out_r[:, 0][is_anomaly],
               marker='x', color='red', label=r'$y_{\mathrm{a}}$')
axs[0].set_ylabel('$y$')
axs[0].set_title('a) Response of a System')
axs[0].legend()

color = {"color": "C0"}
axs[1].plot(t_out, u_out, **color, label='$u^*$')
axs[1].plot(t_out, u_out_r, label=r'$u_{\mathrm{r}}$', **color, alpha=0.25)
color = {"color": "C1"}
axs[1].axhline(q0_max, **color, linestyle=":")
axs[1].axhline(q0_min, **color, linestyle=":")
axs[1].set_xlabel('$t$')
axs[1].set_ylabel('$u$')
axs[1].set_title('b) Control Action')
axs[1].legend()

plt.savefig("sample/imagined.pdf")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler

from matplotlib.colors import ListedColormap
# Palette used for class colouring (the hex codes of Matplotlib's tab10 cycle).
colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]


def get_cmap(Z):
    """Return a colormap holding one palette colour per distinct value in ``Z``.

    The colours are the leading entries of the module-level ``colors`` list as
    it is at call time.
    """
    n_distinct = len(np.unique(Z))
    return ListedColormap(colors[:n_distinct])


# Feature matrix: one row per time step with the columns (u, first output).
X = np.array([u_out_r, y_out_r[:, 0]]).T
y = labels
# Chronological split: the first 200 steps are used for training, all
# remaining steps for testing. The test slice starts at 200 so that no sample
# is dropped between the two parts.
X_train = X[0:200, :]
X_test = X[200:, :]
y_train = y[0:200]
# Faulty steps carry the label -1; re-code them as class 4 so that they can be
# used as a regular class and as a colour index.
y_train = np.where(np.array(y_train) == -1, 4, y_train)
y_test = y[200:]

# Unsupervised model: isolation forest fitted on the unscaled training data.
unsupervised = IsolationForest(contamination=0.05, random_state=40)
unsupervised.fit(X_train)

# Supervised model: logistic regression on standardized features. The scaler
# is fitted once on the training data and reused for everything passed to the
# classifier, so the grid used for the decision regions below is expressed in
# the same scaled coordinates the classifier was trained in.
scaler = StandardScaler().fit(X_train)
supervised = LogisticRegressionCV(max_iter=1000,
                                  class_weight={4: 3})
supervised.fit(scaler.transform(X_train), y_train)
y_pred = supervised.predict(scaler.transform(X_train))

# 2 x 2 figure that is saved after each panel is added.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=set_size(PLOT_WIDTH, subplots=(2, 2)),
                        sharex=True)

xlim = (0, 10)
ylim = (0, 50)
for ax in axs.flatten():
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Panel a): training data coloured by label.
s = axs[0, 0].scatter(X_train[:, 0], X_train[:, 1],
                      c=y_train, cmap=ListedColormap(colors[0:4]))
axs[0, 0].set_ylabel('$y$')
axs[0, 0].set_title('a) Data with labels')
legend1 = axs[0, 0].legend(
    s.legend_elements()[0],
    [r"$y_{\mathrm{ref}} = 40$",
     r"$y_{\mathrm{ref}} = 25$",
     r"$y_{\mathrm{ref}} = 0$",
     r"$y_{\mathrm{a}}$"])
axs[0, 0].add_artist(legend1)

plt.savefig("ml_gaps/un_supervised_1.pdf")

# Panel b): decision regions of the supervised classifier, evaluated on a
# grid over the plotted range, with the training points coloured by their
# predicted class.
xx, yy = np.meshgrid(np.linspace(0, 10, 200), np.linspace(0, 50, 200))
Z = supervised.predict(scaler.transform(np.c_[xx.ravel(), yy.ravel()]))
Z = Z.reshape(xx.shape)
axs[1, 0].contourf(xx, yy, Z, alpha=0.4, cmap=get_cmap(Z))
axs[1, 0].scatter(X_train[:, 0], X_train[:, 1], c=np.choose(y_pred, colors))
axs[1, 0].set_title('b) Supervised Learning')
axs[1, 0].set_xlabel('$u$')
axs[1, 0].set_ylabel('$y$')

plt.savefig("ml_gaps/un_supervised_2.pdf")

# Panel c): the same training data without label information.
axs[0, 1].scatter(X_train[:, 0], X_train[:, 1], c='grey')
axs[0, 1].set_title('c) Data without labels')

plt.savefig("ml_gaps/un_supervised_3.pdf")

# Panel d): isolation-forest result. predict() returns -1 for outliers and 1
# for inliers; -1 is mapped to index 0 so both values index the two-colour list.
Z = unsupervised.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
train_flags = unsupervised.predict(X_train)
axs[1, 1].scatter(X_train[:, 0], X_train[:, 1], c=np.choose(
    np.where(train_flags == -1, 0, train_flags), ['#d62728', '#1f77b4']))
axs[1, 1].imshow(
    Z,
    interpolation="nearest",
    extent=(xx.min(), xx.max(), yy.min(), yy.max()),
    cmap=ListedColormap(['#d62728', '#1f77b4']),
    alpha=0.4,
    aspect="auto",
    origin="lower",
)
axs[1, 1].set_title('d) Unsupervised Learning')
axs[1, 1].set_xlabel('$u$')


# Save the complete figure and show it.
plt.savefig("ml_gaps/un_supervised_4.pdf")
plt.show()

In [None]:
# Palette for class colouring (the hex codes of Matplotlib's tab10 cycle).
colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]

# Half-width single-panel figure.
w, h = set_size(PLOT_WIDTH)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(w/2, h), sharex=True)

xlim = (0, 10)
ylim = (0, 50)
ax.set(xlim=xlim, ylim=ylim, xlabel='$u$', ylabel='$y$')

# Training data coloured by label, with one legend entry per class.
s = ax.scatter(X_train[:, 0], X_train[:, 1],
               c=y_train, cmap=ListedColormap(colors[0:4]))
legend1 = ax.legend(
    s.legend_elements()[0],
    [
        r"$y_{\mathrm{ref}} = 40$",
        r"$y_{\mathrm{ref}} = 25$",
        r"$y_{\mathrm{ref}} = 0$",
        r"$y_{\mathrm{a}}$",
    ],
)
ax.add_artist(legend1)
fig.savefig("sample/labeled_data.pdf")

In [None]:
# Same palette as before, but with the fourth and fifth colour swapped.
colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#9467bd', '#d62728',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]
# Labels of the complete data set with the anomaly label -1 re-coded as 5.
y_all = np.where(np.array(y) == -1, 5, y)

# Half-width single-panel figure.
w, h = set_size(PLOT_WIDTH)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(w/2, h), sharex=True)

xlim = (0, 10)
ylim = (0, 50)
ax.set(xlim=xlim, ylim=ylim, xlabel='$u$', ylabel='$y$')

# Complete data set coloured by label, with one legend entry per class.
s = ax.scatter(X[:, 0], X[:, 1], c=y_all, cmap=ListedColormap(colors[0:5]))
legend1 = ax.legend(
    s.legend_elements()[0],
    [
        r"$y_{\mathrm{ref}} = 40$",
        r"$y_{\mathrm{ref}} = 25$",
        r"$y_{\mathrm{ref}} = 0$",
        r"$y_{\mathrm{ref}} = 10$",
        r"$y_{\mathrm{a}}$",
    ],
)
ax.add_artist(legend1)

fig.savefig("sample/new_data.pdf")

In [None]:
# Half-width single-panel figure of the complete data set without labels.
w, h = set_size(PLOT_WIDTH)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(w/2, h), sharex=True)

xlim = (0, 10)
ylim = (0, 50)
ax.set(xlim=xlim, ylim=ylim, xlabel='$u$', ylabel='$y$')
# All samples in a single neutral colour.
ax.scatter(X[:, 0], X[:, 1], c='grey')
fig.savefig("sample/unlabeled_data.pdf")

In [None]:
# Half-width single-panel figure showing one unlabeled sample.
w, h = set_size(PLOT_WIDTH)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(w/2, h), sharex=True)

xlim = (0, 10)
ylim = (0, 50)
ax.set(xlim=xlim, ylim=ylim, xlabel='$u$', ylabel='$y$')
# Only the sample with index 4 is drawn.
ax.scatter(X[4, 0], X[4, 1], c='grey')
fig.savefig("sample/unlabeled_data_4.pdf")

In [None]:
# Half-width single-panel figure of the labelled training data (not saved).
w, h = set_size(PLOT_WIDTH)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(w/2, h), sharex=True)

xlim = (0, 10)
ylim = (0, 50)
ax.set(xlim=xlim, ylim=ylim, ylabel='$y$')

# Training data coloured by label, with one legend entry per class.
s = ax.scatter(X_train[:, 0], X_train[:, 1],
               c=y_train, cmap=ListedColormap(colors[0:4]))
legend1 = ax.legend(
    s.legend_elements()[0],
    [
        r"$y_{\mathrm{ref}} = 40$",
        r"$y_{\mathrm{ref}} = 25$",
        r"$y_{\mathrm{ref}} = 0$",
        r"$y_{\mathrm{a}}$",
    ],
)
ax.add_artist(legend1)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
import random
random.seed(40)
from matplotlib.colors import ListedColormap
# Palette for class colouring (the hex codes of Matplotlib's tab10 cycle).
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

def get_cmap(Z):
  """Return a colormap with one palette colour per distinct value in ``Z``."""
  return ListedColormap(colors[0:len(np.unique(Z))])


# Re-code the anomaly label -1 in the test labels as class 5 so it can be used
# as a colour index.
y_test = np.where(np.array(y_test) == -1, 5, y_test)

# Batch classifier: standardization followed by cross-validated logistic
# regression with extra weight on class 4, fitted on the training data.
# NOTE(review): `supervised` is not used again in this cell.
supervised = make_pipeline(
  StandardScaler(), 
  LogisticRegressionCV(max_iter=1000,
                       class_weight={4: 3}))
supervised.fit(X_train, y_train)

from river import tree
from river import linear_model
from river import preprocessing
from river import optim
from river import anomaly

class QuantileFilter(anomaly.QuantileFilter):
  """``anomaly.QuantileFilter`` extended with a boolean ``predict_one``.

  The constructor forwards its arguments unchanged to the parent class.
  """

  def __init__(self, anomaly_detector, q: float, protect_anomaly_detector=True):
    super().__init__(
        anomaly_detector=anomaly_detector,
        q=q,
        protect_anomaly_detector=protect_anomaly_detector,
    )

  def predict_one(self, *args):
    """Return whether the sample's score reaches the current quantile.

    While ``self.quantile.get()`` is falsy (``None`` or ``0``) the threshold
    falls back to ``np.inf``, so no finite score is flagged.
    """
    sample_score = self.score_one(*args)
    threshold = self.quantile.get() or np.inf
    return sample_score >= threshold
# Online anomaly detector: min-max scaling followed by half-space trees whose
# scores are thresholded by the QuantileFilter defined above (q = 0.98).
online = (
    preprocessing.MinMaxScaler() |
    QuantileFilter(
        anomaly.HalfSpaceTrees(
            window_size=150,
            n_trees=30,
            height=10,
            seed=40),
        q=0.98
    )
)

# 2 x 2 figure that is saved in four stages as panels are added or replaced.
import matplotlib.pyplot as plt

fig, axs = plt.subplots(2, 2, figsize=set_size(PLOT_WIDTH, subplots=(2,2)),
                        sharex=True)
xlim = (0, 10)
ylim = (0, 50)
for ax in axs.flatten():
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Panel a): training data without label information.
s = axs[0, 0].scatter(X_train[:, 0], X_train[:, 1], c='grey')
axs[0, 0].set_title("a) Training Data")
axs[0, 0].set_ylabel('$y$')

# Palette with the fourth and fifth colour swapped; used for np.choose below.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#9467bd', '#d62728', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Evaluate the batch model (`unsupervised`, fitted in an earlier cell) on a
# grid over the plotted range.
xx, yy = np.meshgrid(np.linspace(0, 10, 200), np.linspace(0, 50, 200))
Z = unsupervised.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Panel b), stage 1: training points coloured by the batch model's verdict
# (-1 is mapped to index 0 -> first colour, 1 -> second colour). The artists
# are kept in sc10 / im10 so they can be removed again in stage 3.
sc10 = axs[1, 0].scatter(X_train[:, 0], X_train[:, 1], c=np.choose(np.where(
    unsupervised.predict(X_train) == -1, 0, unsupervised.predict(X_train)), ['#d62728', '#1f77b4']))
im10 = axs[1, 0].imshow(
    Z,
    interpolation="nearest",
    extent=(xx.min(), xx.max(), yy.min(), yy.max()),
    cmap=ListedColormap(['#d62728', '#1f77b4']),
    alpha=0.4,
    aspect="auto",
    origin="lower",
)
axs[1, 0].set_title('b) Batch Learning')
axs[1, 0].set_xlabel('$u$')
axs[1, 0].set_ylabel('$y$')

fig.savefig("ml_gaps/batch_online_1.pdf")

# Panel c): test data. The coloured scatter is drawn first and then covered
# by a grey scatter of the same points.
axs[0, 1].scatter(X_test[:, 0], X_test[:, 1], c=np.choose(y_test, colors))
s3 = axs[0, 1].scatter(X_test[:, 0], X_test[:, 1], c='grey')
axs[0, 1].set_title("c) Testing Data")

fig.savefig("ml_gaps/batch_online_2.pdf")

# Panel b), stage 3: replace the training-only view by the batch model's
# verdict on the complete data set X.
sc10.remove()
im10.remove()
axs[1, 0].scatter(X[:, 0], X[:, 1], c=np.choose(np.where(
    unsupervised.predict(X) == -1, 0, unsupervised.predict(X)), ['#d62728', '#1f77b4']))
axs[1, 0].imshow(
    Z,
    interpolation="nearest",
    extent=(xx.min(), xx.max(), yy.min(), yy.max()),
    cmap=ListedColormap(['#d62728', '#1f77b4']),
    alpha=0.4,
    aspect="auto",
    origin="lower",
)

fig.savefig("ml_gaps/batch_online_3.pdf")

# Panel d): online model. Every sample is first predicted and only then
# learned from; a missing prediction (None) is recorded as 0.
y_online = []
for x_, y_ in zip(X, y):
  x_ = {i: v for i, v in enumerate(x_)}
  y_pred = online.predict_one(x_)
  y_online.append(y_pred) if y_pred is not None else y_online.append(0)
  online.learn_one(x_, y_)
axs[1, 1].scatter(X[:, 0], X[:, 1], c=np.choose(y_online, ['#1f77b4', '#d62728']))
axs[1, 1].set_title('d) Online Learning')
axs[1, 1].set_xlabel('$u$')

# plt.subplots_adjust(hspace=0.3, wspace=0.2)

# Save the complete figure and show it.
plt.savefig("ml_gaps/batch_online_4.pdf")
plt.show()

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
import random
random.seed(40)
from matplotlib.colors import ListedColormap
# Palette for class colouring (the hex codes of Matplotlib's tab10 cycle).
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

def get_cmap(Z):
  """Return a colormap with one palette colour per distinct value in ``Z``."""
  return ListedColormap(colors[0:len(np.unique(Z))])


# Re-code the anomaly label -1 in the test labels as class 5 so it can be used
# as a colour index.
y_test = np.where(np.array(y_test) == -1, 5, y_test)

# Batch classifier: standardization followed by cross-validated logistic
# regression with extra weight on class 4, fitted on the training data.
# NOTE(review): `supervised` is not used again in this cell.
supervised = make_pipeline(
  StandardScaler(), 
  LogisticRegressionCV(max_iter=1000,
                       class_weight={4: 3}))
supervised.fit(X_train, y_train)

from river import tree
from river import linear_model
from river import preprocessing
from river import optim
from river import anomaly

# NOTE(review): this class duplicates the definition in the preceding cell.
class QuantileFilter(anomaly.QuantileFilter):
  """``anomaly.QuantileFilter`` extended with a boolean ``predict_one``.

  The constructor forwards its arguments unchanged to the parent class.
  """

  def __init__(self, anomaly_detector, q: float, protect_anomaly_detector=True):
    super().__init__(
        anomaly_detector=anomaly_detector,
        q=q,
        protect_anomaly_detector=protect_anomaly_detector,
    )

  def predict_one(self, *args):
    """Return whether the sample's score reaches the current quantile.

    While ``self.quantile.get()`` is falsy (``None`` or ``0``) the threshold
    falls back to ``np.inf``, so no finite score is flagged.
    """
    sample_score = self.score_one(*args)
    threshold = self.quantile.get() or np.inf
    return sample_score >= threshold
# Online anomaly detector: min-max scaling followed by half-space trees whose
# scores are thresholded by the QuantileFilter defined above (q = 0.98).
online = (
    preprocessing.MinMaxScaler() |
    QuantileFilter(
        anomaly.HalfSpaceTrees(
            window_size=150,
            n_trees=30,
            height=10,
            seed=40),
        q=0.98
    )
)

# 2 x 2 figure comparing the batch model with the online model.
import matplotlib.pyplot as plt

fig, axs = plt.subplots(2, 2, figsize=set_size(PLOT_WIDTH, subplots=(2,2)),
                        sharex=True)

# Panel a): training data coloured by label.
s = axs[0, 0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=ListedColormap(colors[0:4]))
axs[0, 0].set_title("a) Training Data")
axs[0, 0].set_ylabel('$y$')
legend1 = axs[0, 0].legend(
    s.legend_elements()[0],
    [r"$y_{\mathrm{ref}} = 40$",
     r"$y_{\mathrm{ref}} = 25$",
     r"$y_{\mathrm{ref}} = 0$",
     r"$y_{\mathrm{a}}$"])
axs[0, 0].add_artist(legend1)

# Palette with the fourth and fifth colour swapped; used from here on.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#9467bd', '#d62728', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Evaluate the batch model (`unsupervised`, fitted in an earlier cell) on a
# grid over the plotted range.
xx, yy = np.meshgrid(np.linspace(0, 10, 200), np.linspace(0, 50, 200))
Z = unsupervised.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Panel c): complete data set coloured by the batch model's verdict
# (-1 is mapped to index 0 -> first colour, 1 -> second colour).
axs[1, 0].scatter(X[:, 0], X[:, 1], c=np.choose(np.where(
    unsupervised.predict(X) == -1, 0, unsupervised.predict(X)), ['#d62728', '#1f77b4']))
axs[1, 0].imshow(
    Z,
    interpolation="nearest",
    extent=(xx.min(), xx.max(), yy.min(), yy.max()),
    cmap=ListedColormap(['#d62728', '#1f77b4']),
    alpha=0.4,
    aspect="auto",
    origin="lower",
)
axs[1, 0].set_title('c) Batch Classifier')
axs[1, 0].set_xlabel('$u$')
axs[1, 0].set_ylabel('$y$')

# Panel b): test data. The first scatter is covered by the second one, which
# draws the same points and provides the legend handles.
axs[0, 1].scatter(X_test[:, 0], X_test[:, 1], c=np.choose(y_test, colors))
s3 = axs[0, 1].scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=ListedColormap(colors[3:5]))
axs[0, 1].set_title("b) Testing Data")
legend1 = axs[0, 1].legend(
    s3.legend_elements()[0],
    [r"$y_{\mathrm{ref}} = 10$",
     r"$y_{\mathrm{a}}$"])
axs[0, 1].add_artist(legend1)

# Panel d): online model. Every sample is first predicted and only then
# learned from; a missing prediction (None) is recorded as 0.
y_online = []
for x_, y_ in zip(X, y):
  x_ = {i: v for i, v in enumerate(x_)}
  y_pred = online.predict_one(x_)
  y_online.append(y_pred) if y_pred is not None else y_online.append(0)
  online.learn_one(x_, y_)
axs[1, 1].scatter(X[:, 0], X[:, 1], c=np.choose(y_online, ['#1f77b4', '#d62728']))
axs[1, 1].set_title('d) Online Classifier')
axs[1, 1].set_xlabel('$u$')

# Same axis limits for all four panels.
xlim = (0, 10)
ylim = (0, 50)
for ax in axs.flatten():
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# plt.subplots_adjust(hspace=0.3, wspace=0.2)

# Save the figure and show it.
# NOTE(review): the following cell saves to the same path "ml_gaps/online.pdf".
plt.savefig("ml_gaps/online.pdf")
plt.show()

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
import random
random.seed(40)
from matplotlib.colors import ListedColormap
# Palette for class colouring (the hex codes of Matplotlib's tab10 cycle).
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

def get_cmap(Z):
  """Return a colormap with one palette colour per distinct value in ``Z``."""
  return ListedColormap(colors[0:len(np.unique(Z))])


# Re-code the anomaly label -1 in the labels of the complete data set as
# class 5. This rebinds `y`, which later code in the notebook then sees.
y = np.where(np.array(y) == -1, 5, y)
# X_test = X
# y_test = X

# Batch classifier: standardization followed by cross-validated logistic
# regression with extra weight on class 4, fitted on the training data.
supervised = make_pipeline(
  StandardScaler(), 
  LogisticRegressionCV(max_iter=1000,
                       class_weight={4: 3}))
supervised.fit(X_train, y_train)

from river import tree
from river import linear_model
from river import preprocessing
from river import optim
from river import multiclass

# NOTE(review): this Hoeffding tree is replaced by the pipeline assigned right
# below before it is ever used.
online = tree.HoeffdingAdaptiveTreeClassifier(
    grace_period=10,
    delta=1e-4,
    seed=5
)
# Online classifier actually used: standardization followed by an output-code
# multiclass wrapper around logistic regression trained with SGD.
online = (
    preprocessing.StandardScaler() |
    multiclass.OutputCodeClassifier(linear_model.LogisticRegression(optimizer=optim.SGD(.1), l2=2), code_size=4)
)

# 2 x 2 figure comparing the batch classifier with the online classifier.
import matplotlib.pyplot as plt

fig, axs = plt.subplots(2, 2, figsize=set_size(PLOT_WIDTH, subplots=(2,2)),
                        sharex=True)

# Panel a): training data coloured by label.
s = axs[0, 0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=ListedColormap(colors[0:4]))
axs[0, 0].set_title("a) Train Data")
axs[0, 0].set_ylabel('$y$')
legend1 = axs[0, 0].legend(
    s.legend_elements()[0],
    [r"$y_{\mathrm{ref}} = 40$",
     r"$y_{\mathrm{ref}} = 25$",
     r"$y_{\mathrm{ref}} = 0$",
     r"$y_{\mathrm{a}}$"])
axs[0, 0].add_artist(legend1)

# Palette with the fourth and fifth colour swapped; used from here on.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#9467bd', '#d62728', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Panel c): complete data set coloured by the batch classifier's prediction.
# NOTE(review): the title says "Decision Tree", but `supervised` is the
# logistic-regression pipeline fitted above.
y_pred = supervised.predict(X)
axs[1, 0].scatter(X[:, 0], X[:, 1], c=np.choose(y_pred, colors))
axs[1, 0].set_title('c) Batch Classifier (Decision Tree)')
axs[1, 0].set_xlabel('$u$')
axs[1, 0].set_ylabel('$y$')

# Panel b): the first scatter is covered by the second one, which draws the
# same points.
# NOTE(review): the title says "All Data", but the points are X_test.
axs[0, 1].scatter(X_test[:, 0], X_test[:, 1], c=np.choose(y_test, colors))
s3 = axs[0, 1].scatter(X_test[:, 0], X_test[:, 1], c=y_test,
                       cmap=ListedColormap(colors[3:5]))
axs[0, 1].set_title("b) All Data")

# Panel d): online classifier. Every sample is first predicted and only then
# learned from; a missing prediction (None) is recorded as class 0.
# NOTE(review): the title says "Hoeffding Tree", but `online` is the
# logistic-regression pipeline assigned above.
y_online = []
for x_, y_ in zip(X, y):
  x_ = {i: v for i, v in enumerate(x_)}
  y_pred = online.predict_one(x_)
  y_online.append(y_pred) if y_pred is not None else y_online.append(0)
  online.learn_one(x_, y_)
axs[1, 1].scatter(X[:, 0], X[:, 1], c=np.choose(y_online, colors))
axs[1, 1].set_title('d) Online Classifier (Hoeffding Tree)')
axs[1, 1].set_xlabel('$u$')

# Same axis limits for all four panels.
xlim = (0, 10)
ylim = (0, 50)
for ax in axs.flatten():
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# plt.subplots_adjust(hspace=0.3, wspace=0.2)

# Save the figure and show it.
# NOTE(review): the preceding cell saves to the same path
# "ml_gaps/online.pdf", so its figure is overwritten here.
plt.savefig("ml_gaps/online.pdf")
plt.show()

# Animations

In [None]:
# Animate the second feature column (the recorded output) sample by sample.
data = X[:,1]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')
ax.set(ylim=[-2, 50])

# Placeholder line; its data is replaced on every frame.
line_data = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')[0]

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Redraw the trace with the first ``frame`` samples.

    Args:
        frame: Frame index supplied by FuncAnimation.

    Returns:
        Tuple with the artists that were modified.
    """
    if frame > 0:
        ax.set(xlim=[0, frame])
    line_data.set_ydata(data[:frame])
    line_data.set_xdata(range(frame))

    # A one-element tuple: "(line_data)" without the comma is just the artist.
    return (line_data,)

ani = animation.FuncAnimation(fig=fig, func=update,
                              frames=len(X), interval=50)

# Write the animation and the figure in its final state to disk.
ani.save("sample_data.mp4", writer="ffmpeg")
# NOTE(review): "sampe_data.pdf" looks like a typo for "sample_data.pdf";
# left unchanged because other files may refer to this name.
fig.savefig("sampe_data.pdf")

In [None]:
# Export the animation created in the preceding cell as an HTML file.
ani.save(filename="tmp/test_sample_data.html", writer="html")

In [None]:
from river.stats import Mean

# Animate the recorded output together with its running mean.
data = X[:,1]

model = Mean()

# History of the running mean, started with the value after the first sample.
# Relies on update() returning the statistic so that .get() can be chained.
means = [model.update(data[0]).get()]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set(xlabel='$t$', ylabel='$y$', ylim=[-2, 50])

# Placeholder lines; their data is replaced on every frame.
line_data, = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')
line_means, = ax.plot(means[0], label=r'$\bar x_n$')

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Feed sample ``frame - 1`` to the running mean and redraw both lines.

    The function is stateful: from frame 2 on, every call updates ``model``
    and extends ``means``, so frames have to be rendered once and in order.
    """
    if frame > 1:
        newest = data[frame-1]
        means.append(model.update(newest).get())
        ax.set(xlim=[0, frame])
    steps = range(frame)
    line_data.set_data(steps, data[:frame])
    line_means.set_data(steps, means[:frame])

    return (line_data, line_means)

ani = animation.FuncAnimation(fig=fig, func=update,
                              frames=len(X), interval=50)

# Write the animation and the figure in its final state to disk.
ani.save("welford.gif")
fig.savefig("welford.pdf")

In [None]:
from river.utils import Rolling
from river.stats import Mean

# Animate the recorded output together with its mean over a rolling window.
data = X[:,1]

rmodel = Rolling(Mean(), window_size=10)

# History of the rolling mean, started with the value after the first sample.
# Relies on update() returning the statistic so that .get() can be chained.
means = [rmodel.update(data[0]).get()]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set(xlabel='$t$', ylabel='$y$', ylim=[-2, 50])

# Placeholder lines; their data is replaced on every frame.
line_data, = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')
line_means, = ax.plot(means[0], label=r'$\bar x_n$', color=colors[2])

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Feed sample ``frame - 1`` to the rolling mean and redraw both lines.

    The function is stateful: from frame 2 on, every call updates ``rmodel``
    and extends ``means``, so frames have to be rendered once and in order.
    """
    if frame > 1:
        newest = data[frame-1]
        means.append(rmodel.update(newest).get())
        ax.set(xlim=[0, frame])
    steps = range(frame)
    line_data.set_data(steps, data[:frame])
    line_means.set_data(steps, means[:frame])

    return (line_data, line_means)

ani = animation.FuncAnimation(fig=fig, func=update,
                              frames=len(X), interval=50)

# Write the animation and the figure in its final state to disk.
ani.save("inv_welford.gif")
fig.savefig("inv_welford.pdf")

In [None]:
from river.utils import Rolling
from river.proba import Gaussian

# Animate a 3-sigma anomaly check on the recorded output, based on a Gaussian
# estimated over a rolling window of 10 samples.
data = X[:,1]
# Same length as data, all NaN; flagged samples are filled in as they occur.
data_anomalies = data.copy() * np.nan
gmodel = Rolling(Gaussian(), window_size=10)

# Histories of the mean and of the upper / lower 3-sigma limits, started with
# the values after the first sample. Relies on update() returning the model
# so that .mu can be chained.
means = [gmodel.update(data[0]).mu]
sigmas_p = [gmodel.mu + 3*gmodel.sigma]
sigmas_m = [gmodel.mu - 3*gmodel.sigma]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')
ax.set(ylim=[-2, 50])

# Placeholder lines; their data is replaced on every frame.
# NOTE(review): the anomaly and limit lines reuse the legend label of the mean.
line_data = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')[0]
line_means = ax.plot(means[0], label=r'$\bar x_n$', color=colors[3])[0]
line_anomalies = ax.plot(means[0], label=r'$\bar x_n$', color=colors[4],
                         marker='o', markersize=2)[0]
line_sigma_p = ax.plot(sigmas_p[0], label=r'$\bar x_n$', 
                       color=line_means.get_color())[0]
line_sigma_m = ax.plot(sigmas_m[0], label=r'$\bar x_n$', 
                       color=line_means.get_color())[0]

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Check sample ``frame - 1`` against the previous limits and redraw.

    From frame 2 on, the sample is flagged when it is not strictly inside the
    limits stored for the previous step; the model is updated with the sample
    in either case. The function is stateful, so frames have to be rendered
    once and in order.
    """
    if frame > 1:
        if ((data[frame-1] < sigmas_p[frame-2]) and (data[frame-1] > sigmas_m[frame-2])):
            pass
        else:
            data_anomalies[frame-1] = data[frame-1]
            
        means.append(gmodel.update(data[frame-1]).mu)
        sigmas_p.append(gmodel.mu + 3*gmodel.sigma)
        sigmas_m.append(gmodel.mu - 3*gmodel.sigma)
        ax.set(xlim=[0, frame])
    line_data.set_xdata(range(frame))
    line_data.set_ydata(data[:frame])
    line_means.set_xdata(range(frame))
    line_means.set_ydata(means[:frame])
    line_anomalies.set_xdata(range(frame))
    line_anomalies.set_ydata(data_anomalies[:frame])
    line_sigma_p.set_xdata(range(frame))
    line_sigma_p.set_ydata(sigmas_p[:frame])
    line_sigma_m.set_xdata(range(frame))
    line_sigma_m.set_ydata(sigmas_m[:frame])
    
    # NOTE(review): line_anomalies is modified above but not returned.
    return (line_data, line_means, line_sigma_p, line_sigma_m)

ani = animation.FuncAnimation(fig=fig, func=update, 
                              frames=len(X), interval=50)

# Write the animation and the figure in its final state to disk.
ani.save("anomalies_inv_welford.gif")
fig.savefig("anomalies_inv_welford.pdf")

In [None]:
from river.utils import Rolling
from river.proba import Gaussian

# Animate a 3-sigma anomaly check in which flagged samples are kept out of the
# model, based on a Gaussian estimated over a rolling window of 10 samples.
data = X[:,1]
# Same length as data, all NaN; flagged samples are filled in as they occur.
data_anomalies = data.copy() * np.nan
gmodel = Rolling(Gaussian(), window_size=10)

# Histories of the mean and of the upper / lower 3-sigma limits, started with
# the values after the first sample. Relies on update() returning the model
# so that .mu can be chained.
means = [gmodel.update(data[0]).mu]
sigmas_p = [gmodel.mu + 3*gmodel.sigma]
sigmas_m = [gmodel.mu - 3*gmodel.sigma]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')
ax.set(ylim=[-2, 50])

# Placeholder lines; their data is replaced on every frame.
# NOTE(review): the anomaly and limit lines reuse the legend label of the mean.
line_data = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')[0]
line_means = ax.plot(means[0], label=r'$\bar x_n$', color=colors[3])[0]
line_anomalies = ax.plot(means[0], label=r'$\bar x_n$', color=colors[4])[0]
line_sigma_p = ax.plot(sigmas_p[0], label=r'$\bar x_n$', 
                       color=line_means.get_color())[0]
line_sigma_m = ax.plot(sigmas_m[0], label=r'$\bar x_n$', 
                       color=line_means.get_color())[0]

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Check sample ``frame - 1`` against the previous limits and redraw.

    From frame 2 on, the model is updated with the sample only while
    ``frame < 20`` or when the sample lies strictly inside the limits stored
    for the previous step. Otherwise the sample is flagged and the current
    mean is repeated. The function is stateful, so frames have to be rendered
    once and in order.
    """
    if frame > 1:
        if (frame < 20) or ((data[frame-1] < sigmas_p[frame-2]) and (data[frame-1] > sigmas_m[frame-2])):
            means.append(gmodel.update(data[frame-1]).mu)
        else:
            data_anomalies[frame-1] = data[frame-1]
            means.append(gmodel.mu)
        sigmas_p.append(gmodel.mu + 3*gmodel.sigma)
        sigmas_m.append(gmodel.mu - 3*gmodel.sigma)
        ax.set(xlim=[0, frame])
    line_data.set_xdata(range(frame))
    line_data.set_ydata(data[:frame])
    line_means.set_xdata(range(frame))
    line_means.set_ydata(means[:frame])
    line_anomalies.set_xdata(range(frame))
    line_anomalies.set_ydata(data_anomalies[:frame])
    line_sigma_p.set_xdata(range(frame))
    line_sigma_p.set_ydata(sigmas_p[:frame])
    line_sigma_m.set_xdata(range(frame))
    line_sigma_m.set_ydata(sigmas_m[:frame])
    
    # NOTE(review): the anomaly and limit lines are modified above but not
    # returned.
    return (line_data, line_means)

ani = animation.FuncAnimation(fig=fig, func=update, 
                              frames=len(X), interval=50)

# Write the animation and the figure in its final state to disk.
ani.save("anomalies_welford.gif")
fig.savefig("anomalies_welford.pdf")

In [None]:
from river.utils import Rolling
from river.proba import Gaussian

WINDOW_SIZE = 7
data = X[:,1]
data_anomalies = data.copy() * np.nan
gmodel = Rolling(Gaussian(), window_size=WINDOW_SIZE)

anomalies = []
means = [gmodel.update(data[0]).mu]
sigmas_p = [gmodel.mu + 3*gmodel.sigma]
sigmas_m = [gmodel.mu - 3*gmodel.sigma]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')
ax.set(ylim=[-2, 50])

line_data = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')[0]
line_means = ax.plot(means[0], label=r'$\bar x_n$', color=colors[3])[0]
line_anomalies = ax.plot(means[0], label=r'$\bar x_n$', color=colors[4], marker='o', markersize=1)[0]
#line_sigma_p = ax.plot(sigmas_p[0], label=r'$\bar x_n$', 
#                       color=line_means.get_color())[0]
#line_sigma_m = ax.plot(sigmas_m[0], label=r'$\bar x_n$', 
#                       color=line_means.get_color())[0]

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Process one sample and refresh the animated lines for ``frame``.

    For ``frame > 1`` the sample ``data[frame-1]`` is accepted (and learned by
    ``gmodel``) when any of these holds:

    * ``frame < 20``;
    * the sample lies strictly inside the 3-sigma band stored for the
      previous frame;
    * the fraction of flagged samples among the last ``WINDOW_SIZE`` flags
      exceeds 0.9973.

    Otherwise the sample is recorded in ``data_anomalies`` and the model is
    left untouched. Relies on the notebook-level ``data``, ``gmodel``,
    ``means``, ``sigmas_p``, ``sigmas_m``, ``anomalies``, ``data_anomalies``
    and the line artists created by the surrounding cell.

    Returns
    -------
    tuple
        ``(line_data, line_means)``.
    """
    if frame > 1:
        sample = data[frame-1]
        # Evaluated lazily, in the same order as a chained `or`.
        accept = frame < 20
        if not accept:
            accept = (sample < sigmas_p[frame-2]) and (sample > sigmas_m[frame-2])
        if not accept:
            recent = anomalies[-WINDOW_SIZE:]
            accept = sum(recent)/len(recent) > 0.9973

        if accept:
            # Update, then read `mu` below: no reliance on update() returning
            # the model (newer river releases reportedly return None).
            gmodel.update(sample)
            anomalies.append(0)
        else:
            data_anomalies[frame-1] = sample
            anomalies.append(1)
        means.append(gmodel.mu)
        sigmas_p.append(gmodel.mu + 3*gmodel.sigma)
        sigmas_m.append(gmodel.mu - 3*gmodel.sigma)
        ax.set(xlim=[0, frame])

    xs = range(frame)
    for artist, series in ((line_data, data),
                           (line_means, means),
                           (line_anomalies, data_anomalies)):
        artist.set_xdata(xs)
        artist.set_ydata(series[:frame])

    return (line_data, line_means)

# Call `update` once per row of X; repeat=False stops the animation after one pass.
ani = animation.FuncAnimation(fig=fig, func=update, 
                              frames=len(X), interval=50, repeat=False)

# Write the animation to a GIF, then save the figure itself as a PDF
ani.save("self_welford.gif")
fig.savefig("self_welford.pdf")

In [None]:
from river.utils import Rolling
from river.proba import Gaussian

WINDOW_SIZE = 7  # number of most recent samples kept by the rolling Gaussian
data = X[:,1]
# All-NaN copy of the signal; `update` copies a sample in only when it flags it
data_anomalies = data.copy() * np.nan
gmodel = Rolling(Gaussian(), window_size=WINDOW_SIZE)

anomalies = []  # one 0/1 flag per processed sample, appended by `update`
# Update first, then read the statistics. This does not rely on `update`
# returning the model (newer river releases reportedly return None).
gmodel.update(data[0])
means = [gmodel.mu]
sigmas_p = [gmodel.mu + 3*gmodel.sigma]
sigmas_m = [gmodel.mu - 3*gmodel.sigma]

fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')
ax.set(ylim=[-2, 50])

line_data = ax.plot(data[0], label=r'$y_{\mathrm{r}}$')[0]
line_anomalies = ax.plot(means[0], label=r'$\bar x_n$', color=colors[4], marker='o', markersize=1)[0]
# The mean line is not drawn in this figure, so the band colour is given
# explicitly (colors[3], the colour the mean line uses in the self_welford
# cell) instead of being read from a `line_means` artist that this cell never
# creates.
# NOTE(review): anomalies and both bounds share the legend label of the mean —
# confirm whether distinct labels were intended.
line_sigma_p = ax.plot(sigmas_p[0], label=r'$\bar x_n$', 
                       color=colors[3])[0]
line_sigma_m = ax.plot(sigmas_m[0], label=r'$\bar x_n$', 
                       color=colors[3])[0]

ax.legend(loc="upper right")

import matplotlib.animation as animation
def update(frame):
    """Process one sample and refresh the animated lines for ``frame``.

    For ``frame > 1`` the sample ``data[frame-1]`` is accepted (and learned by
    ``gmodel``) when ``frame < 20``, when it lies strictly inside the 3-sigma
    band stored for the previous frame, or when the fraction of flagged
    samples among the last ``WINDOW_SIZE`` flags exceeds 0.9973. Otherwise it
    is recorded in ``data_anomalies`` and the model is left untouched.

    Returns
    -------
    tuple
        The artists of this figure that were refreshed.
    """
    if frame > 1:
        sample = data[frame-1]
        # Evaluated lazily, in the same order as a chained `or`.
        accept = frame < 20
        if not accept:
            accept = (sample < sigmas_p[frame-2]) and (sample > sigmas_m[frame-2])
        if not accept:
            recent = anomalies[-WINDOW_SIZE:]
            accept = sum(recent)/len(recent) > 0.9973

        if accept:
            gmodel.update(sample)
            anomalies.append(0)
        else:
            data_anomalies[frame-1] = sample
            anomalies.append(1)
        means.append(gmodel.mu)
        sigmas_p.append(gmodel.mu + 3*gmodel.sigma)
        sigmas_m.append(gmodel.mu - 3*gmodel.sigma)
        ax.set(xlim=[0, frame])

    xs = range(frame)
    for artist, series in ((line_data, data),
                           (line_anomalies, data_anomalies),
                           (line_sigma_p, sigmas_p),
                           (line_sigma_m, sigmas_m)):
        artist.set_xdata(xs)
        artist.set_ydata(series[:frame])

    return (line_data, line_anomalies, line_sigma_p, line_sigma_m)

ani = animation.FuncAnimation(fig=fig, func=update, 
                              frames=len(X), interval=50, repeat=False)

# Write the animation to a GIF, then save the figure itself as a PDF
ani.save("thresh_welford.gif")
fig.savefig("thresh_welford.pdf")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def welford(data):
    """Running mean and sample standard deviation over all samples seen so far.

    Parameters
    ----------
    data : iterable of numbers

    Returns
    -------
    (means, stds) : tuple of lists
        One entry per input sample. The standard deviation uses the ``n - 1``
        denominator and is 0 for the first sample.
    """
    running_mean = 0
    sq_dev_sum = 0  # accumulated sum of squared deviations (M2)
    means, stds = [], []
    for count, value in enumerate(data, start=1):
        dev_before = value - running_mean
        running_mean += dev_before/count
        dev_after = value - running_mean
        sq_dev_sum += dev_before * dev_after
        variance = 0 if count <= 1 else sq_dev_sum/(count-1)
        means.append(running_mean)
        stds.append(np.sqrt(variance))
    return means, stds

def inverse_welford(data, window_size):
    """Sliding-window mean/std updated incrementally as the window moves.

    The statistics of the first ``window_size`` samples are computed directly
    (``np.mean`` / ``np.std``) and repeated ``window_size - 1`` times as
    padding. Each later sample then enters the window while the sample
    ``window_size`` positions earlier leaves it.

    Returns
    -------
    (means, stds) : tuple of lists
        Each of length ``len(data) - 1`` when ``len(data) >= window_size``.
    """
    head = data[:window_size]
    mu = np.mean(head)
    sd = np.std(head)
    padding = window_size - 1
    means = [mu] * padding
    stds = [sd] * padding
    # Pair each incoming sample with the one leaving the window.
    for incoming, outgoing in zip(data[window_size:], data):
        mu = mu + (incoming - outgoing) / window_size
        sd = np.sqrt(((window_size - 2) * sd ** 2 + (incoming - outgoing) ** 2 + (incoming - mu) * (outgoing - mu)) / (window_size - 1))
        means.append(mu)
        stds.append(sd)
    return means, stds

# Use the second column of X as the signal
data = X[:,1]
window_size = 10
# Running statistics over the full history (welford) and over a sliding
# window of `window_size` samples (inverse_welford)
welford_means, w_std = welford(data)
inverse_welford_means, iw_std = inverse_welford(data, window_size=window_size)

# Plot the data and both running means; the signal and the full-history mean
# are drawn faded (alpha=0.25)
fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.plot(data, label=r'$y_{\mathrm{r}}$',
        alpha=0.25)
ax.plot(welford_means, label=r'$\bar x_n$ ($t_\mathrm{e} = \infty$)',
        alpha=0.25)
ax.plot(inverse_welford_means, 
        label=fr'$\bar x_n$ ($t_\mathrm e = {window_size}$)')
ax.legend()
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')

# ax.set_title('Running Means using Welford Algorithm with and without expiration')
# Save the current figure, then display it
plt.savefig("sample/welford_unstable.pdf")
plt.show()

In [None]:
import pandas as pd
# Precomputed windowed means loaded from CSV (first data column, index in column 0)
inverse_welford_means_prot = pd.read_csv(
    "inv_wel_prot_mean.csv", index_col=0).iloc[:,0].values
# Use the second column of X as the signal
data = X[:,1]
window_size = 10
# Running statistics over the full history (welford) and over a sliding
# window of `window_size` samples (inverse_welford)
welford_means, w_std = welford(data)
inverse_welford_means, iw_std = inverse_welford(data, window_size=window_size)

# Plot the data, both computed running means and the CSV-loaded variant
# (marked with '*' in the legend)
fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
ax.plot(data, label=r'$y_{\mathrm{r}}$',
        alpha=0.25)
ax.plot(welford_means, label=r'$\bar x_n$ ($t_\mathrm{e} = \infty$)',
        alpha=0.25)
ax.plot(inverse_welford_means, 
        label=fr'$\bar x_n$ ($t_\mathrm e = {window_size}$)')
ax.plot(inverse_welford_means_prot, 
        label=fr'$\bar x_n$ ($t_\mathrm e = {window_size}$)*')
ax.legend()
ax.set_xlabel('$t$')
ax.set_ylabel('$y$')
# ax.set_title('Running Means using Welford Algorithm with and without expiration')
# Save the current figure, then display it
plt.savefig("sample/welford_compare_prot.pdf")
plt.show()

In [None]:
# The windowed-mean list can be shorter than `data`, so align it with the
# trailing samples before subtracting.
residuals = data[-len(inverse_welford_means)::] - inverse_welford_means

In [None]:
# Precomputed windowed standard deviations loaded from CSV (first data column)
inverse_welford_stds_prot = pd.read_csv(
    "inv_wel_prot_std.csv", index_col=0).iloc[:,0].values

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Fixed seed so the sampled standard deviations are reproducible
random.seed(3)
# Define the mean and standard deviation lists
mean_list = [0]*3  # Example mean values
# NOTE(review): the result of this first sample is overwritten on the next
# line. It still advances the RNG state, so deleting it would change which
# values the second call draws.
std_list = random.sample(list(inverse_welford_stds_prot),3)   # Example standard deviation values
# Four values are drawn, but zip() below stops after the three means.
std_list = random.sample(iw_std,4)   # Example standard deviation values

# Generate a range of x-values
x = np.linspace(-10, 10, 1000)

# Create a figure and axes
fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))

# Iterate over mean and std lists
for mean, std in zip(mean_list, std_list):
    # Calculate the PDF values for the current mean and std
    y = norm.pdf(x, mean, std)

    # Plot the PDF curve
    line, = ax.plot(x, y, label=fr"$\bar x_n={mean}, s_n={std:04.2f}$")
    # Reuse the curve's colour for its bound markers
    line_color = line.get_color()
    # Mark the +/- 3 sigma bounds with dashed vertical lines
    sigma = 3 * std
    ax.axvline(mean + sigma, linestyle='--', color=line_color, alpha=0.5)
    ax.axvline(mean - sigma, linestyle='--', color=line_color, alpha=0.5)

# Set axis labels
ax.set_xlabel("$X$")
ax.set_ylabel("PDF")

# Add a legend
ax.legend()

# Save the current figure, then display it
# NOTE(review): a later cell also writes "sample/sigmas.pdf"; whichever runs
# last overwrites the other.
plt.savefig("sample/sigmas.pdf")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Fixed seed so the sampled means/stds are reproducible.
# NOTE(review): `random` is not imported in this cell; it relies on an
# earlier cell having imported it.
random.seed(3)
# Define the mean and standard deviation lists
mean_list = random.sample(inverse_welford_means,3)  # Example mean values
std_list = random.sample(iw_std,3)   # Example standard deviation values

# Generate a range of x-values
x = np.linspace(-20, 45, 1000)

# Create a figure and axes
fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))

# Iterate over mean and std lists
for mean, std in zip(mean_list, std_list):
    # Calculate the PDF and CDF values for the current mean and std
    # (y_pdf is computed but not used below)
    y_pdf = norm.pdf(x, mean, std)
    y_cdf = norm.cdf(x, mean, std)

    # Plot the CDF curve
    line, = ax.plot(x, y_cdf, label=fr"$\bar x_n={mean:05.2f}, s_n={std:04.2f}$")

    # Calculate 3 sigma bounds
    sigma = 3 * std

    # Get the color of the current plot
    line_color = line.get_color()

    # Mark the probabilities with dashed horizontal lines
    # (redrawn on every iteration, so the lines are stacked)
    ax.axhline(0.9973, linestyle='--', color='k', alpha=0.5)
    ax.axhline(1-0.9973, linestyle='--', color='k', alpha=0.5)
    # NOTE(review): the quantiles use scale=3*std while the plotted CDF uses
    # scale=std, so these vertical lines do not sit where the curve crosses
    # the horizontal probability lines — confirm this is intended.
    ax.axvline(norm.ppf(0.9973, mean, sigma), linestyle='--',
               color=line_color, alpha=0.5)
    ax.axvline(norm.ppf(1-0.9973, mean, sigma), linestyle='--',
               color=line_color, alpha=0.5)

# Set axis labels
ax.set_xlabel("$X$")
ax.set_ylabel("CDF")

# Add a legend
ax.legend()

# Save the current figure, then display it
plt.savefig("sample/cdf_ppf.pdf")
plt.show()

In [None]:
from scipy.stats import norm
# Generate data for the Gaussian distribution
# Standard Gaussian evaluated on +/- 4 sigma
mu = 0
sigma = 1
x = np.linspace(mu - 4*sigma, mu + 4*sigma, 100)
pdf = norm.pdf(x, mu, sigma)

# Sigma multiples marked with vertical lines
sigma_levels = [-3, -2, -1, 1, 2, 3]

# Tick positions and labels for the sigma multiples. Raw f-string: '\s' is
# not a valid escape sequence in a normal string literal.
xtickvals = [mu + level*sigma for level in sigma_levels]
xticklabels = [rf'{level}$\sigma$' for level in sigma_levels]

# Create the plot
fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))

# Gaussian probability density function (takes the first colour of the cycle)
ax.plot(x, pdf, label='PDF')

# All sigma lines share the next colour of the property cycle. It is read from
# rcParams instead of the private `ax._get_lines.prop_cycler`, which newer
# Matplotlib releases no longer provide.
cycle_colors = plt.rcParams["axes.prop_cycle"].by_key().get("color", ["C1"])
sigma_color = cycle_colors[1 % len(cycle_colors)]
for i, level in enumerate(sigma_levels):
    # Only the first line is labelled, so the legend gets a single entry.
    ax.axvline(x=mu + level*sigma, color=sigma_color,
               linestyle='--', label='sigma' if i == 0 else None)

# Unlabelled artists are skipped by the legend
ax.legend()

# Axis labels and tick positions
ax.set_xlabel('x')
ax.set_ylabel('PDF')
ax.set_xticks(xtickvals)
ax.set_xticklabels(xticklabels)

# Save the current figure, then display it
# NOTE(review): an earlier cell also writes "sample/sigmas.pdf"; whichever
# runs last overwrites the other.
plt.savefig("sample/sigmas.pdf")
plt.show()


# BESS

In [None]:
# Subplot margins (fractions of the figure size) for the BESS figures
plt.rcParams["figure.subplot.left"] = 0.10
plt.rcParams["figure.subplot.bottom"] = 0.2
plt.rcParams["figure.subplot.right"] = 0.95
plt.rcParams["figure.subplot.top"] = 0.85

# Ten hex colours, indexed explicitly by plotting code (e.g. colors[2]);
# they appear to match Matplotlib's default colour cycle.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

In [None]:
import pandas as pd
# Load the BESS signal; the first CSV column becomes a UTC-aware DatetimeIndex
df = pd.read_csv('bess/average_temperature.csv', index_col=0)
df.index = pd.to_datetime(df.index, utc=True)
col = 'Average Cell Temperature'  # column analysed by the cells below

In [None]:
def plot_fake_limits_(
    ser: pd.Series,
    window: Union[timedelta, None] = None,
    file_name: Union[str, None] = None,
    save: bool = True,
    **kwargs
):
    """Plot a signal against fixed thresholds in three incremental stages.

    The same figure is saved after each stage (when ``save`` is true):
    signal only, signal with out-of-range samples highlighted, and finally
    with the two constant thresholds (0.58 and 0.7) and shaded regions.

    Parameters
    ----------
    ser : pd.Series
        Time-indexed signal.
    window : timedelta or None
        Only used to build the default file name.
    file_name : str or None
        Base name of the output files (written under ``bess/``); derived from
        ``ser.name`` when None.
    save : bool
        Whether to write the three PDFs.
    **kwargs
        If the key ``"part"`` is present, the axis limits are taken from the
        full series but only the data up to 2022-03-06 is drawn.
    """
    file_name = make_name(ser.name, window, file_name)
    # Constant thresholds carried as series sharing the signal's index
    ser_high = ser.copy()
    ser_low = ser.copy()
    ser_high.iloc[:] = 0.7
    ser_low.iloc[:] = 0.58
    anomalies = (ser_low > ser) | (ser > ser_high)

    fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))

    set_axis_style(ax, ser, "Date", f"{ser.name} [-]")
    ax.set_ylim(0.5, 0.75)

    # Sliced after the axes were styled, so the x-range still spans the
    # full series.
    if "part" in kwargs:
        ser = ser.loc[:'2022-03-06']

    legend_kwargs = dict(bbox_to_anchor=(0., 1.05, 1., .102),
                         loc='lower left', ncols=2, mode="expand",
                         borderaxespad=0.)

    # Stage 1: signal on a regular 1-minute grid; missing minutes become NaN
    # and show up as gaps. `asfreq()` replaces the deprecated `fillna(None)`.
    ax.plot(ser.resample('1min').asfreq(), linewidth=0.7)
    ax.legend(['Signal'], **legend_kwargs)
    if save:
        fig.savefig(f"bess/{file_name}_signal.pdf", backend='pdf')

    # Stage 2: overlay only the samples outside the thresholds
    an_ser = ser.copy()
    an_ser[anomalies == 0] = None
    ax.plot(an_ser, linewidth=1.2, color="r")
    ax.legend(['Signal', "Anomalies"], **legend_kwargs)
    if save:
        fig.savefig(f"bess/{file_name}_anomalies.pdf", backend='pdf')

    # Stage 3: threshold lines and shaded out-of-range regions
    ax.plot(ser_high.index, ser_high, color=(
        1, 0, 0, 0.25), linewidth=0.7, label=r'Threshold')
    ax.plot(ser_low.index, ser_low, color=(1, 0, 0, 0.25),
            linewidth=0.7, label=r'Threshold')

    ax.fill_between(ser_high.index, ser_high, 0.75,
                    color=(1, 0, 0, 0.1), alpha=0.1)
    ax.fill_between(ser_low.index, ser_low, 0.5,
                    color=(1, 0, 0, 0.1), alpha=0.1)

    if save:
        fig.savefig(f"bess/{file_name}_thresh.pdf", backend='pdf')

# Three variants of the fixed-threshold figure: data up to 2022-03-06 only,
# the full series passed with the "part" option, and the full series.
plot_fake_limits_(df[col].loc[:'2022-03-06'], save=True, file_name="naive/min")
plot_fake_limits_(df[col], save=True, file_name="naive/pred", **{"part":True})
plot_fake_limits_(df[col], save=True, file_name="naive/full")

In [None]:
def plot_signal_(
        ser: pd.Series,
        window: Union[timedelta, None] = None,
        file_name: Union[str, None] = None,
        save: bool = True,
        ax_: Union[plt.Axes, None] = None,
        **kwargs
):
    """Plot a time-indexed signal, optionally on top of lines from another axes.

    Parameters
    ----------
    ser : pd.Series
        Time-indexed signal.
    window : timedelta or None
        Only used to build the default file name.
    file_name : str or None
        Output path prefix; ``"_signal.pdf"`` is appended.
    save : bool
        Whether to write the PDF.
    ax_ : matplotlib Axes or None
        If given, every line of that axes is re-drawn first, at 50 % opacity.
    **kwargs
        Accepted but unused.

    Returns
    -------
    matplotlib Axes
        The axes of the new figure.
    """
    file_name = make_name(ser.name, window, file_name)

    fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
    set_axis_style(ax, ser, "Date", f"{ser.name} [-]")
    ax.set_ylim(0.5, 0.75)

    if ax_ is not None:
        for line in ax_.get_lines():
            ax.plot(*line.get_data(), label=line.get_label(), 
                    alpha=0.5, linewidth=line.get_linewidth())

    # Regular 1-minute grid; missing minutes become NaN and show up as gaps.
    # `asfreq()` replaces the deprecated `fillna(None)`.
    ax.plot(ser.resample('1min').asfreq(), linewidth=0.7, label="Signal")
    ax.legend(bbox_to_anchor=(0., 1.05, 1., .102),
        loc='lower left', ncols=2, mode="expand", borderaxespad=0.)

    if save:
        fig.savefig(f"{file_name}_signal.pdf", backend='pdf')

    return ax

# Keep the returned axes so that later cells can re-draw its lines
ax_signal = plot_signal_(df[col], save=True, file_name="bess/all")

In [None]:
from river.stats import Mean

# Cumulative (all-history) mean of the signal, one value per row
model = Mean()

means = []

for x in df[col]:
    # Update, then read: does not rely on update() returning the model
    # (newer river releases reportedly return None).
    model.update(x)
    means.append(model.get())

ser_means = pd.Series(means, index=df.index, name=f"{df[col].name}")

In [None]:
def plot_mean_(
        ser: pd.Series,
        window: Union[timedelta, None] = None,
        file_name: Union[str, None] = None,
        save: bool = True,
        ax_: Union[plt.Axes, None] = None,
        **kwargs
):
    """Plot a mean series, optionally on top of lines copied from ``ax_``.

    Lines taken from ``ax_`` keep their label and line width. ``**kwargs``
    are forwarded to the ``ax.plot`` call that draws ``ser``. When ``save``
    is true the figure is written to ``f"{file_name}_signal.pdf"``.

    Returns
    -------
    matplotlib Axes
        The axes of the new figure.
    """
    file_name = make_name(ser.name, window, file_name)

    fig, ax = plt.subplots(figsize=set_size(PLOT_WIDTH, ))
    set_axis_style(ax, ser, "Date", f"{ser.name} [-]")
    ax.set_ylim(0.5, 0.75)

    # Background lines first, so the mean is drawn on top of them
    background = [] if ax_ is None else ax_.get_lines()
    for src in background:
        xs, ys = src.get_data()
        ax.plot(xs, ys, label=src.get_label(),
                linewidth=src.get_linewidth())

    ax.plot(ser, linewidth=2, label="Mean", **kwargs)

    legend_options = {
        "bbox_to_anchor": (0., 1.05, 1., .102),
        "loc": 'lower left',
        "ncols": 2,
        "mode": "expand",
        "borderaxespad": 0.,
    }
    ax.legend(**legend_options)

    if not save:
        return ax
    fig.savefig(f"{file_name}_signal.pdf", backend='pdf')
    return ax

# Cumulative mean drawn over the lines of the raw-signal axes
ax_m = plot_mean_(ser_means, save=True, file_name="bess/mean", ax_=ax_signal)

In [None]:
import datetime as dt
from river.utils import TimeRolling
from river.stats import Mean

WINDOW = dt.timedelta(hours=24*7)  # one-week rolling period

rmodel = TimeRolling(Mean(), period=WINDOW)

rmeans = []

for t, x in df[col].items():
    # The model is fed timezone-naive timestamps. Update, then read: does not
    # rely on update() returning the model (newer river releases reportedly
    # return None).
    rmodel.update(x, t=t.tz_localize(None))
    rmeans.append(rmodel.get())

ser_rmeans = pd.Series(rmeans, index=df.index, name=f"{df[col].name}")

In [None]:
# Time-rolling mean drawn over the lines of the raw-signal axes;
# `color` is forwarded to the plot call of the mean line.
ax_rm = plot_mean_(ser_rmeans, save=True, file_name="bess/rmean",
                   ax_=ax_signal,
                   color=colors[2])

In [None]:
import datetime as dt
from river.utils import TimeRolling
from river.proba import Gaussian

WINDOW = dt.timedelta(hours=24*7)  # one-week rolling period

gmodel = TimeRolling(Gaussian(), period=WINDOW)

gmeans = []; gstd_p = []; gstd_m = []
# Flags are collected in a list and turned into a Series once after the loop;
# growing a Series one label at a time copies it on every insertion.
flags = []

for i, (t, x) in enumerate(df[col].items()):
    # The first 60*24 samples are never flagged; afterwards a sample is normal
    # when it lies strictly inside the band computed before it was learned.
    in_band = (i < 60*24) or ((x < gstd_p[-1]) and (x > gstd_m[-1]))
    flags.append(0 if in_band else 1)
    # Every sample is learned, flagged or not. Update, then read: does not
    # rely on update() returning the model (newer river releases reportedly
    # return None).
    gmodel.update(x, t=t.tz_localize(None))
    gmeans.append(gmodel.mu)
    gstd_p.append(gmodel.mu + 3*gmodel.sigma)
    gstd_m.append(gmodel.mu - 3*gmodel.sigma)

anomalies = pd.Series(flags, index=df.index, dtype=int)

df_g = pd.DataFrame({"Mean": gmeans, "Std_P": gstd_p, "Std_M": gstd_m},
                    index=df.index)

In [None]:
# Signal with the learned +/- 3 sigma bounds; no anomaly flags are passed.
# plot_limits_ is not defined in this part of the notebook (presumably it
# comes from plot_matplotlib).
plot_limits_(df[col], None, df_g.Std_P, df_g.Std_M, save=True, 
             file_name="bess/thresh_unsupervised")

In [None]:
import datetime as dt
from river.utils import TimeRolling
from river.proba import Gaussian

WINDOW_SIZE = dt.timedelta(hours=24*7)  # one-week rolling period

gmodel = TimeRolling(Gaussian(), period=WINDOW_SIZE)

gmeans = []; gstd_p = []; gstd_m = []
# Flags are collected in a list and turned into a Series once after the loop;
# growing a Series one label at a time copies it on every insertion.
flags = []

for i, (t, x) in enumerate(df[col].items()):
    # No grace period here: the first sample has no band to compare against
    # and is therefore flagged.
    in_band = i > 0 and ((x < gstd_p[-1]) and (x > gstd_m[-1]))
    flags.append(0 if in_band else 1)
    # Every sample is learned, flagged or not. Update, then read: does not
    # rely on update() returning the model (newer river releases reportedly
    # return None).
    gmodel.update(x, t=t.tz_localize(None))
    gmeans.append(gmodel.mu)
    gstd_p.append(gmodel.mu + 3*gmodel.sigma)
    gstd_m.append(gmodel.mu - 3*gmodel.sigma)

anomalies = pd.Series(flags, index=df.index, dtype=int)

df_g = pd.DataFrame({"Mean": gmeans, "Std_P": gstd_p, "Std_M": gstd_m},
                    index=df.index)

In [None]:
# Signal, anomaly flags and learned +/- 3 sigma bounds.
# plot_limits_ is not defined in this part of the notebook (presumably it
# comes from plot_matplotlib).
plot_limits_(df[col], anomalies, df_g.Std_P, df_g.Std_M, save=True, 
             file_name="bess/thresh_anomaly_unsupervised")

In [None]:
import datetime as dt
from river.utils import TimeRolling
from river.proba import Gaussian

WINDOW_SIZE = dt.timedelta(hours=24*7)  # one-week rolling period

gmodel = TimeRolling(Gaussian(), period=WINDOW_SIZE)

gmeans = []; gstd_p = []; gstd_m = []
# Flags are collected in a list and turned into a Series once after the loop;
# growing a Series one label at a time copies it on every insertion.
flags = []

for i, (t, x) in enumerate(df[col].items()):
    # The first 60*24 samples are always accepted; afterwards a sample is
    # accepted when it lies strictly inside the previous 3-sigma band.
    in_band = (i < 60*24) or ((x < gstd_p[-1]) and (x > gstd_m[-1]))
    if in_band:
        # Only accepted samples are learned. Update, then read `mu` below:
        # does not rely on update() returning the model (newer river releases
        # reportedly return None).
        gmodel.update(x, t=t.tz_localize(None))
        flags.append(0)
    else:
        flags.append(1)
    gmeans.append(gmodel.mu)
    gstd_p.append(gmodel.mu + 3*gmodel.sigma)
    gstd_m.append(gmodel.mu - 3*gmodel.sigma)

anomalies = pd.Series(flags, index=df.index, dtype=int)

df_g = pd.DataFrame({"Mean": gmeans, "Std_P": gstd_p, "Std_M": gstd_m},
                    index=df.index)

In [None]:
from datetime import timedelta

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# import matplotlib as mpl
# mpl.use('macOsX')

# Global Matplotlib style: LaTeX text rendering, Times New Roman, small
# fonts, grid on, and fixed subplot margins.
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "Times New Roman",
    "axes.labelsize": 10,
    "axes.grid": True,
    "font.size": 10,
    "legend.fontsize": 8,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "figure.figsize": plt.rcParamsDefault["figure.figsize"],
    "figure.subplot.left": 0.1,
    "figure.subplot.bottom": 0.2,
    "figure.subplot.right": 0.95,
    "figure.subplot.top": 0.85,
    # "backend": "macOsX"
})

# Width handed to set_size() by the plotting code
# (presumably points — TODO confirm against the target document)
PLOT_WIDTH = 0.75*398.3386

# Shared date locator/formatter installed on the x-axis by set_axis_style()
locator = mdates.AutoDateLocator()
formatter = mdates.ConciseDateFormatter(
    locator,
    formats=['%Y', '%d %b', '%d %b', '%H:%M', '%H:%M', '%S.%f'],
    offset_formats=['', '%Y', '', '%Y-%b-%d', '%Y-%b-%d', '%Y-%b-%d %H:%M'])


def set_size(width=307.28987, fraction=1.4, subplots=(1, 1)):
    """Compute figure dimensions so the figure needs no scaling in LaTeX.

    Parameters
    ----------
    width: float or string
            Document width in points, or the name of a predefined document
            type ('thesis' or 'beamer')
    fraction: float, optional
            Fraction of the width which the figure should occupy
    subplots: array-like, optional
            The number of rows and columns of subplots.
    Returns
    -------
    fig_dim: tuple
            Dimensions of the figure in inches, as (width, height)
    """
    # Named document types map to a fixed width in points
    if width == 'thesis':
        base_pt = 426.79135
    elif width == 'beamer':
        base_pt = 307.28987
    else:
        base_pt = width

    pt_to_inch = 1 / 72.27
    # Golden ratio to set aesthetic figure height
    # https://disq.us/p/2940ij3
    aspect = (5**.5 - 1) / 2

    n_rows, n_cols = subplots[0], subplots[1]
    width_in = (base_pt * fraction) * pt_to_inch
    height_in = width_in * aspect * (n_rows / n_cols)

    return (width_in, height_in)


def set_axis_style(ax, ser, xlabel='', ylabel=''):
    """Label ``ax``, fit its limits to ``ser`` and install the date ticks.

    The x-limits span the index of ``ser`` and the y-limits span its values.
    The notebook-level ``locator`` and ``formatter`` are installed on the
    x-axis, and the x tick labels are rotated by 50 degrees at size 8.
    """
    first_stamp, last_stamp = ser.index.min(), ser.index.max()
    lowest, highest = ser.min(), ser.max()

    ax.set_xlabel(xlabel)
    ax.set_ylabel(f"{ylabel}")
    ax.set_xlim([first_stamp, last_stamp])
    ax.set_ylim(lowest, highest)

    x_axis = ax.xaxis
    x_axis.set_major_locator(locator)
    x_axis.set_major_formatter(formatter)
    ax.tick_params(axis='x', labelrotation=50, labelsize=8)


def plot_anomalies(ax, a):
    """Shade every anomalous interval described by ``a`` on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes (anything providing ``axvspan``)
    a : pd.Series
        Differenced 0/1 anomaly flags: ``1`` marks the start of an anomaly
        and ``-1`` the first sample after it. The index is assumed to be
        sorted.

    An anomaly already active at the first sample produces an end marker
    without a start, and one still active at the last sample produces a start
    without an end. Both cases are completed with the first and last index
    value respectively, so starts and ends are always paired correctly.
    """
    if len(a) == 0:
        return
    starts = list(a[a == 1].index)
    ends = list(a[a == -1].index)
    # Series begins inside an anomaly: the first event is an end marker.
    if ends and (not starts or ends[0] < starts[0]):
        starts.insert(0, a.index[0])
    # Series finishes inside an anomaly: the last start has no end marker.
    if len(starts) > len(ends):
        ends.append(a.index[-1])
    for x0, x1 in zip(starts, ends):
        ax.axvspan(x0, x1, facecolor='red', alpha=0.5,
                   linewidth=1.1, edgecolor='red')


def make_name(name, window, file_name):
    """Return ``file_name``, or build a default from ``name`` and ``window``.

    An explicit ``file_name`` is returned unchanged. Otherwise spaces in
    ``name`` become underscores and a suffix is appended: the window length
    in whole hours plus ``_hours_sliding`` when ``window`` is truthy, or
    plain ``_sliding`` when it is not.
    """
    if file_name is not None:
        return file_name

    stem = name.replace(' ', '_')
    if not window:
        return f"{stem}_sliding"

    hours = int(window.total_seconds()/60/60)
    return f"{stem}_{hours}_hours_sliding"


def plot_gaussian_pdf(
        sample_high: Union[pd.Series, None] = None,
        sample_low: Union[pd.Series, None] = None,
        sample_mean: Union[pd.Series, None] = None,
        ax = None,
        **kwargs):
    """Draw a sideways Gaussian PDF with shaded regions beyond the limits.

    The standard deviation is ``(sample_high - sample_mean) / 3``. The
    density is evaluated at 55 points across ``kwargs["ylim"]`` and drawn
    with the density on the x-axis and the value on the y-axis. ``"ylim"``
    is required in ``kwargs``. A new figure is created when ``ax`` is None.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=set_size())

    spread = (sample_high - sample_mean)/3

    y_floor, y_ceil = kwargs["ylim"][0], kwargs["ylim"][1]
    values = np.linspace(y_floor, y_ceil, 55)
    density = (1/(spread * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((values - sample_mean)/spread)**2)

    # Both shaded regions share one style; only the upper one gets a label
    band_style = dict(color=(1, 0, 0, 0.1), edgecolor=(1, 0, 0, 0.5),
                      linestyle="-", linewidth=0.7)
    ax.fill_between([0, max(density)], sample_high, y_ceil,
                    label=r'Limits', **band_style)
    ax.fill_between([0, max(density)], sample_low, y_floor, **band_style)

    ax.plot(density, values)  # density on x, value on y
    ax.set_xlabel('PDF')
    ax.set_xlim((min(density), max(density)))
    ax.set_ylim(kwargs["ylim"])
    ax.grid(True)

def plot_limits__(
    ser: pd.Series,
    anomalies: Union[pd.Series, None] = None,
    ser_high: Union[pd.Series, None] = None,
    ser_low: Union[pd.Series, None] = None,
    ser_mean: Union[pd.Series, None] = None,
    window: Union[timedelta, None] = None,
    file_name: Union[str, None] = None,
    save: bool = True,
    **kwargs
):
    """Plot signal, anomalies and limits next to the Gaussian at the last sample.

    The left panel shows the signal (cut at 2022-03-05), the flagged samples
    and the shaded regions outside ``ser_high`` / ``ser_low``. The right
    panel shows the Gaussian PDF implied by the high/mean values at the last
    plotted timestamp; it is only drawn when ``ser_high``, ``ser_low`` and
    ``ser_mean`` are all given.

    Parameters
    ----------
    ser : pd.Series
        Time-indexed signal.
    anomalies : pd.Series or None
        0/1 flags; samples whose flag equals 0 are hidden from the anomaly
        trace.
    ser_high, ser_low, ser_mean : pd.Series or None
        Upper bound, lower bound and mean over time.
    window : timedelta or None
        Only used to build the default file name.
    file_name : str or None
        Output path prefix; ``"_thresh.pdf"`` is appended.
    save : bool
        Whether to write the PDF.
    **kwargs
        ``ylim``: y-limits (defaults to the range of the plotted signal).
        ``xticks_on``: ``"anomalies"`` to tick the days on which an anomaly
        starts, or a pandas object whose index provides the ticks.
    """
    file_name = make_name(ser.name, window, file_name)

    fig, ax = plt.subplots(1, 2, figsize=set_size(PLOT_WIDTH, ),
                           sharey=True, 
                           gridspec_kw={'width_ratios': [0.8, 0.2]})
    fig.subplots_adjust(wspace=1/20)

    ser = ser.loc[:"2022-03-05"]
    set_axis_style(ax[0], ser, "Date", f"{ser.name} [-]")
    if "ylim" not in kwargs:
        kwargs["ylim"] = (ser.min(), ser.max())
    ax[0].set_ylim(kwargs["ylim"])

    # Type checks instead of `==` / truthiness: comparing a pandas object
    # with a string, or taking its truth value, raises.
    xticks_on = kwargs.get("xticks_on")
    if isinstance(xticks_on, str) and xticks_on == "anomalies":
        a = anomalies.astype(int).diff()
        b = a[a == 1].resample('1d').sum()
        ax[0].set_xticks(b[b > 0].index.map(str))
    elif isinstance(xticks_on, (pd.Series, pd.DataFrame)):
        ax[0].set_xticks(xticks_on.index.map(str))

    # Regular 1-minute grid; missing minutes become NaN and show up as gaps.
    # `asfreq()` replaces the deprecated `fillna(None)`.
    ax[0].plot(ser.resample('1min').asfreq(), linewidth=0.7, label="Signal")

    if anomalies is not None:
        an_ser = ser.copy()
        an_ser[anomalies == 0] = None
        ax[0].plot(an_ser, linewidth=1.2, color="r", label="Anomalies",
                marker='.', markersize=0.8)

    if (ser_high is not None) and (ser_low is not None):
        ax[0].fill_between(ser_high.index, ser_high, kwargs["ylim"][1], 
                        label=r'Limits',
                        color=(1, 0, 0, 0.1), edgecolor=(1, 0, 0, 0.5),
                        linestyle="-", linewidth=0.7,)
        ax[0].fill_between(ser_low.index, ser_low, kwargs["ylim"][0],
                        color=(1, 0, 0, 0.1), edgecolor=(1, 0, 0, 0.5),
                        linestyle="-", linewidth=0.7,)

    ax[0].legend(bbox_to_anchor=(0., 1.05, 1., .102),
              loc='lower left', ncols=3, mode="expand", borderaxespad=0.)

    # The PDF panel needs all three series; skip it instead of failing on
    # the None defaults.
    if (ser_high is not None) and (ser_low is not None) and (ser_mean is not None):
        last_stamp = ser.index[-1]
        plot_gaussian_pdf(
            ser_high.loc[last_stamp],
            ser_low.loc[last_stamp],
            ser_mean.loc[last_stamp],
            ax[1], **kwargs)

    if save:
        fig.savefig(f"{file_name}_thresh.pdf", backend='pdf')
    
    
# Two-panel figure (signal + PDF) written to "test_thresh.pdf"
plot_limits__(df[col], anomalies, df_g.Std_P, df_g.Std_M, df_g.Mean,
              save=True, file_name="test", **{"ylim": (0.5, 0.75)})

In [None]:
# Signal, anomaly flags and learned bounds.
# plot_limits_ is not defined in this part of the notebook (presumably it
# comes from plot_matplotlib).
plot_limits_(df[col], anomalies, df_g.Std_P, df_g.Std_M, save=True, 
             file_name="bess/thresh_anomaly_selfsupervised")

In [None]:
def rolling_mean(x, window):
    """Mean of the last ``window`` items of ``x``.

    Parameters
    ----------
    x : sequence supporting slicing (list, pandas Series, ...)
    window : int
        Number of trailing items to average; fewer are used when ``x`` is
        shorter.

    Returns
    -------
    The mean of the trailing items, or 1 when ``x`` is empty (the same
    convention as ``time_rolling_mean``, instead of raising
    ``ZeroDivisionError``).
    """
    tail = x[-window:]
    count = len(tail)
    if count == 0:
        return 1
    return sum(tail)/count

In [None]:
import datetime as dt
from river.utils import TimeRolling
from river.proba import Gaussian

WINDOW_SIZE = dt.timedelta(hours=24*7)  # one-week rolling period

gmodel = TimeRolling(Gaussian(), period=WINDOW_SIZE)

gmeans = []; gstd_p = []; gstd_m = []
# Flags are collected in a list (rolling_mean only needs slicing and len) and
# turned into a Series once after the loop; growing a Series one label at a
# time copies it on every insertion.
flags = []

for i, (t, x) in enumerate(df[col].items()):
    # Accept the sample during the first 60*24 samples, when it lies strictly
    # inside the previous 3-sigma band, or when more than 99.73 % of the last
    # 300 samples were flagged. `or` keeps the checks lazy, so the band lists
    # and `flags` are only read once they are non-empty.
    accept = (
        (i < 60*24) or
        ((x < gstd_p[-1]) and (x > gstd_m[-1])) or
        (rolling_mean(flags, 300) > 0.9973)
    )
    if accept:
        # Update, then read `mu` below: does not rely on update() returning
        # the model (newer river releases reportedly return None).
        gmodel.update(x, t=t.tz_localize(None))
        flags.append(0)
    else:
        flags.append(1)
    gmeans.append(gmodel.mu)
    gstd_p.append(gmodel.mu + 3*gmodel.sigma)
    gstd_m.append(gmodel.mu - 3*gmodel.sigma)

anomalies = pd.Series(flags, index=df.index, dtype=int)

df_g = pd.DataFrame({"Mean": gmeans, "Std_P": gstd_p, "Std_M": gstd_m},
                    index=df.index)

In [None]:
# Display only (save=False): signal, anomaly flags and learned bounds
plot_limits_(df[col], anomalies, df_g.Std_P, df_g.Std_M, save=False)

In [None]:
def time_rolling_mean(x, t, dt):
    """Mean of the entries of ``x`` selected by the slice ``x[t - dt:]``.

    Returns 1 when that slice is empty.
    """
    recent = x[t-dt::]
    count = len(recent)
    if count > 0:
        return sum(recent)/count
    return 1

In [None]:
import datetime as dt
from river.utils import TimeRolling
from river.proba import Gaussian

WINDOW_SIZE = dt.timedelta(hours=24*7)  # one-week rolling period

gmodel = TimeRolling(Gaussian(), period=WINDOW_SIZE)

# `anomalies` stays a time-indexed Series inside the loop because
# time_rolling_mean slices it by timestamp.
gmeans = []; gstd_p = []; gstd_m = []; anomalies = pd.Series()

for i, (t, x) in enumerate(df.iterrows()):
    x = x[col]
    # Accept the sample during the first 60*24 samples, when it lies strictly
    # inside the previous 3-sigma band, or when more than 99.73 % of the
    # flags from the last 5 hours are anomalies. `or` keeps the checks lazy.
    accept = (
        (i < 60*24) or
        ((x < gstd_p[-1]) and (x > gstd_m[-1])) or
        (time_rolling_mean(anomalies, t, dt.timedelta(hours=5)) > 0.9973)
    )
    if accept:
        # Update, then read `mu` below: does not rely on update() returning
        # the model (newer river releases reportedly return None).
        gmodel.update(x, t=t.tz_localize(None))
        anomalies[t] = 0
    else:
        anomalies[t] = 1
    gmeans.append(gmodel.mu)
    gstd_p.append(gmodel.mu + 3*gmodel.sigma)
    gstd_m.append(gmodel.mu - 3*gmodel.sigma)

df_g = pd.DataFrame({"Mean": gmeans, "Std_P": gstd_p, "Std_M": gstd_m},
                    index=df.index)

In [None]:
# Display only (save=False): signal and learned bounds, no anomaly flags passed
plot_limits_(df[col], None, df_g.Std_P, df_g.Std_M, save=False)

In [None]:
# Precomputed detector output; the first CSV column becomes a UTC-aware
# DatetimeIndex. Later cells read its anomaly, level_high and level_low columns.
df_out = pd.read_csv("bess/bess_output.csv", index_col=0)
df_out.index = pd.to_datetime(df_out.index, utc=True)

In [None]:
# Detector output with fixed y-limits and no "xticks_on" option
plot_limits_(df[col], df_out.anomaly, df_out.level_high, df_out.level_low, save=True, file_name="bess/final_uniform_xticks", **{"ylim": (0.5, 0.75)})

In [None]:
# Same figure with the "xticks_on": "anomalies" option; no file_name is
# passed, so the output name is left to plot_limits_.
plot_limits_(df[col], df_out.anomaly, df_out.level_high, df_out.level_low, save=True, **{"xticks_on": "anomalies", "ylim": (0.5, 0.75)})

In [None]:
def plot_limits_halfwidth(
    ser: pd.Series,
    anomalies: Union[pd.Series, None] = None,
    ser_high: Union[pd.Series, None] = None,
    ser_low: Union[pd.Series, None] = None,
    window: Union[timedelta, None] = None,
    file_name: Union[str, None] = None,
    save: bool = True,
    **kwargs
):
    """Plot signal, anomalies and limits on a figure of half the usual width.

    Parameters
    ----------
    ser : pd.Series
        Time-indexed signal.
    anomalies : pd.Series or None
        0/1 flags; samples whose flag equals 0 are hidden from the anomaly
        trace.
    ser_high, ser_low : pd.Series or None
        Upper and lower bound over time; the regions beyond them are shaded.
    window : timedelta or None
        Only used to build the default file name.
    file_name : str or None
        Base name; the file is written to ``bess/half_<file_name>_thresh.pdf``.
    save : bool
        Whether to write the PDF.
    **kwargs
        ``ylim``: y-limits (defaults to the range of the signal).
        ``xticks_on``: ``"anomalies"`` to tick the days on which an anomaly
        starts, or a pandas object whose index provides the ticks.
    """
    file_name = make_name(ser.name, window, file_name)
    w, h = set_size(PLOT_WIDTH, )
    w *= 0.5
    fig, ax = plt.subplots(figsize=(w, h))

    set_axis_style(ax, ser, "Date", f"{ser.name} [-]")
    if "ylim" not in kwargs:
        kwargs["ylim"] = (ser.min(), ser.max())
    ax.set_ylim(kwargs["ylim"])

    # Type checks instead of `==` / truthiness: comparing a pandas object
    # with a string, or taking its truth value, raises.
    xticks_on = kwargs.get("xticks_on")
    if isinstance(xticks_on, str) and xticks_on == "anomalies":
        a = anomalies.astype(int).diff()
        b = a[a == 1].resample('1d').sum()
        ax.set_xticks(b[b > 0].index.map(str))
    elif isinstance(xticks_on, (pd.Series, pd.DataFrame)):
        ax.set_xticks(xticks_on.index.map(str))

    # Regular 1-minute grid; missing minutes become NaN and show up as gaps.
    # `asfreq()` replaces the deprecated `fillna(None)`.
    ax.plot(ser.resample('1min').asfreq(), linewidth=0.7, label="Signal")

    if anomalies is not None:
        an_ser = ser.copy()
        an_ser[anomalies == 0] = None
        ax.plot(an_ser, linewidth=1.2, color="r", label="Anomalies",
                marker='.', markersize=0.8)

    if (ser_high is not None) and (ser_low is not None):
        ax.fill_between(ser_high.index, ser_high, kwargs["ylim"][1],
                        label=r'Limits',
                        color=(1, 0, 0, 0.1), edgecolor=(1, 0, 0, 0.5),
                        linestyle="-", linewidth=0.7,)
        ax.fill_between(ser_low.index, ser_low, kwargs["ylim"][0],
                        color=(1, 0, 0, 0.1), edgecolor=(1, 0, 0, 0.5),
                        linestyle="-", linewidth=0.7,)

    ax.legend(bbox_to_anchor=(0., 1.05, 1., .102),
              loc='lower left', ncols=3, mode="expand", borderaxespad=0.)

    # NOTE(review): the "bess/" prefix is fixed, so figures for other data
    # sets are written there as well.
    if save:
        fig.savefig(f"bess/half_{file_name}_thresh.pdf", backend='pdf')

In [None]:
# Half-width close-up of 2022-03-06 .. 2022-03-15; every input is sliced to the same range
plot_limits_halfwidth(df[col]['2022-03-06':'2022-03-15'], df_out.anomaly['2022-03-06':'2022-03-15'], df_out.level_high['2022-03-06':'2022-03-15'], df_out.level_low['2022-03-06':'2022-03-15'], save=True, **{"xticks_on": "anomalies", "ylim": (0.5, 0.75)})

In [None]:
def plot_compare_anomalies_(
        ser: pd.Series,
        anomalies: pd.DataFrame,
        window: Union[timedelta, None] = None,
        file_name: Union[str, None] = None,
        save: bool = True,
        **kwargs):
    """Compare detectors: one row per column of ``anomalies``, each over the signal.

    The figure is saved once with only the signal in the first row (suffix
    ``_0``) and again after each detector row has been filled in (suffixes
    ``_a``, ``_b``, ...), so the saved files build up incrementally.

    Parameters
    ----------
    ser : pd.Series
        Time-indexed signal, drawn in every row.
    anomalies : pd.DataFrame
        One 0/1 flag column per detector; the column name is used as the
        row's y-label.
    window : timedelta or None
        Only used to build the default file name.
    file_name : str or None
        Output path prefix.
    save : bool
        Whether to write the PDFs.
    **kwargs
        ``ylim``: y-limits (defaults to the range of the signal).
        ``xticks_on``: ``"anomalies"`` to tick the days on which an anomaly
        of the last column starts, or a pandas object whose index provides
        the ticks.
    """
    file_name = make_name(ser.name, window, file_name)

    n_rows = len(anomalies.columns)
    # squeeze=False always yields a 2-D array, so a single detector column
    # works as well.
    fig, axs = plt.subplots(nrows=n_rows, ncols=1,
                            figsize=set_size(subplots=(1, 1)),
                            sharex=True, squeeze=False)
    axs = axs[:, 0]

    if "ylim" not in kwargs:
        kwargs["ylim"] = (ser.min(), ser.max())

    # Regular 1-minute grid; missing minutes become NaN and show up as gaps.
    # `asfreq()` replaces the deprecated `fillna(None)`.
    signal = ser.resample('1min').asfreq()

    axs[0].plot(signal, linewidth=0.7, label="Signal")

    for ax in axs:
        set_axis_style(ax, ser, "",
                       r"\begin{center}~\\~\\T~[-]\end{center}")
        ax.set_ylim(kwargs["ylim"])

    # Type checks instead of `==` / truthiness: comparing a pandas object
    # with a string, or taking its truth value, raises.
    xticks_on = kwargs.get("xticks_on")
    if isinstance(xticks_on, str) and xticks_on == "anomalies":
        a = anomalies.iloc[:,-1].astype(int).diff()
        b = a[a == 1].resample('1d').sum()
        # Only the bottom row carries the x-label ("Date", as in the other
        # plotting helpers; the original "Data" looked like a typo).
        set_axis_style(axs[-1], ser, "Date",
                       r"\begin{center}~\\~\\T~[-]\end{center}")
        axs[-1].set_ylim(kwargs["ylim"])
        axs[-1].set_xticks(b[b > 0].index.map(str))
    elif isinstance(xticks_on, (pd.Series, pd.DataFrame)):
        axs[-1].set_xticks(xticks_on.index.map(str))

    if save:
        fig.savefig(f"{file_name}_compare_anomalies_0.pdf")

    for row, anomaly in enumerate(anomalies, start=0):
        if row != 0:
            axs[row].plot(signal, linewidth=0.7, label="Signal")
        axs[row].set_ylabel(anomaly)

        a = anomalies[anomaly].astype(int).diff()
        plot_anomalies(axs[row], a)

        axs[0].legend(
            ['Signal', "Anomalies"], bbox_to_anchor=(0., 1.05, 1., .102),
            loc='lower left', ncols=2, mode="expand", borderaxespad=0.)

        if save:
            # chr(97 + row) gives the suffixes 'a', 'b', 'c', ...
            fig.savefig(f"{file_name}_compare_anomalies_{chr(97+row)}.pdf")

    plt.show()

In [None]:
# Detector comparison flags for the BESS data; the first CSV column becomes
# a UTC-aware DatetimeIndex.
df_compare = pd.read_csv("bess/comparison_output.csv", index_col=0)
df_compare.index = pd.to_datetime(df_compare.index, utc=True)
# Column names double as LaTeX y-axis labels in the comparison figure
df_compare = df_compare.rename(columns={
    'HST': r"\begin{center}Half-Space\\Trees\\T~[-]\end{center}",
    'OSVM': r"\begin{center}One-Class\\SVM\\T~[-]\end{center}",
    "ICDF": r"\begin{center}\textbf{Real-Time\\OD}\\T~[-]\end{center}"})
# file_name="bess/" makes the outputs "bess/_compare_anomalies_*.pdf"
plot_compare_anomalies_(df[col], df_compare, save=True, file_name="bess/",
                        **{"xticks_on": "anomalies", "ylim": (0.5, 0.75)})

# Inverter

In [None]:
import pandas as pd
# Load the inverter signal; this rebinds `df` and `col` used by the cells below
df = pd.read_csv('inverter/inverter_temperature.csv', index_col=0)
df.index = pd.to_datetime(df.index, utc=True)
col = 'Inverter Temperature'

In [None]:
# Precomputed detector output for the inverter; this rebinds `df_out`
df_out = pd.read_csv("inverter/inverter_output.csv", index_col=0)
df_out.index = pd.to_datetime(df_out.index, utc=True)

In [None]:
# Inverter signal with detector output, y-limits fixed to (0, 1).
# plot_limits_ is not defined in this part of the notebook (presumably it
# comes from plot_matplotlib).
plot_limits_(df[col], df_out.anomaly, df_out.level_high, df_out.level_low,
             save=True, file_name="inverter/inverter",
             **{"xticks_on": "anomalies", "ylim": (0, 1)})

In [None]:
# Half-width close-up of 2022-03-29 .. 2022-04-14.
# NOTE(review): plot_limits_halfwidth writes under "bess/" even for inverter data.
plot_limits_halfwidth(df[col]['2022-03-29':'2022-04-14'], df_out.anomaly['2022-03-29':'2022-04-14'], df_out.level_high['2022-03-29':'2022-04-14'], df_out.level_low['2022-03-29':'2022-04-14'], save=True, **{"xticks_on": "anomalies", "ylim": (0, 1)})

In [None]:
# Detector comparison flags for the inverter data; the first CSV column
# becomes a UTC-aware DatetimeIndex.
df_compare = pd.read_csv("inverter/comparison_output.csv", index_col=0)
df_compare.index = pd.to_datetime(df_compare.index, utc=True)
# Column names double as LaTeX y-axis labels in the comparison figure
df_compare = df_compare.rename(columns={
    'HST': r"\begin{center}Half-Space\\Trees\\T~[-]\end{center}",
    'OSVM': r"\begin{center}One-Class\\SVM\\T~[-]\end{center}",
    "ICDF": r"\begin{center}\textbf{Real-Time\\OD}\\T~[-]\end{center}"})

In [None]:
# file_name="inverter/" makes the outputs "inverter/_compare_anomalies_*.pdf"
plot_compare_anomalies_(df[col], df_compare, save=True, file_name="inverter/",
                        **{"xticks_on": "anomalies", "ylim": (0, 1)})