In [None]:
from termcolor import colored

import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

import os
from os import listdir
from os.path import isfile, join

from pathlib import Path
import sys
PROJECT_DIR =Path(os.path.abspath('')).parents[1]
sys.path.append(os.fspath(PROJECT_DIR))

from pipeline.definitions import *
from pipeline.preprocessing.data_preprocessing import statistical_prepro

from os import listdir
from os.path import isfile, join

import matplotlib as mpl
import colorsys
import matplotlib.colors as mc
import itertools
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split
from pipeline.monitoring_fx import *

## Define general parameters for figures

In [None]:
# Figure preset used by the cells below: "notebook" or "article"
graph_setting = "notebook"

In [None]:
#Figure-wide settings (sizes, fonts, colors, output directory) derived from graph_setting
if graph_setting=="article":
    #journal-quality parameter settings
    resolution_factor=2
    desired_font=10
elif graph_setting=="notebook":
    resolution_factor=1
    desired_font=12
else:
    #fail here: otherwise resolution_factor and desired_font stay undefined and
    #the problem only shows up as a NameError a few lines below
    raise ValueError("Unknown graph_setting '"+str(graph_setting)+"': expected 'article' or 'notebook'")

#conversion factors
cm_to_inch=0.393701
classic_proportion=6.4/4.8
golden_rate=1.618

#Elsevier column width is 8.4 cm, double-column width is 17.7 cm (in inches: 3.31 and 6.97)
small_figsize=(resolution_factor*3.31, resolution_factor*3.31/classic_proportion)
big_figsize=(resolution_factor*6.97, resolution_factor*6.97/classic_proportion)
#other figure sizes
square_figsize=(resolution_factor*3.31, resolution_factor*3.31)
long_ts_figsize=(resolution_factor*10.5, resolution_factor*2.4)

#changes regarding font type
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['font.family'] = "Arial"

font_size=resolution_factor*desired_font

#define path for figures; the FIGURES_PATH environment variable, when set, replaces the default location
figures_path=os.environ.get("FIGURES_PATH", "C:/Users/simone.eiraudo/OneDrive - Politecnico di Torino/Comodino/PhD/final defense/images")
#check existence of figure path
if not os.path.exists(figures_path):
    print("The selected directory to store figures does not exist")

#define colors for images
#a single dictionary: the model palette used to be defined first and then discarded,
#because colors was re-created from scratch for the time-series and anomaly entries
colors={
    #models
    "P_phys": "sandybrown",
    "P_semiPar": "indianred",
    "P_blackbox": "mediumpurple",
    #time series
    "P": "gray",
    "P_pred": "indianred",
    "cum_errors": "gray",
    #anomaly detection
    "real_anom": "gray",
    "det_anomaly": "indianred",
    "threshold": "red",
    "c_plus": "lightsalmon",
    "c_minus": "springgreen",
}

## Data Import

In [None]:
# Case-study dataset (cleaned) read with pandas
method_short = "ts"  # "ts" stands for time series, "ew" for element-wise
dataset = pd.read_csv(os.path.join(DATA_CLEAN, 'buildings_dataset_clean.csv'))
print(f"Shape of dataset: {dataset.shape}")

# Description of the dataset variables (one row per variable, with its plot label)
data_description = pd.read_csv(f"{DATASETS}/buildings_data_description.csv")

# Residuals time series stored as a pickle
# NOTE(review): pickle.load executes arbitrary code from the file; only load files produced by this project
file_name = "residuals"
with open(f"{RESULTS}/{file_name}.pkl", 'rb') as f:
    residuals = pickle.load(f)

# Converter between the names used by the generator and the IDs used in this project
sim_ID_conv = pd.read_excel(os.path.join(DATASETS, "ID_converter_sim.xlsx"))

In [None]:
#merge results and original dataset: one residual column per model, filled case study by case study
dataset["residuals_semiPar"]=np.nan
dataset["residuals_ES"]=np.nan
IDs=dataset.ID.unique()
for caseStudy in IDs:
    res_ES=residuals["ES", caseStudy]
    res_semiPar=residuals["semiPar", caseStudy]
    #the row mask is the same for both columns, so it is computed once
    is_caseStudy=dataset.ID==caseStudy
    #.values: the residuals are written by position, ignoring the index of the stored series
    dataset.loc[is_caseStudy, "residuals_ES"]=res_ES.values
    dataset.loc[is_caseStudy, "residuals_semiPar"]=res_semiPar.values

#limit the analysis to simulated case studies with anomalies
#explicit copy: columns are added to this frame later on, and writing into a
#filtered selection of dataset is what triggers pandas' SettingWithCopyWarning
abnormal_buildings_original_ID=sim_ID_conv.loc[["anomaly" in build for build in sim_ID_conv.generator_names], "my_names"].values
dataset_anomalies=dataset.loc[dataset.ID.isin(abnormal_buildings_original_ID), :].copy()

#create a separate dataframe for comparative analysis (anomaly VS no anomaly)
normal_buildings_original_ID=sim_ID_conv.loc[[("anomaly" not in build) and ("retrofit" not in build) for build in sim_ID_conv.generator_names], "my_names"].values
dataset_normal=dataset.loc[dataset.ID.isin(normal_buildings_original_ID), :].copy()

## Check predictions

#### Full time series

In [None]:
#One panel per case study: 24-sample rolling mean of the measured and of the predicted load
plots_IDs=IDs
n_plots=len(plots_IDs)

if graph_setting=="article":
    fig_size=big_figsize
elif graph_setting=="notebook":
    fig_size=(long_ts_figsize[0], long_ts_figsize[1]*n_plots)

#squeeze=False + ravel: axs is always a 1-D array of Axes, also when there is a single case study
fig, axs = plt.subplots(n_plots, figsize=fig_size, squeeze=False)
axs=axs.ravel()

for caseStudy, ax in zip(plots_IDs, axs):

    df=dataset.loc[dataset.ID==caseStudy].reset_index()
    #keep the first 8760 rows (presumably one year of hourly samples - TODO confirm)
    df=df.iloc[:8760, :]
    #prediction = measurement - residual; the same series feeds the plot and the MAPE
    P_pred=df.P-df.residuals_semiPar
    daily_P_real=df.P.rolling(24, min_periods=1).mean()
    daily_P_pred=P_pred.rolling(24, min_periods=1).mean()
    ax.plot(daily_P_real/1000, color=colors["P"])
    ax.plot(daily_P_pred/1000, color=colors["P_pred"])
    ax.tick_params(labelsize=font_size)
    ax.set_ylabel(data_description.loc[data_description["Variable_name"]=="P", "variable_label"].values[0], fontsize=font_size)
    #only the bottom panel keeps its x ticks, whatever the number of panels
    if ax is not axs[-1]:
        ax.set_xticks([])

    print(caseStudy+": predictions MAPE: "+str(mean_absolute_percentage_error(df.P, P_pred))+"\n")

#panel letters on the first two panels (zip stops early when only one panel exists)
for panel_label, panel_ax in zip(['a)', 'b)'], axs):
    panel_ax.text(-0.1, 0.5, panel_label, fontsize=font_size, horizontalalignment='center', verticalalignment='center', transform=panel_ax.transAxes)
axs[0].legend(["Real", "Predicted"], fontsize=font_size)
axs[-1].set_xlabel("Time $[h]$", fontsize=font_size)
plt.tight_layout()

#fig.savefig(figures_path+"/pred_ts_results", bbox_inches='tight', dpi=200)

#### Zoom on winter

In [None]:
# Two-panel zoom on rows 1500..1667 of the first two case studies: measured vs predicted load
plots_IDs = [IDs[0], IDs[1]]
n_plots = len(plots_IDs)

if graph_setting == "notebook":
    fig_size = (long_ts_figsize[0], long_ts_figsize[1] * n_plots)
elif graph_setting == "article":
    fig_size = big_figsize

fig, axs = plt.subplots(n_plots, figsize=fig_size)

zoom_on = range(1500, 1668)

for caseStudy, ax in zip(plots_IDs, axs):
    # rows of this case study, renumbered from 0, restricted to the zoom window
    df = dataset.loc[dataset.ID == caseStudy].reset_index().iloc[zoom_on, :]
    measured = df.P / 1000
    predicted = (df.P - df.residuals_semiPar) / 1000
    ax.plot(measured, color=colors["P"])
    ax.plot(predicted, color=colors["P_pred"])
    ax.tick_params(labelsize=font_size - 2)
    ax.set_ylabel(
        data_description.loc[data_description["Variable_name"] == "P", "variable_label"].values[0],
        fontsize=font_size,
    )
    # x ticks are kept on the second panel only
    if ax is not axs[1]:
        ax.set_xticks([])

# panel letters to the left of each panel
for panel_label, ax in zip(('a)', 'b)'), (axs[0], axs[1])):
    ax.text(-0.1, 0.5, panel_label, fontsize=font_size, ha='center', va='center', transform=ax.transAxes)
axs[0].legend(["Real", "Predicted"], fontsize=font_size)
axs[1].set_xlabel("Time $[h]$", fontsize=font_size)
plt.tight_layout()

#fig.savefig(figures_path+"/pred_ts_results_winter", bbox_inches='tight', dpi=200)

#### Zoom on summer

In [None]:
# Two-panel zoom on rows 5000..5167 of the first two case studies: measured vs predicted load
plots_IDs = [IDs[0], IDs[1]]
n_plots = len(plots_IDs)

if graph_setting == "notebook":
    fig_size = (long_ts_figsize[0], long_ts_figsize[1] * n_plots)
elif graph_setting == "article":
    fig_size = big_figsize

fig, axs = plt.subplots(n_plots, figsize=fig_size)

zoom_on = range(5000, 5168)

for caseStudy, ax in zip(plots_IDs, axs):
    # rows of this case study, renumbered from 0, restricted to the zoom window
    df = dataset.loc[dataset.ID == caseStudy].reset_index().iloc[zoom_on, :]
    measured = df.P / 1000
    predicted = (df.P - df.residuals_semiPar) / 1000
    ax.plot(measured, color=colors["P"])
    ax.plot(predicted, color=colors["P_pred"])
    ax.tick_params(labelsize=font_size - 2)
    ax.set_ylabel(
        data_description.loc[data_description["Variable_name"] == "P", "variable_label"].values[0],
        fontsize=font_size,
    )
    # x ticks are kept on the second panel only
    if ax is not axs[1]:
        ax.set_xticks([])

# panel letters to the left of each panel
for panel_label, ax in zip(('a)', 'b)'), (axs[0], axs[1])):
    ax.text(-0.1, 0.5, panel_label, fontsize=font_size, ha='center', va='center', transform=ax.transAxes)
axs[0].legend(["Real", "Predicted"], fontsize=font_size)
axs[1].set_xlabel("Time $[h]$", fontsize=font_size)
plt.tight_layout()

#fig.savefig(figures_path+"/pred_ts_results_summer", bbox_inches='tight', dpi=200)

## Check residuals distribution

In [None]:
# Residual diagnostics of the first case study: one row of panels per model
caseStudy = IDs[0]
df = dataset.loc[dataset.ID == caseStudy, :]

fig, ax = plt.subplots(2, 2, figsize=(8, 6))
for row, residual_column in enumerate(("residuals_ES", "residuals_semiPar")):
    plot_residuals(df[residual_column], df.T_a, ax=ax[row])
fig.tight_layout()
# row labels are set after tight_layout, on the left panel of each row
for row, row_label in enumerate(("ES model residuals", "Semipar model residuals")):
    ax[row, 0].set_ylabel(row_label)
plt.show()

### Normalize errors

In [None]:
#calculate normalized residuals: residual of each row divided by the measured load P of the same row
#the division is row-wise, so a single vectorized operation over the whole dataset
#gives the same values as looping over the case studies one by one
dataset["normalized_residuals_semiPar"]=dataset["residuals_semiPar"]/dataset["P"]

#### Compare errors and normalized errors

In [None]:
#Scatter of raw (left) and normalized (right) residuals of the first case study
fig, axs = plt.subplots(1, 2, figsize=(square_figsize[0]*2, square_figsize[1]))

caseStudy=IDs[0]

for col, ax in zip(["residuals_semiPar", "normalized_residuals_semiPar"], axs):
    df=dataset.loc[dataset.ID==caseStudy].reset_index()

    #raw residuals are scaled by 1000, the same factor applied to P when it is plotted in kW,
    #so that the values match the "[kW]" axis label; normalized residuals are dimensionless
    if col=="residuals_semiPar":
        df[col]=df[col]/1000

    #x data is T_a, the variable announced by the x label and by the name of the saved figure
    #NOTE(review): P was plotted here before, under the T_a label - confirm T_a is the intended variable
    ax.scatter(df.T_a, df[col], color="gray", s=2)
    ax.tick_params(labelsize=font_size-2)

    ax.set_xlabel(data_description.loc[data_description["Variable_name"]=="T_a", "variable_label"].values[0], fontsize=font_size)

    #symmetric limits around zero, wide enough for the largest residual of either sign
    y_extent=df[col].abs().max()
    ylims=[-y_extent, +y_extent]

    #small margin for the normalized residuals
    if ax is axs[1]:
        ylims=[ylims[0]-0.05, ylims[1]+0.05]

    ax.set_ylim(ylims)

axs[0].set_ylabel("Errors $[kW]$", fontsize=font_size)
axs[1].set_ylabel("Normalized Errors [-]", fontsize=font_size)

plt.tight_layout()

#fig.savefig(figures_path+"/scatter_errors_VS_temp", bbox_inches='tight', dpi=200)

## Check cumulative errors

In [None]:
#One panel per case study: cumulative sum of the semi-parametric residuals from row 8760 onwards
plots_IDs=IDs
n_plots=len(plots_IDs)

if graph_setting=="article":
    fig_size=big_figsize
elif graph_setting=="notebook":
    fig_size=(long_ts_figsize[0], long_ts_figsize[1]*n_plots)

#squeeze=False + ravel: axs is always a 1-D array of Axes, also when there is a single case study
fig, axs = plt.subplots(n_plots, figsize=fig_size, squeeze=False)
axs=axs.ravel()

for caseStudy, ax in zip(plots_IDs, axs):
    df=dataset.loc[dataset.ID==caseStudy].reset_index()
    #rows after the first 8760 (the part of the series not shown in the full time series plot)
    df=df.iloc[8760:, :]
    cum_err=np.cumsum(df.residuals_semiPar/1000)
    ax.plot(cum_err, color=colors["cum_errors"])
    ax.tick_params(labelsize=font_size-2)
    ax.set_ylabel("Electrical energy $[kWh]$", fontsize=font_size)
    #only the bottom panel keeps its x ticks, whatever the number of panels
    if ax is not axs[-1]:
        ax.set_xticks([])
    ax.legend(["Cumulative of errors \nfor "+caseStudy], fontsize=font_size-2)
    print("Cumulated deviation for "+caseStudy+": "+str(cum_err[-1:].values))
    print("Total energy demand for "+caseStudy+": "+str(df.P.sum()/1000))
    #cum_err is already divided by 1000, hence the factor 1000 to get a plain ratio
    print("Relative deviation for "+caseStudy+": "+str(cum_err[-1:].values/(df.P.sum())*1000))

#panel letters on the first two panels (zip stops early when only one panel exists)
for panel_label, panel_ax in zip(['a)', 'b)'], axs):
    panel_ax.text(-0.1, 0.5, panel_label, fontsize=font_size, horizontalalignment='center', verticalalignment='center', transform=panel_ax.transAxes)
axs[-1].set_xlabel("Time $[h]$", fontsize=font_size)
plt.tight_layout()

#fig.savefig(figures_path+"/cum_of_errors_ts_results", bbox_inches='tight', dpi=200)

## Check CUSUM tab

In [None]:
# One panel per case study: C+ and C- returned by cusum_tab, with the threshold as a horizontal line
plots_IDs = IDs
n_plots = len(plots_IDs)

if graph_setting == "notebook":
    fig_size = (long_ts_figsize[0], long_ts_figsize[1] * n_plots)
elif graph_setting == "article":
    fig_size = big_figsize

fig, axs = plt.subplots(n_plots, figsize=fig_size)

for caseStudy, ax in zip(plots_IDs, axs):
    df = dataset.loc[dataset.ID == caseStudy].reset_index()
    res = df["residuals_semiPar"]
    norm_res = res / df.P
    # unshuffled split: the first 67% of the series is the setup part, the last 33% the monitored part
    _, _, res_setup, res_monitor = train_test_split(
        norm_res, norm_res, test_size=0.33, shuffle=False
    )
    Cpos, Cneg, anomaly_threshold, sigma = cusum_tab(
        res_setup, x_monitor=res_monitor, k=1, h=24, moving_range=False
    )
    Cpos, Cneg = pd.Series(Cpos), pd.Series(Cneg)

    for statistic, statistic_label, color_key in (
        (Cpos, r"$C_+$", "c_plus"),
        (Cneg, r"$C_-$", "c_minus"),
    ):
        ax.plot(statistic, label=statistic_label, color=colors[color_key])
    ax.hlines(
        y=anomaly_threshold,
        xmin=Cpos.index[0],
        xmax=Cpos.index[-1],
        ls="-.",
        color=colors["threshold"],
        label="H",
    )

    ax.grid()

    ax.tick_params(labelsize=font_size - 2)
    ax.set_ylabel("Electrical energy $[kWh]$", fontsize=font_size)
    # tick labels are shown on the bottom panel only
    if ax is not axs[-1]:
        ax.set_xticklabels([])

axs[0].legend(fontsize=font_size)
axs[-1].set_xlabel("Time $[h]$", fontsize=font_size)
plt.tight_layout()

#fig.savefig(figures_path+"/cumsumtab_example", bbox_inches='tight', dpi=200)

# Anomaly detection

In [None]:
# IDs present in the subset of case studies flagged as abnormal
abnormal_IDs = dataset_anomalies["ID"].unique()

In [None]:
# Warn the reader when the dataset holds no abnormal case study
if len(abnormal_IDs) < 1:
    print(
        colored(
            "No abnormal events exist in the employed dataset\nThe remaining cells of the present notebook will not provide any result",
            "red",
        )
    )

## Tune control chart parameters

In [None]:
# Weight given to sensitivity in the anomaly detection score (precision gets 1-w_sens)
w_sens = 0.8

abnormal_IDs = dataset_anomalies.ID.unique()

# Grid of k (18 values between 0.1 and 3.5) and h (16 values between 3 and 48)
ks = np.linspace(0.1, 3.5, 18)
hs = np.linspace(3, 48, 16)

# One result table per metric: rows indexed by k, columns by h
anomaly_detection_score_matrix = pd.DataFrame(index=ks, columns=hs)
f1_score_matrix = pd.DataFrame(index=ks, columns=hs)
sensitivity_score_matrix = pd.DataFrame(index=ks, columns=hs)
precision_score_matrix = pd.DataFrame(index=ks, columns=hs)
det_delay_matrix = pd.DataFrame(index=ks, columns=hs)

# Metrics computed from the total confusion matrix alone, paired with their result table
matrix_scorers = (
    (f1_score_matrix, f1_from_matrix),
    (sensitivity_score_matrix, sensitivity_from_matrix),
    (precision_score_matrix, precision_from_matrix),
)

for k, h in itertools.product(ks, hs):
    df, confusion_matrix_dict, total_con_mat, det_delay = calculate_confusion_matrix(
        dataset_anomalies, abnormal_IDs, k=k, h=h
    )
    anomaly_detection_score_matrix.loc[k, h] = anomaly_detection_score(
        total_con_mat, w_sensitivity=w_sens
    )
    for score_matrix, scorer in matrix_scorers:
        score_matrix.loc[k, h] = scorer(total_con_mat)
    # first value of the row-wise median of the detection delays
    det_delay_matrix.loc[k, h] = det_delay.median(axis=1).values[0]

In [None]:
def _highlight_high_score(x):
    """Return the cell CSS: pale green when the score is above 0.77, white otherwise."""
    if x>0.77:
        return "background-color: palegreen"
    return "background-color: white"

# Score grid with the cells above 0.77 highlighted
anomaly_detection_score_matrix.style.applymap(_highlight_high_score)

In [None]:
#select best configurations: the (k, h) cells are extracted in order of increasing detection delay
#undefined delays are replaced by 1000 once, before the loop; the same value marks the cells already extracted
matrix_copy=det_delay_matrix.copy().fillna(1000)
#never ask for more configurations than there are cells in the grid: once every cell has been
#extracted the minimum is the marker value itself and an already extracted cell would be selected again
n_config=min(300, matrix_copy.size)
best_config_pos={}
best_config_params={}

for i in range(n_config):
    min_value=matrix_copy.min().min()
    #idx is used as a (row position, column position) pair - see locate_in_df
    idx  = locate_in_df(matrix_copy, min_value)
    best_config_params[i]="k="+str(matrix_copy.index[idx[0]].round(2))+", h="+str(matrix_copy.columns[idx[1]])
    matrix_copy.iloc[idx]=1000
    best_config_pos[i]=idx

#one row per extracted configuration, first indexed by its position in the grid
best_config=pd.DataFrame(index=list(best_config_pos.values()),columns=["Anomaly Score", "Sensitivity", "Precision", "Detection delay"])
best_config["Parameters"]=best_config_params.values()

for ind in best_config.index:
    r , c = ind
    best_config.loc[best_config.index==ind, "Anomaly Score"]=anomaly_detection_score_matrix.iloc[r, c].round(3)
    best_config.loc[best_config.index==ind, "Sensitivity"]= sensitivity_score_matrix.iloc[r, c].round(3)
    best_config.loc[best_config.index==ind, "Precision"]= precision_score_matrix.iloc[r, c].round(3)
    best_config.loc[best_config.index==ind, "Detection delay"]= det_delay_matrix.iloc[r, c].round(3)

best_config=best_config.set_index("Parameters")

#select up to five configurations among the ones that detect anomalies rapidly
#(rows are already ordered by increasing detection delay)
best_config_fast=best_config.loc[best_config.Sensitivity>0.85]
best_config_fast=best_config_fast.loc[best_config_fast.Precision>0.1].head(5)

#select up to five configurations with the highest anomaly score among the ones with high sensitivity and precision
best_config_precise=best_config.loc[best_config.Sensitivity>0.9]
best_config_precise=best_config_precise.loc[best_config_precise.Precision>0.15].sort_values(by="Anomaly Score", ascending=False).head(5)

In [None]:
# Show both shortlists: fastest detection first, highest anomaly score second
display(best_config_fast, best_config_precise)

## Anomaly detection step

In [None]:
#add an empty detected_anomaly column
#assign() returns a new DataFrame, so nothing is written through a filtered selection
#of dataset (the pattern that raises pandas' SettingWithCopyWarning)
dataset_anomalies=dataset_anomalies.assign(detected_anomaly=np.nan)

In [None]:
# Control chart parameters used for the detection step
k, h = 0.9, 42

# Detect anomalies (it will be done for all case studies)
new_dataset, confusion_matrix_dict, total_con_mat, det_delay = calculate_confusion_matrix(
    dataset_anomalies,
    abnormal_IDs,
    k=k,
    h=h,
)

In [None]:
# Show the detection delays returned by calculate_confusion_matrix in the previous cell
det_delay

In [None]:
#divide anomalies according to type of anomaly
anomaly_case_IDs=dataset_anomalies.ID.unique()
#the anomaly type is read from the third character of the second "_"-separated part of the ID
#(e.g. the "1" of "building_1010")
n_anomalies=len({x.split("_")[1][2] for x in anomaly_case_IDs})
n_simulations=len(anomaly_case_IDs)//n_anomalies
#one column per anomaly type ("Anomaly A", "Anomaly B", ...), built from the number of types found
anomaly_columns=["Anomaly "+chr(ord("A")+i) for i in range(n_anomalies)]
#assumes the delays are ordered experiment by experiment, with the anomaly types in the same order
#inside each experiment - TODO confirm against calculate_confusion_matrix
#NOTE: det_delay is overwritten by its reshaped version
det_delay=pd.DataFrame(np.reshape(det_delay, [n_simulations,n_anomalies]), index=range(1, n_simulations+1), columns=anomaly_columns)
det_delay.index.name="Experiment"

det_delay

## Check detection of different types of anomaly

In [None]:
# One case study per anomaly type
caseStudy_1, caseStudy_2, caseStudy_3, caseStudy_4, caseStudy_5 = (
    "building_1010",
    "building_1020",
    "building_1030",
    "building_1040",
    "building_1050",
)

# Data of each case study as retrieved from the generator
dfs_from_generator = {}
for caseStudy in (caseStudy_1, caseStudy_2, caseStudy_3, caseStudy_4, caseStudy_5):
    dfs_from_generator[caseStudy] = retrive_data_from_generator(caseStudy)

# Magnitude of the anomalies (just for anomaly A and B)
cop_degradation_pct = dfs_from_generator[caseStudy_1].cop_anom_degr.unique().max() * 100
print("Anomaly A: degradation of COP: " + str(cop_degradation_pct) + " %")

P_tlc_levels = dfs_from_generator[caseStudy_2].P_tlc.unique()
rack_maintenance_pct = (1 - P_tlc_levels.min() / P_tlc_levels.max()).round(4) * 100
print("Anomaly B: precentage of rack under maintenance: " + str(rack_maintenance_pct) + " %")

In [None]:
#One four-panel figure per type of anomaly: load, CUSUM statistics, anomaly flags, cumulative energy deviation
caseStudies=[caseStudy_1,caseStudy_2,caseStudy_3,caseStudy_4,caseStudy_5]

for anomaly, caseStudy in zip(["A", "B", "C", "D", "E"], caseStudies):

    if graph_setting=="article":
        fig_size=big_figsize
    elif graph_setting=="notebook":
        fig_size=(long_ts_figsize[0], long_ts_figsize[1]*4)

    fig, axs = plt.subplots(4, figsize=fig_size)
    axs[0].set_title("Casestudy: "+caseStudy)
    #the mask is built from new_dataset itself: a mask taken from another frame only works
    #as long as both frames happen to share the same index
    df=new_dataset.loc[new_dataset.ID==caseStudy].reset_index()
    #reference case study without anomaly: same ID with the last two characters replaced by "00"
    df_normal=dataset_normal.loc[dataset_normal.ID==caseStudy[:-2]+"00"].reset_index()
    cum_err=np.cumsum(df.residuals_semiPar/1000)
    res=df["residuals_semiPar"]
    norm_res=res/df.P
    _, _, res_setup, res_monitor = train_test_split(norm_res, norm_res, test_size=0.33, shuffle=False)
    Cpos, Cneg, anomaly_threshold ,sigma= cusum_tab(res_setup, x_monitor=res_monitor, k=k, h=h, moving_range=False)
    Cpos=pd.Series(Cpos)
    Cneg=pd.Series(Cneg)

    #rows flagged as anomalous in the data
    anom_index=df.loc[df.anomaly==1, :].index

    #calculate cumulative deviations (estimated and real), both set to zero at the first anomalous time step
    anom_start_cum_value=cum_err.loc[anom_index[0]]
    cum_est_dev=cum_err-anom_start_cum_value

    real_dev=((df.P-df_normal.P)/1000).cumsum()
    anom_start_cum_value=real_dev.loc[anom_index[0]]
    real_dev=real_dev-anom_start_cum_value

    #zoom window: 5 time steps before the anomaly up to 4 after its last step (the range end is exclusive),
    #clipped to the rows available in df so that label-based selection cannot fail at the edges
    zoom_on=range(max(anom_index.min()-5, df.index[0]), min(anom_index.max()+5, df.index[-1]+1))

    #calculate percentage of additional consumption
    add_power=(df.residuals_semiPar/df.P).loc[range(anom_index.min(), anom_index.max())].mean()
    #calculate wasted energy
    add_energy=real_dev[anom_index.max()]
    est_add_energy=cum_est_dev[anom_index.max()]

    #calculate wasted energy at the time of detection of anomaly
    df_zoom=df.loc[zoom_on]
    detection_index=df_zoom.index[(df_zoom.detected_anomaly==1).values]
    if len(detection_index)>0:
        add_energy_time_of_detection=real_dev[detection_index.min()]
    else:
        print(colored("WARNING: no anomaly was detected for casestudy "+caseStudy, "red"))
        #a single missing value: there is no time of detection to evaluate the energy at
        add_energy_time_of_detection=np.nan

    axs[0].plot((df.P/1000).loc[zoom_on], color=colors["P"])
    axs[0].plot(((df.P-df.residuals_semiPar)/1000).loc[zoom_on], color=colors["P_pred"])

    axs[1].plot(Cpos.loc[zoom_on], label=r"$C_+$", color=colors["c_plus"])
    axs[1].plot(Cneg.loc[zoom_on], label=r"$C_-$", color=colors["c_minus"])
    axs[1].hlines(y=anomaly_threshold, xmin=anom_index.min()-5, xmax=anom_index.max()+5, ls="-.", color=colors["threshold"], label="H")

    axs[2].plot(df.anomaly.loc[zoom_on], color=colors["real_anom"])
    axs[2].plot(df.detected_anomaly.loc[zoom_on], color=colors["det_anomaly"])

    axs[3].plot(real_dev.loc[zoom_on], color=colors["real_anom"])
    axs[3].plot(cum_est_dev.loc[zoom_on], color=colors["det_anomaly"])

    for ax in axs:
        ax.tick_params(labelsize=font_size-2)
        ax.grid()
        ax.set_xlim(anom_index.min()-5, anom_index.max()+5)

        #tick labels are shown on the bottom panel only
        if ax is not axs[3]:
            ax.set_xticklabels([])

    axs[0].set_ylabel("Electrical \nload $[kW]$", fontsize=font_size)
    axs[1].set_ylabel("Electrical \nenergy $[-]$", fontsize=font_size)
    axs[2].set_ylabel("Anomaly \ndetected", fontsize=font_size)
    axs[3].set_ylabel("Deviation of\n electrical \nenergy $[kWh]$", fontsize=font_size)

    for ax in [axs[0], axs[2], axs[3]]:
        ax.legend(["Real", "Predicted"], fontsize=font_size-2)

    axs[1].legend(fontsize=font_size)
    axs[3].set_xlabel("Time $[h]$", fontsize=font_size-2)

    plt.tight_layout()
    #fig.savefig(figures_path+"/detection_of_anomaly_"+anomaly, bbox_inches='tight', dpi=200)

    print("Mean additional power: "+str(add_power))
    print("Wasted energy: Predicted / Real : "+str(est_add_energy)+" / "+str(add_energy))
    print("Wasted energy at the time of anomaly detection: "+ str(add_energy_time_of_detection))

## Check the undetected anomaly

In [None]:
#Four-panel figure for a single case study: load, CUSUM statistics, anomaly flags, cumulative energy deviation
caseStudy="building_1010"

#check magnitude of the anomaly
dfs_from_generator[caseStudy]=retrive_data_from_generator(caseStudy)

print("Anomaly A: degradation of COP: "+str(dfs_from_generator[caseStudy].cop_anom_degr.unique().max()*100)+" %")

if graph_setting=="article":
    fig_size=big_figsize
elif graph_setting=="notebook":
    fig_size=(long_ts_figsize[0], long_ts_figsize[1]*4)

fig, axs = plt.subplots(4, figsize=fig_size)

axs[0].set_title("Casestudy: "+caseStudy)

#the mask is built from new_dataset itself: a mask taken from another frame only works
#as long as both frames happen to share the same index
df=new_dataset.loc[new_dataset.ID==caseStudy].reset_index()
#reference case study without anomaly: same ID with the last two characters replaced by "00"
df_normal=dataset_normal.loc[dataset_normal.ID==caseStudy[:-2]+"00"].reset_index()
cum_err=np.cumsum(df.residuals_semiPar/1000)
res=df["residuals_semiPar"]
norm_res=res/df.P
_, _, res_setup, res_monitor = train_test_split(norm_res, norm_res, test_size=0.33, shuffle=False)
Cpos, Cneg, anomaly_threshold ,sigma= cusum_tab(res_setup, x_monitor=res_monitor, k=k, h=h, moving_range=False)
Cpos=pd.Series(Cpos)
Cneg=pd.Series(Cneg)

#rows flagged as anomalous in the data
anom_index=df.loc[df.anomaly==1, :].index

#calculate cumulative deviations (estimated and real), both set to zero at the first anomalous time step
anom_start_cum_value=cum_err.loc[anom_index[0]]
cum_est_dev=cum_err-anom_start_cum_value

real_dev=((df.P-df_normal.P)/1000).cumsum()
anom_start_cum_value=real_dev.loc[anom_index[0]]
real_dev=real_dev-anom_start_cum_value

#zoom window: 5 time steps before the anomaly up to 4 after its last step (the range end is exclusive),
#clipped to the rows available in df so that label-based selection cannot fail at the edges
zoom_on=range(max(anom_index.min()-5, df.index[0]), min(anom_index.max()+5, df.index[-1]+1))

#calculate percentage of additional consumption
add_power=(df.residuals_semiPar/df.P).loc[range(anom_index.min(), anom_index.max())].mean()
#calculate wasted energy
add_energy=real_dev[anom_index.max()]
est_add_energy=cum_est_dev[anom_index.max()]

axs[0].plot((df.P/1000).loc[zoom_on], color=colors["P"])
axs[0].plot(((df.P-df.residuals_semiPar)/1000).loc[zoom_on], color=colors["P_pred"])

axs[1].plot(Cpos.loc[zoom_on], label=r"$C_+$", color=colors["c_plus"])
axs[1].plot(Cneg.loc[zoom_on], label=r"$C_-$", color=colors["c_minus"])
axs[1].hlines(y=anomaly_threshold, xmin=anom_index.min(), xmax=anom_index.max(), ls="-.", color=colors["threshold"], label="H")

axs[2].plot(df.anomaly.loc[zoom_on], color=colors["real_anom"])
axs[2].plot(df.detected_anomaly.loc[zoom_on], color=colors["det_anomaly"])

axs[3].plot(real_dev.loc[zoom_on], color=colors["real_anom"])
axs[3].plot(cum_est_dev.loc[zoom_on], color=colors["det_anomaly"])

for ax in axs:
    ax.tick_params(labelsize=font_size-2)
    ax.grid()
    ax.set_xlim(anom_index.min()-5, anom_index.max()+5)

    #tick labels are shown on the bottom panel only
    if ax is not axs[3]:
        ax.set_xticklabels([])

axs[0].set_ylabel("Electrical \nload $[kW]$", fontsize=font_size)
axs[1].set_ylabel("Electrical \nenergy $[-]$", fontsize=font_size)
axs[2].set_ylabel("Anomaly \ndetected", fontsize=font_size)
axs[3].set_ylabel("Deviation of\n electrical \nenergy $[kWh]$", fontsize=font_size)

for ax in [axs[0], axs[2], axs[3]]:
    ax.legend(["Real", "Predicted"], fontsize=font_size-2)

axs[1].legend(fontsize=font_size)
axs[3].set_xlabel("Time $[h]$", fontsize=font_size-2)

plt.tight_layout()
#save only when the target directory exists, so that a missing folder does not abort the cell
if os.path.isdir(figures_path):
    fig.savefig(figures_path+"/undetected_anomaly", bbox_inches='tight', dpi=200)
else:
    print("Figure not saved: the directory "+figures_path+" does not exist")

print("Mean additional power: "+str(add_power))
print("Wasted energy: Predicted / Real : "+str(est_add_energy)+" / "+str(add_energy))
