In [1]:
# libraries importing
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn import metrics
import numpy as np
import os
from itertools import product

colors = ['#1f77b4','#ff7f0e','#2ca02c','#d62728','#9467bd','#8c564b','#e377c2','#7f7f7f','#bcbd22','#17becf']

import sys
sys.path.append('../')

from algorithms.Conv_AE import Conv_AE
from data_processing.process_data import process_data, add_rolling_stats, get_single_df

from sklearn.decomposition import PCA
from sklearn.feature_selection import RFE
from sklearn import metrics
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier

from feature_engine.selection import SmartCorrelatedSelection, SelectBySingleFeaturePerformance, RecursiveFeatureAddition
from feature_engine.outliers import Winsorizer

## Data loading

In [2]:
datasets = process_data()

valve1_X =  datasets["valve1_X"]
valve1_y = datasets["valve1_y"]
valve2_X = datasets["valve2_X"]
valve2_y = datasets["valve2_y"]
other_anomaly_X = datasets["other_anomaly_X"]
other_anomaly_y = datasets["other_anomaly_y"]

In [3]:
X, y = get_single_df()

In [4]:
# hyperparameters selection
N_STEPS = 120
Qs = np.arange(0.25, 0.90, 0.05) # quantile for upper control limit (UCL) selection
model = Conv_AE()

In [5]:
def test_train_split(df_X, df_y):
    size_train = int(df_X.shape[0]*0.8)
    size_test = df_X.shape[0] - size_train
    x_train = df_X[:size_train]
    y_train = df_y[:size_train].anomaly
    x_test = df_X[-size_test:]
    y_test = df_y[-size_test:].anomaly
    return x_train, y_train, x_test, y_test

In [6]:
# x_train_valve1, y_train_valve1, x_test_valve1, y_test_valve1 = test_train_split(valve1_X, valve1_y)
# x_train_valve2, y_train_valve2, x_test_valve2, y_test_valve2 = test_train_split(valve2_X, valve2_y)
# x_train_other_anomaly, y_train_other_anomaly, x_test_other_anomaly, y_test_other_anomaly = test_train_split(other_anomaly_X, other_anomaly_y)

In [7]:
# x_train, y_train, x_test, y_test = test_train_split(X, y)

__Add rolling stats__

In [8]:
# x_train_valve1 = add_rolling_stats(x_train_valve1, '1min', x_train_valve1.columns)
# x_test_valve1 = add_rolling_stats(x_test_valve1, '1min', x_test_valve1.columns)
# x_train_valve2 = add_rolling_stats(x_train_valve2, '1min', x_train_valve2.columns)
# x_test_valve2 = add_rolling_stats(x_test_valve2, '1min', x_test_valve2.columns)
# x_train_other_anomaly = add_rolling_stats(x_train_other_anomaly, '1min', x_train_other_anomaly.columns)
# x_test_other_anomaly = add_rolling_stats(x_test_other_anomaly, '1min', x_test_other_anomaly.columns)

# x_train = add_rolling_stats(x_train, '1min', x_train.columns)
# x_test = add_rolling_stats(x_test, '1min', x_test.columns)


__Winsorizer__

In [9]:
wz = Winsorizer(capping_method='quantiles', tail='both', fold=3)

__Standard scaler__

In [10]:
sc = StandardScaler()

__PCA__

In [11]:
pca = PCA(n_components='mle', svd_solver='full')

__RFE based on SVM__

In [12]:
estimator = SVR(kernel="linear")
rfe = RFE(estimator, n_features_to_select=3, step=1)

__Feature selection by single feature performance using random forest estimator__

In [13]:
sfp = SelectBySingleFeaturePerformance(
                    RandomForestClassifier(random_state=42),
                    cv=2)

__Feature selection by information value__

In [14]:
rfa = RecursiveFeatureAddition(RandomForestClassifier(random_state=42), cv=3)

__Smart correlated features__

In [15]:
scs = SmartCorrelatedSelection(threshold=0.8)

In [16]:
functions = [wz, sc, pca, sfp, scs]

In [17]:
names = {type(wz): 'Winsorizer', type(sc): "Standard Scaler", type(pca): "PCA", type(rfe): "RFE", type(sfp): "SFP", type(rfa): "RFA", type(scs): "SCS"}

In [18]:
num_steps = [1, 2, 3]

In [19]:
# Generated training sequences for use in the model.
def create_sequences(values, time_steps=N_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

In [20]:
# # x_train_steps_valve1 = create_sequences(np.array([row.values for i, row in x_train_valve1.iterrows()]), N_STEPS)
# # x_test_steps_valve1 = create_sequences(np.array([row.values for i, row in x_test_valve1.iterrows()]), N_STEPS)

# # x_train_steps_valve2 = create_sequences(np.array([row.values for i, row in x_train_valve2.iterrows()]), N_STEPS)
# # x_test_steps_valve2 = create_sequences(np.array([row.values for i, row in x_test_valve2.iterrows()]), N_STEPS)

# # x_train_steps_other_anomaly = create_sequences(np.array([row.values for i, row in x_train_other_anomaly.iterrows()]), N_STEPS)
# # x_test_steps_other_anomaly = create_sequences(np.array([row.values for i, row in x_test_other_anomaly.iterrows()]), N_STEPS)

# x_train_steps_valve1 = create_sequences(x_train_valve1, N_STEPS)
# x_test_steps_valve1 = create_sequences(x_test_valve1, N_STEPS)

# x_train_steps_valve2 = create_sequences(x_train_valve2, N_STEPS)
# x_test_steps_valve2 = create_sequences(x_test_valve2, N_STEPS)

# x_train_steps_other_anomaly = create_sequences(x_train_other_anomaly, N_STEPS)
# x_test_steps_other_anomaly = create_sequences(x_test_other_anomaly, N_STEPS)

In [21]:
# x_train_steps = create_sequences(x_train, N_STEPS)
# x_test_steps = create_sequences(x_test, N_STEPS)

#### Test model for combined dataset

In [22]:
results_combined = pd.DataFrame(columns = ['Dataset', 'steps', 'Q', 'TPR', 'TNR', 'PPV', 'NPV', 'FPR', 'FNR', 'FDR', 'ACC'])
# for i in num_steps:

# for combination in product(functions, repeat=3):

#     if len(set(combination)) == i:
combination = [wz, pca, sfp]
x_train, y_train, x_test, y_test = test_train_split(X, y)

for func in combination:
    func.fit(x_train, y_train)
    x_train = func.transform(x_train)
    x_test = func.transform(x_test)

print(x_train)

x_train_steps = create_sequences(x_train, N_STEPS)
x_test_steps = create_sequences(x_test, N_STEPS)

model.fit(x_train_steps)

for Q in Qs:

    # results predicting
    residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps - model.predict(x_train_steps)), axis=1), axis=1))
    UCL = residuals.quantile(Q)

    # train prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps - model.predict(x_train_steps)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_train_steps) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_train = pd.Series(data=0, index=np.arange(len(x_train)))
    yhat_train.iloc[anomalous_data_indices] = 1

    # test prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps - model.predict(x_test_steps)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_test_steps) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_test = pd.Series(data=0, index=np.arange(len(x_test)))
    yhat_test.iloc[anomalous_data_indices] = 1

    conf_matrix = metrics.confusion_matrix(y_test, yhat_test)

    TN, FP, FN, TP = conf_matrix.ravel()

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP/(TP+FN)
    # Specificity or true negative rate
    TNR = TN/(TN+FP)
    # Precision or positive predictive value
    PPV = TP/(TP+FP)
    # Negative predictive value
    NPV = TN/(TN+FN)
    # Fall out or false positive rate FAR false alarm rate
    FPR = FP/(FP+TN)
    # False negative rate MAR missing alarm rate
    FNR = FN/(TP+FN)
    # False discovery rate
    FDR = FP/(TP+FP)
    # Overall accuracy
    ACC = (TP+TN)/(TP+FP+FN+TN)

    row = dict(Dataset = 'combined',
            steps = [[names[type(func)] for func in combination]],
            Q = Q,
            TPR = TPR,
            TNR = TNR,
            PPV = PPV,
            NPV = NPV,
            FPR = FPR,
            FNR = FNR,
            FDR = FDR,
            ACC = ACC)

    results_combined = pd.concat([results_combined, pd.DataFrame(row, index = [0])], ignore_index = True)

              x0         x1        x3        x4        x6
0     -18.142057   0.186817 -0.978820 -0.419317  0.034431
1     -18.523987   1.125595 -0.873075  0.438716  0.018024
2     -18.211413  -9.492334 -0.991599  0.243105  0.030314
3     -17.781370  -2.028439 -0.860249  0.052612  0.026709
4     -18.355121   8.924017 -0.905835  0.456171  0.023981
...          ...        ...       ...       ...       ...
27774  73.374542   5.582130 -0.585879  0.403833 -0.003133
27775  73.748347 -10.955456 -0.494782  0.293747 -0.000780
27776  73.482112   1.086164 -0.580245  0.275961 -0.001442
27777  73.488716  -0.045322 -0.527681  0.294752 -0.002239
27778  73.292799   7.058035 -0.487908  0.330424 -0.004193

[27779 rows x 5 columns]




 61/865 [=>............................] - ETA: 1s

  results_combined = pd.concat([results_combined, pd.DataFrame(row, index = [0])], ignore_index = True)




In [23]:
# results_combined.sort_values(by=['FNR', 'FPR'])

In [24]:
# results_combined.to_csv("results_combined_modified_Conv_AE.csv")

In [25]:
for i, col in enumerate(results_combined.columns[1:]):
    if col in ['FPR', 'FNR']:
        fig = go.Figure()

        fig.add_trace(go.Scatter(mode='lines+text', x=results_combined.Q, y=results_combined[f'{col}'],
                                marker=dict(color=colors[i]),
                                texttemplate='%{y:.2f}', textposition='top center',
                                textfont=dict(color=colors[i], size=12),
                                name=f'{col}',
                                showlegend=True)
                        )

        fig.update_layout(height=400,width=900, template='plotly_white',
                        title=dict(text=f'{col} with different Q values', font=dict(size=18), x=.5, y=.95),
                        yaxis=dict(title=f'{col}', side='left', showgrid=True,),
                        xaxis=dict(title='Q', showgrid=False),
                        legend=dict(orientation="h", yanchor="bottom", y=1, x=0.5, xanchor="center"),
                        )

        fig.show()

#### Test model for valve 1

In [26]:
results_valve1 = pd.DataFrame(columns = ['Dataset', 'steps', 'Q', 'TPR', 'TNR', 'PPV', 'NPV', 'FPR', 'FNR', 'FDR', 'ACC'])
# for i in num_steps:

#     for combination in product(functions, repeat=i):

#         if len(set(combination)) == i:

combination = [wz, pca, sfp]

x_train_valve1, y_train_valve1, x_test_valve1, y_test_valve1 = test_train_split(valve1_X, valve1_y)

for func in combination:
    func.fit(x_train_valve1, y_train_valve1)
    x_train_valve1 = func.transform(x_train_valve1)
    x_test_valve1 = func.transform(x_test_valve1)

x_train_steps_valve1 = create_sequences(x_train_valve1, N_STEPS)
x_test_steps_valve1 = create_sequences(x_test_valve1, N_STEPS)

model.fit(x_train_steps_valve1)

for Q in Qs:

    # results predicting
    residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve1 - model.predict(x_train_steps_valve1)), axis=1), axis=1))
    UCL = residuals.quantile(Q)

    # train prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve1 - model.predict(x_train_steps_valve1)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_train_steps_valve1) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_train = pd.Series(data=0, index=np.arange(len(x_train_valve1)))
    yhat_train.iloc[anomalous_data_indices] = 1

    # test prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_valve1 - model.predict(x_test_steps_valve1)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_test_steps_valve1) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_test = pd.Series(data=0, index=np.arange(len(x_test_valve1)))
    yhat_test.iloc[anomalous_data_indices] = 1

    conf_matrix = metrics.confusion_matrix(y_test_valve1, yhat_test)

    TN, FP, FN, TP = conf_matrix.ravel()

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP/(TP+FN)
    # Specificity or true negative rate
    TNR = TN/(TN+FP)
    # Precision or positive predictive value
    PPV = TP/(TP+FP)
    # Negative predictive value
    NPV = TN/(TN+FN)
    # Fall out or false positive rate FAR false alarm rate
    FPR = FP/(FP+TN)
    # False negative rate MAR missing alarm rate
    FNR = FN/(TP+FN)
    # False discovery rate
    FDR = FP/(TP+FP)
    # Overall accuracy
    ACC = (TP+TN)/(TP+FP+FN+TN)

    row = dict(Dataset = 'Valve 1',
            steps = [[names[type(func)] for func in combination]],
            Q = Q,
            TPR = TPR,
            TNR = TNR,
            PPV = PPV,
            NPV = NPV,
            FPR = FPR,
            FNR = FNR,
            FDR = FDR,
            ACC = ACC)

    results_valve1 = pd.concat([results_valve1, pd.DataFrame(row, index = [0])], ignore_index = True)



104/451 [=====>........................] - ETA: 0s


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.





In [27]:
# results_valve1.to_csv('results_valve1_modified_Conv_AE.csv')

In [28]:
for i, col in enumerate(results_valve1.columns[1:]):
    if col in ['FPR', 'FNR']:
        fig = go.Figure()

        fig.add_trace(go.Scatter(mode='lines+text', x=results_valve1.Q, y=results_valve1[f'{col}'],
                                marker=dict(color=colors[i]),
                                texttemplate='%{y:.2f}', textposition='top center',
                                textfont=dict(color=colors[i], size=12),
                                name=f'{col}',
                                showlegend=True)
                        )

        fig.update_layout(height=400,width=900, template='plotly_white',
                        title=dict(text=f'{col} with different Q values', font=dict(size=18), x=.5, y=.95),
                        yaxis=dict(title=f'{col}', side='left', showgrid=True,),
                        xaxis=dict(title='Q', showgrid=False),
                        legend=dict(orientation="h", yanchor="bottom", y=1, x=0.5, xanchor="center"),
                        )

        fig.show()

#### Test model for valve 2

In [29]:
results_valve2 = pd.DataFrame(columns = ['Dataset', 'steps', 'Q', 'TPR', 'TNR', 'PPV', 'NPV', 'FPR', 'FNR', 'FDR', 'ACC'])
# for i in num_steps:

#     for combination in product(functions, repeat=i):

#         if len(set(combination)) == i:

combination = [wz, pca, sfp]

x_train_valve2, y_train_valve2, x_test_valve2, y_test_valve2 = test_train_split(valve2_X, valve2_y)

for func in combination:
    func.fit(x_train_valve2, y_train_valve2)
    x_train_valve2 = func.transform(x_train_valve2)
    x_test_valve2 = func.transform(x_test_valve2)

x_train_steps_valve2 = create_sequences(x_train_valve2, N_STEPS)
x_test_steps_valve2 = create_sequences(x_test_valve2, N_STEPS)

model.fit(x_train_steps_valve2)

for Q in Qs:

    # results predicting
    residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve2 - model.predict(x_train_steps_valve2)), axis=1), axis=1))
    UCL = residuals.quantile(Q)

    # train prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve2 - model.predict(x_train_steps_valve2)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_train_steps_valve2) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_train = pd.Series(data=0, index=np.arange(len(x_train_valve2)))
    yhat_train.iloc[anomalous_data_indices] = 1

    # test prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_valve2 - model.predict(x_test_steps_valve2)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_test_steps_valve2) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_test = pd.Series(data=0, index=np.arange(len(x_test_valve2)))
    yhat_test.iloc[anomalous_data_indices] = 1

    conf_matrix = metrics.confusion_matrix(y_test_valve2, yhat_test)

    TN, FP, FN, TP = conf_matrix.ravel()

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP/(TP+FN)
    # Specificity or true negative rate
    TNR = TN/(TN+FP)
    # Precision or positive predictive value
    PPV = TP/(TP+FP)
    # Negative predictive value
    NPV = TN/(TN+FN)
    # Fall out or false positive rate FAR false alarm rate
    FPR = FP/(FP+TN)
    # False negative rate MAR missing alarm rate
    FNR = FN/(TP+FN)
    # False discovery rate
    FDR = FP/(TP+FP)
    # Overall accuracy
    ACC = (TP+TN)/(TP+FP+FN+TN)

    row = dict(Dataset = 'Valve 2',
            steps = [[names[type(func)] for func in combination]],
            Q = Q,
            TPR = TPR,
            TNR = TNR,
            PPV = PPV,
            NPV = NPV,
            FPR = FPR,
            FNR = FNR,
            FDR = FDR,
            ACC = ACC)

    results_valve2 = pd.concat([results_valve2, pd.DataFrame(row, index = [0])], ignore_index = True)






The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.





In [30]:
# results_valve2.to_csv('results_valve2_modified_Conv_AE.csv')

In [31]:
for i, col in enumerate(results_valve2.columns[1:]):
    if col in ['FPR', 'FNR']:

        fig = go.Figure()

        fig.add_trace(go.Scatter(mode='lines+text', x=results_valve2.Q, y=results_valve2[f'{col}'],
                                marker=dict(color=colors[i]),
                                texttemplate='%{y:.2f}', textposition='top center',
                                textfont=dict(color=colors[i], size=12),
                                name=f'{col}',
                                showlegend=True)
                        )

        fig.update_layout(height=400,width=900, template='plotly_white',
                        title=dict(text=f'{col} with different Q values', font=dict(size=18), x=.5, y=.95),
                        yaxis=dict(title=f'{col}', side='left', showgrid=True,),
                        xaxis=dict(title='Q', showgrid=False),
                        legend=dict(orientation="h", yanchor="bottom", y=1, x=0.5, xanchor="center"),
                        )

        fig.show()

#### Test model for other anomalies

In [32]:
results_other_anomaly = pd.DataFrame(columns = ['Dataset', 'steps', 'Q', 'TPR', 'TNR', 'PPV', 'NPV', 'FPR', 'FNR', 'FDR', 'ACC'])
# for i in num_steps:

#     for combination in product(functions, repeat=i):

#         if len(set(combination)) == i:

combination = [wz, pca, sfp]

x_train_other_anomaly, y_train_other_anomaly, x_test_other_anomaly, y_test_other_anomaly = test_train_split(other_anomaly_X, other_anomaly_y)            

for func in combination:
    func.fit(x_train_other_anomaly, y_train_other_anomaly)
    x_train_other_anomaly = func.transform(x_train_other_anomaly)
    x_test_other_anomaly = func.transform(x_test_other_anomaly)

x_train_steps_other_anomaly = create_sequences(x_train_other_anomaly, N_STEPS)
x_test_steps_other_anomaly = create_sequences(x_test_other_anomaly, N_STEPS)

model.fit(x_train_steps_other_anomaly)

for Q in Qs:

    # results predicting
    residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_other_anomaly - model.predict(x_train_steps_other_anomaly)), axis=1), axis=1))
    UCL = residuals.quantile(Q)

    # train prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_other_anomaly - model.predict(x_train_steps_other_anomaly)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_train_steps_other_anomaly) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_train = pd.Series(data=0, index=np.arange(len(x_train_other_anomaly)))
    yhat_train.iloc[anomalous_data_indices] = 1

    # test prediction
    cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_other_anomaly - model.predict(x_test_steps_other_anomaly)), axis=1), axis=1))

    # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
    anomalous_data = cnn_residuals > UCL
    anomalous_data_indices = []
    for data_idx in range(N_STEPS - 1, len(x_test_steps_other_anomaly) - N_STEPS + 1):
        if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)

    yhat_test = pd.Series(data=0, index=np.arange(len(x_test_other_anomaly)))
    yhat_test.iloc[anomalous_data_indices] = 1

    conf_matrix = metrics.confusion_matrix(y_test_other_anomaly, yhat_test)

    TN, FP, FN, TP = conf_matrix.ravel()

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP/(TP+FN)
    # Specificity or true negative rate
    TNR = TN/(TN+FP)
    # Precision or positive predictive value
    PPV = TP/(TP+FP)
    # Negative predictive value
    NPV = TN/(TN+FN)
    # Fall out or false positive rate FAR false alarm rate
    FPR = FP/(FP+TN)
    # False negative rate MAR missing alarm rate
    FNR = FN/(TP+FN)
    # False discovery rate
    FDR = FP/(TP+FP)
    # Overall accuracy
    ACC = (TP+TN)/(TP+FP+FN+TN)

    row = dict(Dataset = 'Other anomalies',
            steps = [[names[type(func)] for func in combination]],
            Q = Q,
            TPR = TPR,
            TNR = TNR,
            PPV = PPV,
            NPV = NPV,
            FPR = FPR,
            FNR = FNR,
            FDR = FDR,
            ACC = ACC)

    results_other_anomaly = pd.concat([results_other_anomaly, pd.DataFrame(row, index = [0])], ignore_index = True)



 77/367 [=====>........................] - ETA: 0s


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.





In [33]:
# results_other_anomaly.to_csv('results_other_anomalies_modified_Conv_AE.csv')

In [34]:
for i, col in enumerate(results_other_anomaly.columns[1:]):
    if col in['FPR', 'FNR']:
        fig = go.Figure()

        fig.add_trace(go.Scatter(mode='lines+text', x=results_other_anomaly.Q, y=results_other_anomaly[f'{col}'],
                                marker=dict(color=colors[i]),
                                texttemplate='%{y:.2f}', textposition='top center',
                                textfont=dict(color=colors[i], size=12),
                                name=f'{col}',
                                showlegend=True)
                        )

        fig.update_layout(height=400,width=900, template='plotly_white',
                        title=dict(text=f'{col} with different Q values', font=dict(size=18), x=.5, y=.95),
                        yaxis=dict(title=f'{col}', side='left', showgrid=True,),
                        xaxis=dict(title='Q', showgrid=False),
                        legend=dict(orientation="h", yanchor="bottom", y=1, x=0.5, xanchor="center"),
                        )

        fig.show()