In [1]:
# libraries importing
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

import sys
sys.path.append('../')

from algorithms.Conv_AE import Conv_AE
from data_processing.process_data import process_data, add_rolling_stats

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_selection import RFE
from sklearn import metrics
from sklearn.svm import SVR
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier

from feature_engine.selection import SmartCorrelatedSelection, DropConstantFeatures, DropDuplicateFeatures, DropFeatures, DropCorrelatedFeatures, SelectBySingleFeaturePerformance, RecursiveFeatureAddition
from feature_engine.outliers import Winsorizer

## Data loading

In [2]:
datasets = process_data()

valve1_X =  datasets["valve1_X"]
valve1_y = datasets["valve1_y"]
valve2_X = datasets["valve2_X"]
valve2_y = datasets["valve2_y"]
other_anomaly_X = datasets["other_anomaly_X"]
other_anomaly_y = datasets["other_anomaly_y"]

In [3]:
# hyperparameters selection
N_STEPS = 120
Q = 0.3 # quantile for upper control limit (UCL) selection
model = Conv_AE()

In [4]:
def test_train_split(df_X, df_y):
    size_train = int(df_X.shape[0]*0.8)
    size_test = df_X.shape[0] - size_train
    x_train = df_X[:size_train]
    y_train = df_y[:size_train].anomaly
    x_test = df_X[-size_test:]
    y_test = df_y[-size_test:].anomaly
    return x_train, y_train, x_test, y_test

In [5]:
x_train_valve1, y_train_valve1, x_test_valve1, y_test_valve1 = test_train_split(valve1_X, valve1_y)
x_train_valve2, y_train_valve2, x_test_valve2, y_test_valve2 = test_train_split(valve2_X, valve2_y)
x_train_other_anomaly, y_train_other_anomaly, x_test_other_anomaly, y_test_other_anomaly = test_train_split(other_anomaly_X, other_anomaly_y)

In [6]:
# Generated training sequences for use in the model.
def create_sequences(values, time_steps=N_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

In [7]:
x_train_steps_valve1 = create_sequences(np.array([row.values for i, row in x_train_valve1.iterrows()]), N_STEPS)
x_test_steps_valve1 = create_sequences(np.array([row.values for i, row in x_test_valve1.iterrows()]), N_STEPS)

x_train_steps_valve2 = create_sequences(np.array([row.values for i, row in x_train_valve2.iterrows()]), N_STEPS)
x_test_steps_valve2 = create_sequences(np.array([row.values for i, row in x_test_valve2.iterrows()]), N_STEPS)

x_train_steps_other_anomaly = create_sequences(np.array([row.values for i, row in x_train_other_anomaly.iterrows()]), N_STEPS)
x_test_steps_other_anomaly = create_sequences(np.array([row.values for i, row in x_test_other_anomaly.iterrows()]), N_STEPS)

## Base models
#### Test model for valve 1

In [8]:
model.fit(x_train_steps_valve1)

# results predicting
residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve1 - model.predict(x_train_steps_valve1)), axis=1), axis=1))
UCL = residuals.quantile(Q)





In [9]:
# train prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve1 - model.predict(x_train_steps_valve1)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_train_steps_valve1) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_train = pd.Series(data=0, index=x_train_valve1.index)
yhat_train.iloc[anomalous_data_indices] = 1


# test prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_valve1 - model.predict(x_test_steps_valve1)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_test_steps_valve1) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_test = pd.Series(data=0, index=x_test_valve1.index)
yhat_test.iloc[anomalous_data_indices] = 1



In [10]:
print('Training accuracy {:.4f}'.format(accuracy_score(y_train_valve1, yhat_train)))
print('Testing accuracy {:.4f}'.format(accuracy_score(y_test_valve1, yhat_test)))

Training accuracy 0.4625
Testing accuracy 0.7561


In [11]:
accuracy = accuracy_score(y_test_valve1, yhat_test)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_test_valve1, yhat_test)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test_valve1, yhat_test)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test_valve1, yhat_test)
print('F1 score: %f' % f1)

Accuracy: 0.756124
Precision: 0.794266
Recall: 0.381377
F1 score: 0.515317


In [12]:
conf_matrix = metrics.confusion_matrix(y_test_valve1, yhat_test)

TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate FAR false alarm rate
FPR = FP/(FP+TN)
# False negative rate MAR missing alarm rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)

# Overall accuracy
ACC = (TP+TN)/(TP+FP+FN+TN)

print(f'False Alarm Rate: {FPR}')
print(f'Missing Alarm Rate: {FNR}')

False Alarm Rate: 0.05087572977481234
Missing Alarm Rate: 0.6186234817813765


In [13]:
print(metrics.classification_report(y_test_valve1,yhat_test))

              precision    recall  f1-score   support

         0.0       0.75      0.95      0.84      2398
         1.0       0.79      0.38      0.52      1235

    accuracy                           0.76      3633
   macro avg       0.77      0.67      0.68      3633
weighted avg       0.76      0.76      0.73      3633



#### Test model for valve 2

In [14]:
model.fit(x_train_steps_valve2)

# results predicting
residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve2 - model.predict(x_train_steps_valve2)), axis=1), axis=1))
UCL = residuals.quantile(Q)





In [15]:
# train prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve2 - model.predict(x_train_steps_valve2)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_train_steps_valve2) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_train = pd.Series(data=0, index=x_train_valve2.index)
yhat_train.iloc[anomalous_data_indices] = 1


# test prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_valve2 - model.predict(x_test_steps_valve2)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_test_steps_valve2) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_test = pd.Series(data=0, index=x_test_valve2.index)
yhat_test.iloc[anomalous_data_indices] = 1
  
  



In [16]:
print('Training accuracy {:.4f}'.format(accuracy_score(y_train_valve2, yhat_train)))
print('Testing accuracy {:.4f}'.format(accuracy_score(y_test_valve2, yhat_test)))

Training accuracy 0.5938
Testing accuracy 0.7428


In [17]:
accuracy = accuracy_score(y_test_valve2, yhat_test)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_test_valve2, yhat_test)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test_valve2, yhat_test)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test_valve2, yhat_test)
print('F1 score: %f' % f1)

Accuracy: 0.742758
Precision: 0.906103
Recall: 0.488608
F1 score: 0.634868


In [18]:
conf_matrix = metrics.confusion_matrix(y_test_valve2, yhat_test)

TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate FAR false alarm rate
FPR = FP/(FP+TN)
# False negative rate MAR missing alarm rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)

# Overall accuracy
ACC = (TP+TN)/(TP+FP+FN+TN)

print(f'False Alarm Rate: {FPR}')
print(f'Missing Alarm Rate: {FNR}')

False Alarm Rate: 0.042735042735042736
Missing Alarm Rate: 0.5113924050632911


In [19]:
print(metrics.classification_report(y_test_valve2,yhat_test))

              precision    recall  f1-score   support

         0.0       0.69      0.96      0.80       468
         1.0       0.91      0.49      0.63       395

    accuracy                           0.74       863
   macro avg       0.80      0.72      0.72       863
weighted avg       0.79      0.74      0.73       863



#### Test model for other anomalies

In [20]:
model.fit(x_train_steps_other_anomaly)

# results predicting
residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_other_anomaly - model.predict(x_train_steps_other_anomaly)), axis=1), axis=1))
UCL = residuals.quantile(Q)





In [21]:
# train prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_other_anomaly - model.predict(x_train_steps_other_anomaly)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_train_steps_other_anomaly) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_train = pd.Series(data=0, index=x_train_other_anomaly.index)
yhat_train.iloc[anomalous_data_indices] = 1


# test prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_other_anomaly - model.predict(x_test_steps_other_anomaly)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_test_steps_other_anomaly) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_test = pd.Series(data=0, index=x_test_other_anomaly.index)
yhat_test.iloc[anomalous_data_indices] = 1
  
  



In [22]:
print('Training accuracy {:.4f}'.format(accuracy_score(y_train_other_anomaly, yhat_train)))
print('Testing accuracy {:.4f}'.format(accuracy_score(y_test_other_anomaly, yhat_test)))

Training accuracy 0.5277
Testing accuracy 0.5415


In [23]:
accuracy = accuracy_score(y_test_other_anomaly, yhat_test)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_test_other_anomaly, yhat_test)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test_other_anomaly, yhat_test)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test_other_anomaly, yhat_test)
print('F1 score: %f' % f1)

Accuracy: 0.541542
Precision: 0.000000
Recall: 0.000000
F1 score: 0.000000


  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
conf_matrix = metrics.confusion_matrix(y_test_other_anomaly, yhat_test)

TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate FAR false alarm rate
FPR = FP/(FP+TN)
# False negative rate MAR missing alarm rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)

# Overall accuracy
ACC = (TP+TN)/(TP+FP+FN+TN)

print(f'False Alarm Rate: {FPR}')
print(f'Missing Alarm Rate: {FNR}')

False Alarm Rate: 0.0
Missing Alarm Rate: 1.0


  PPV = TP/(TP+FP)
  FDR = FP/(TP+FP)


In [25]:
print(metrics.classification_report(y_test_other_anomaly,yhat_test))

              precision    recall  f1-score   support

         0.0       0.54      1.00      0.70      1623
         1.0       0.00      0.00      0.00      1374

    accuracy                           0.54      2997
   macro avg       0.27      0.50      0.35      2997
weighted avg       0.29      0.54      0.38      2997



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Apply steps on datasets

__Add rolling stats__

In [27]:
x_train_valve1 = add_rolling_stats(x_train_valve1, '1min', x_train_valve1.columns)
x_test_valve1 = add_rolling_stats(x_test_valve1, '1min', x_test_valve1.columns)
x_train_valve2 = add_rolling_stats(x_train_valve2, '1min', x_train_valve2.columns)
x_test_valve2 = add_rolling_stats(x_test_valve2, '1min', x_test_valve2.columns)
x_train_other_anomaly = add_rolling_stats(x_train_other_anomaly, '1min', x_train_other_anomaly.columns)
x_test_other_anomaly = add_rolling_stats(x_test_other_anomaly, '1min', x_test_other_anomaly.columns)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_rolling_mean"] = df[f"{col}"].rolling(time_diff).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_rolling_mean"] = df[f"{col}"].rolling(time_diff).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_rolling_mean"] = df[f"{col}"].rolling(time_diff).mean()
A va

__Winsorizer__

In [28]:
wz = Winsorizer(capping_method='quantiles', tail='both', fold=3)

In [29]:
# wz.fit(x_train_valve1)
# x_train_valve1 = wz.transform(x_train_valve1)
# x_test_valve1 = wz.transform(x_test_valve1)

# wz.fit(x_train_valve2)
# x_train_valve2 = wz.transform(x_train_valve2)
# x_test_valve2 = wz.transform(x_test_valve2)

# wz.fit(x_train_other_anomaly)
# x_train_other_anomaly = wz.transform(x_train_other_anomaly)
# x_test_other_anomaly = wz.transform(x_test_other_anomaly)

__Standard scaler__

In [30]:
sc = StandardScaler()

In [31]:
sc.fit(x_train_valve1, y_train_valve1)
x_train_valve1 = sc.transform(x_train_valve1)
x_test_valve1 = sc.transform(x_test_valve1)

sc.fit(x_train_valve2, y_train_valve2)
x_train_valve2 = sc.transform(x_train_valve2)
x_test_valve2 = sc.transform(x_test_valve2)

sc.fit(x_train_other_anomaly, y_train_other_anomaly)
x_train_other_anomaly = sc.transform(x_train_other_anomaly)
x_test_other_anomaly = sc.transform(x_test_other_anomaly)

__PCA__

In [32]:
pca = PCA(n_components='mle', svd_solver='full')

In [33]:
pca.fit(x_train_valve1, y_train_valve1)
x_train_valve1 = pca.transform(x_train_valve1)
x_test_valve1 = pca.transform(x_test_valve1)

pca.fit(x_train_valve2, y_train_valve2)
x_train_valve2 = pca.transform(x_train_valve2)
x_test_valve2 = pca.transform(x_test_valve2)

pca.fit(x_train_other_anomaly, y_train_other_anomaly)
x_train_other_anomaly = pca.transform(x_train_other_anomaly)
x_test_other_anomaly = pca.transform(x_test_other_anomaly)

__RFE based on SVM__

In [34]:
estimator = SVR(kernel="linear")
rfe = RFE(estimator, n_features_to_select=3, step=1)

In [35]:
# rfe.fit(x_train_valve1, y_train_valve1)
# x_train_valve1 = rfe.transform(x_train_valve1)
# x_test_valve1 = rfe.transform(x_test_valve1)

# rfe.fit(x_train_valve2, y_train_valve2)
# x_train_valve2 = rfe.transform(x_train_valve2)
# x_test_valve2 = rfe.transform(x_test_valve2)

# rfe.fit(x_train_other_anomaly, y_train_other_anomaly)
# x_train_other_anomaly = rfe.transform(x_train_other_anomaly)
# x_test_other_anomaly = rfe.transform(x_test_other_anomaly)

__Feature selection by single feature performance using random forest estimator__

In [36]:
sfp = SelectBySingleFeaturePerformance(
                    RandomForestClassifier(random_state=42),
                    cv=2)

In [37]:
sfp.fit(x_train_valve1, y_train_valve1)
x_train_valve1 = sfp.transform(x_train_valve1)
x_test_valve1 = sfp.transform(x_test_valve1)

sfp.fit(x_train_valve2, y_train_valve2)
x_train_valve2 = sfp.transform(x_train_valve2)
x_test_valve2 = sfp.transform(x_test_valve2)

sfp.fit(x_train_other_anomaly, y_train_other_anomaly)
x_train_other_anomaly = sfp.transform(x_train_other_anomaly)
x_test_other_anomaly = sfp.transform(x_test_other_anomaly)

__Feature selection by information value__

In [38]:
rfa = RecursiveFeatureAddition(RandomForestClassifier(random_state=42), cv=3)

In [39]:
# rfa.fit(x_train_valve1, y_train_valve1)
# x_train_valve1 = rfa.transform(x_train_valve1)
# x_test_valve1 = rfa.transform(x_test_valve1)

# rfa.fit(x_train_valve2, y_train_valve2)
# x_train_valve2 = rfa.transform(x_train_valve2)
# x_test_valve2 = rfa.transform(x_test_valve2)

# rfa.fit(x_train_other_anomaly, y_train_other_anomaly)
# x_train_other_anomaly = rfa.transform(x_train_other_anomaly)
# x_test_other_anomaly = rfa.transform(x_test_other_anomaly)

__Smart correlated features__

In [40]:
scs = SmartCorrelatedSelection(threshold=0.8)

In [41]:
# print(f'valve 1 dataset number of columns: {x_train_valve1.shape[1]}')
# print(f'valve 2 dataset number of columns: {x_train_valve2.shape[1]}')
# print(f'other animalies dataset number of columns: {x_train_other_anomaly.shape[1]}')

# scs.fit(x_train_valve1, y_train_valve1)
# x_train_valve1 = scs.transform(x_train_valve1)
# x_test_valve1 = scs.transform(x_test_valve1)

# scs.fit(x_train_valve2, y_train_valve2)
# x_train_valve2 = scs.transform(x_train_valve2)
# x_test_valve2 = scs.transform(x_test_valve2)

# scs.fit(x_train_other_anomaly, y_train_other_anomaly)
# x_train_other_anomaly = scs.transform(x_train_other_anomaly)
# x_test_other_anomaly = scs.transform(x_test_other_anomaly)

# print('********************* Drop correlated columns *********************')
# print(f'valve 1 dataset number of columns: {x_train_valve1.shape[1]}')
# print(f'valve 2 dataset number of columns: {x_train_valve2.shape[1]}')
# print(f'other animalies dataset number of columns: {x_train_other_anomaly.shape[1]}')

## Retest models


In [42]:
# x_train_steps_valve1 = create_sequences(np.array([row.values for i, row in x_train_valve1.iterrows()]), N_STEPS)
# x_test_steps_valve1 = create_sequences(np.array([row.values for i, row in x_test_valve1.iterrows()]), N_STEPS)
x_train_steps_valve1 = create_sequences(x_train_valve1, N_STEPS)
x_test_steps_valve1 = create_sequences(x_test_valve1, N_STEPS)

# x_train_steps_valve2 = create_sequences(np.array([row.values for i, row in x_train_valve2.iterrows()]), N_STEPS)
# x_test_steps_valve2 = create_sequences(np.array([row.values for i, row in x_test_valve2.iterrows()]), N_STEPS)
x_train_steps_valve2 = create_sequences(x_train_valve2, N_STEPS)
x_test_steps_valve2 = create_sequences(x_test_valve2, N_STEPS)

# x_train_steps_other_anomaly = create_sequences(np.array([row.values for i, row in x_train_other_anomaly.iterrows()]), N_STEPS)
# x_test_steps_other_anomaly = create_sequences(np.array([row.values for i, row in x_test_other_anomaly.iterrows()]), N_STEPS)
x_train_steps_other_anomaly = create_sequences(x_train_other_anomaly, N_STEPS)
x_test_steps_other_anomaly = create_sequences(x_test_other_anomaly, N_STEPS)


#### Test model for valve 1

In [43]:
model.fit(x_train_steps_valve1)

# results predicting
residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve1 - model.predict(x_train_steps_valve1)), axis=1), axis=1))
UCL = residuals.quantile(Q)





In [44]:
# train prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve1 - model.predict(x_train_steps_valve1)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_train_steps_valve1) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_train = pd.Series(data=0, index=np.arange(len(x_train_valve1)))
yhat_train.iloc[anomalous_data_indices] = 1


# test prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_valve1 - model.predict(x_test_steps_valve1)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_test_steps_valve1) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_test = pd.Series(data=0, index=np.arange(len(x_test_valve1)))
yhat_test.iloc[anomalous_data_indices] = 1



In [45]:
print('Training accuracy {:.4f}'.format(accuracy_score(y_train_valve1, yhat_train)))
print('Testing accuracy {:.4f}'.format(accuracy_score(y_test_valve1, yhat_test)))

Training accuracy 0.5240
Testing accuracy 0.7790


In [46]:
accuracy = accuracy_score(y_test_valve1, yhat_test)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_test_valve1, yhat_test)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test_valve1, yhat_test)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test_valve1, yhat_test)
print('F1 score: %f' % f1)

Accuracy: 0.778971
Precision: 0.626021
Recall: 0.868826
F1 score: 0.727704


In [47]:
conf_matrix = metrics.confusion_matrix(y_test_valve1, yhat_test)

TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate FAR false alarm rate
FPR = FP/(FP+TN)
# False negative rate MAR missing alarm rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)

# Overall accuracy
ACC = (TP+TN)/(TP+FP+FN+TN)

print(f'False Alarm Rate: {FPR}')
print(f'Missing Alarm Rate: {FNR}')

False Alarm Rate: 0.2673060884070058
Missing Alarm Rate: 0.1311740890688259


In [48]:
print(metrics.classification_report(y_test_valve1,yhat_test))

              precision    recall  f1-score   support

         0.0       0.92      0.73      0.81      2398
         1.0       0.63      0.87      0.73      1235

    accuracy                           0.78      3633
   macro avg       0.77      0.80      0.77      3633
weighted avg       0.82      0.78      0.78      3633



#### Test model for valve 2

In [49]:
model.fit(x_train_steps_valve2)

# results predicting
residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve2 - model.predict(x_train_steps_valve2)), axis=1), axis=1))
UCL = residuals.quantile(Q)





In [50]:
# train prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_valve2 - model.predict(x_train_steps_valve2)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_train_steps_valve2) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_train = pd.Series(data=0, index=np.arange(len(x_train_valve2)))
yhat_train.iloc[anomalous_data_indices] = 1


# test prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_valve2 - model.predict(x_test_steps_valve2)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_test_steps_valve2) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_test = pd.Series(data=0, index=np.arange(len(x_test_valve2)))
yhat_test.iloc[anomalous_data_indices] = 1
  
  



In [51]:
print('Training accuracy {:.4f}'.format(accuracy_score(y_train_valve2, yhat_train)))
print('Testing accuracy {:.4f}'.format(accuracy_score(y_test_valve2, yhat_test)))

Training accuracy 0.7979
Testing accuracy 0.6744


In [52]:
accuracy = accuracy_score(y_test_valve2, yhat_test)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_test_valve2, yhat_test)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test_valve2, yhat_test)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test_valve2, yhat_test)
print('F1 score: %f' % f1)

Accuracy: 0.674392
Precision: 0.860759
Recall: 0.344304
F1 score: 0.491863


In [53]:
conf_matrix = metrics.confusion_matrix(y_test_valve2, yhat_test)

TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate FAR false alarm rate
FPR = FP/(FP+TN)
# False negative rate MAR missing alarm rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)

# Overall accuracy
ACC = (TP+TN)/(TP+FP+FN+TN)

print(f'False Alarm Rate: {FPR}')
print(f'Missing Alarm Rate: {FNR}')

False Alarm Rate: 0.04700854700854701
Missing Alarm Rate: 0.6556962025316456


In [54]:
print(metrics.classification_report(y_test_valve2,yhat_test))

              precision    recall  f1-score   support

         0.0       0.63      0.95      0.76       468
         1.0       0.86      0.34      0.49       395

    accuracy                           0.67       863
   macro avg       0.75      0.65      0.63       863
weighted avg       0.74      0.67      0.64       863



#### Test model for other anomalies

In [55]:
model.fit(x_train_steps_other_anomaly)

# results predicting
residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_other_anomaly - model.predict(x_train_steps_other_anomaly)), axis=1), axis=1))
UCL = residuals.quantile(Q)





In [56]:
# train prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_train_steps_other_anomaly - model.predict(x_train_steps_other_anomaly)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_train_steps_other_anomaly) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_train = pd.Series(data=0, index=np.arange(len(x_train_other_anomaly)))
yhat_train.iloc[anomalous_data_indices] = 1


# test prediction
cnn_residuals = pd.Series(np.sum(np.mean(np.abs(x_test_steps_other_anomaly - model.predict(x_test_steps_other_anomaly)), axis=1), axis=1))

# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
anomalous_data = cnn_residuals > UCL
anomalous_data_indices = []
for data_idx in range(N_STEPS - 1, len(x_test_steps_other_anomaly) - N_STEPS + 1):
    if np.all(anomalous_data[data_idx - N_STEPS + 1 : data_idx]):
        anomalous_data_indices.append(data_idx)

yhat_test = pd.Series(data=0, index=np.arange(len(x_test_other_anomaly)))
yhat_test.iloc[anomalous_data_indices] = 1
  
  



In [57]:
print('Training accuracy {:.4f}'.format(accuracy_score(y_train_other_anomaly, yhat_train)))
print('Testing accuracy {:.4f}'.format(accuracy_score(y_test_other_anomaly, yhat_test)))

Training accuracy 0.5701
Testing accuracy 0.3627


In [58]:
accuracy = accuracy_score(y_test_other_anomaly, yhat_test)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_test_other_anomaly, yhat_test)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test_other_anomaly, yhat_test)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test_other_anomaly, yhat_test)
print('F1 score: %f' % f1)

Accuracy: 0.362696
Precision: 0.398485
Recall: 0.765648
F1 score: 0.524165


In [59]:
conf_matrix = metrics.confusion_matrix(y_test_other_anomaly, yhat_test)

TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate FAR false alarm rate
FPR = FP/(FP+TN)
# False negative rate MAR missing alarm rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)

# Overall accuracy
ACC = (TP+TN)/(TP+FP+FN+TN)

print(f'False Alarm Rate: {FPR}')
print(f'Missing Alarm Rate: {FNR}')

False Alarm Rate: 0.9784349969192853
Missing Alarm Rate: 0.23435225618631733


In [60]:
print(metrics.classification_report(y_test_other_anomaly,yhat_test))

              precision    recall  f1-score   support

         0.0       0.10      0.02      0.04      1623
         1.0       0.40      0.77      0.52      1374

    accuracy                           0.36      2997
   macro avg       0.25      0.39      0.28      2997
weighted avg       0.24      0.36      0.26      2997

