# CNN AutoEncoder Results

Helpful for Time-Series Anomaly Detection

Articles:

[Anomaly Detection with AutoEncoder](https://towardsdatascience.com/anomaly-detection-with-autoencoder-b4cdce4866a6)

[Machine Learning for Anomaly Detection and Condition Monitoring](https://towardsdatascience.com/how-to-use-machine-learning-for-anomaly-detection-and-condition-monitoring-6742f82900d7)

### Libraries

In [None]:
%%capture
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import roc_auc_score, precision_recall_curve, accuracy_score
from sklearn.metrics import classification_report, roc_curve, PrecisionRecallDisplay, RocCurveDisplay, auc
from sklearn.impute import SimpleImputer
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.metrics import AUC
from keras.models import Sequential

import os
import glob
import fileinput
import matplotlib.pyplot as plt
import seaborn as sns

# Install and import the PyOD AutoEncoder used for anomaly detection
!pip install pyod
from pyod.models.auto_encoder import AutoEncoder

# MTAKDD-19

In [None]:
# MTA-KDD'19: fetch the malware and the legitimate traffic tables from GitHub
dfMTA = pd.read_csv('https://raw.githubusercontent.com/IvanLetteri/MTA-KDD-19/master/datasetMalware33featues.csv')
dfLEG = pd.read_csv('https://raw.githubusercontent.com/IvanLetteri/MTA-KDD-19/master/datasetLegitimate33featues.csv')

# stack the malware rows on top of the legitimate rows (original row indexes are kept)
df = pd.concat(objs=[dfMTA, dfLEG])

# quick sanity check: overall shape, then the first few rows as the cell output
print(df.shape)
df.head()

(64554, 34)


Unnamed: 0,FinFlagDist,SynFlagDist,RstFlagDist,PshFlagDist,AckFlagDist,DNSoverIP,TCPoverIP,UDPoverIP,MaxLen,MinLen,...,NumPorts,FlowLEN,FlowLENrx,repeated_pkts_ratio,NumCon,NumIPdst,Start_flow,DeltaTimeFlow,HTTPpkts,label
0,0.478168,1.144246,-0.716937,0.67147,1.089129,-0.154516,0.154711,-0.156991,-0.009901,-1.028609,...,-0.889691,0.479515,0.284428,0.862578,5.253913,5.142702,0.5624,-0.052248,1.487787,1.0
1,-1.204652,0.316746,-0.716937,1.537622,1.135609,-0.154516,0.154711,-0.156991,0.722669,0.62864,...,0.293799,1.30184,-0.036629,-0.908981,-0.190341,-0.194497,0.5624,-0.315281,1.786307,1.0
2,-1.204652,-0.216194,-0.716937,-0.457715,-0.848148,-0.154516,0.154711,-0.156991,-0.172436,0.62864,...,-0.889691,-0.452523,-0.049479,-0.671509,-0.190341,-0.194497,0.5624,-1.197897,0.583908,1.0
3,0.478168,0.636365,-0.716937,1.61548,1.274407,-0.154516,0.154711,-0.156991,0.722669,0.62864,...,0.823293,1.41637,-0.042111,-1.626711,-0.190341,-0.194497,0.5624,-0.097754,1.805515,1.0
4,1.74784,1.867679,1.867097,0.903129,1.473227,6.778562,-7.182148,6.836627,0.485384,0.62864,...,1.116241,0.868445,0.678529,0.902818,5.253913,5.142781,0.563213,0.885878,1.614688,1.0


In [None]:
# Train a PyOD AutoEncoder on MTA-KDD'19 and collect evaluation statistics.
# Produces `results` (list with one metrics dict) and `precision_recall_all`
# (precision / recall / thresholds table) for the cells below.
results = []

# split into input (X) and output (y) variables
# NOTE(review): the first 33 columns are taken as features; the head() output
# above lists an 'Unnamed: 0' column first, which looks like a CSV row index —
# confirm whether it should really be fed to the model.
X = df.iloc[:,:33]
y = df.iloc[:,33]

# split into training (80%), validation (10%), and test (10%) sets
# (the validation split is created but not used by this cell)
rand_seed = 123
X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size = 0.8,
                                                  random_state = rand_seed,
                                                  stratify = y)
X_valid, X_test, y_valid, y_test = train_test_split(X_rem, y_rem,
                                                    train_size = 0.5,
                                                    random_state = rand_seed,
                                                    stratify = y_rem)

# Creating and fitting model (unsupervised: labels are not passed to fit)
model = AutoEncoder(contamination=0.05, hidden_neurons = [3,3],
                        epochs = 8, optimizer='Nadam')
model.fit(X_train)

# Hard outlier labels. Per the PyOD API, predict() already returns binary
# labels (thresholded via `contamination`), so `> 0.5` only coerces them to
# 0/1 integers — it is not a probability cut-off.
predictions = (model.predict(X_test) > 0.5).astype(int)

# Continuous anomaly scores (higher = more anomalous). Ranking metrics such as
# ROC AUC and the precision-recall curve need these; feeding them the hard
# labels collapses the curve to a single operating point.
anomaly_scores = model.decision_function(X_test)

# calculate metrics for the model
precision, recall, fscore, support = score(y_test, predictions,
                                           average = 'binary')
# named `auc_score` so the `auc` function imported from sklearn.metrics is not shadowed
auc_score = roc_auc_score(y_test, anomaly_scores)
accuracy = accuracy_score(y_test, predictions)

# append metrics to results
results.append(
    {
      'Accuracy': accuracy,
      'Precision': precision,
      'Recall': recall,
      'F1 Score': fscore,
      'AUC': auc_score
    })

# Precision-recall curve data. precision_recall_curve returns one threshold
# fewer than precision/recall points, so the threshold column is padded with
# NaN on the final row to keep the three columns aligned.
pr_precision, pr_recall, pr_thresholds = precision_recall_curve(y_test,
                                                                anomaly_scores)
precision_recall = pd.DataFrame({
    'precision': pr_precision,
    'recall': pr_recall,
    'thresholds': np.append(pr_thresholds, np.nan),
})

# Only one model is evaluated here, so the "all" table is just this curve
# (DataFrame.append was removed in pandas 2.0 and is no longer used).
precision_recall_all = precision_recall.copy()

print(results)

In [None]:
# tabulate the collected metric dictionaries (one row per evaluated model)
results_df = pd.DataFrame(data=results)
results_df

In [None]:
# write results and precision_recall to csv (uncomment the lines below to export)
# results_df.to_csv('results.csv') 
# precision_recall_all.to_csv('precision_recall.csv')

In [None]:
# peek at the first rows of the precision / recall / thresholds table
precision_recall_all.head(n=5)

In [None]:
# precision (y-axis) against recall (x-axis) for the model evaluated above
sns.lineplot(x='recall', y='precision', data=precision_recall_all)



# CTU-13

In [None]:
# list of scenarios (each one is read from '<colab_path><name>.txt')
scenarios = ['1','2','3','4','5','6', '7','8','9','10', '11', '12', '13_test']
colab_path = '/content/'

# per-scenario metric rows, and per-scenario precision-recall frames that are
# concatenated once after the loop instead of growing a DataFrame row by row
results = []
precision_recall_frames = []

# iterate over scenarios to build, train, and evaluate an AutoEncoder model
for filename in scenarios:

    # load the dataset (no header row; column 0 is used as the label below)
    scenario = pd.read_csv(colab_path + filename + '.txt', header=None)

    # split into input (X) and output (y) variables
    X = scenario.iloc[:,1:]
    y = scenario.iloc[:,0]

    # split into training (80%), validation (10%), and test (10%) sets
    # (the validation split is created but not used by this cell)
    rand_seed = 123
    X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size = 0.8,
                                                      random_state = rand_seed,
                                                      stratify = y)
    X_valid, X_test, y_valid, y_test = train_test_split(X_rem, y_rem,
                                                        train_size = 0.5,
                                                        random_state = rand_seed,
                                                        stratify = y_rem)

    # impute missing values with column means learned from the training split
    # only, so no statistics from the validation/test rows leak into training.
    # (The `verbose` argument was removed from SimpleImputer in scikit-learn 1.3.)
    imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
    X_train = imputer.fit_transform(X_train)
    X_valid = imputer.transform(X_valid)
    X_test = imputer.transform(X_test)

    # Creating and fitting model (unsupervised: labels are not passed to fit)
    model = AutoEncoder(contamination=0.05, hidden_neurons=[3,3],
                        epochs=8, optimizer='Nadam')
    model.fit(X_train)

    # Hard outlier labels. Per the PyOD API, predict() already returns binary
    # labels, so `> .5` only coerces them to 0/1 integers.
    predictions = (model.predict(X_test) > .5).astype(int)
    # Continuous anomaly scores (higher = more anomalous) for ranking metrics.
    anomaly_scores = model.decision_function(X_test)

    # print classification report
    print(filename)
    print(classification_report(y_test, predictions))

    # calculate metrics for each model
    precision, recall, fscore, support = score(y_test, predictions,
                                                   average = 'binary')
    try:
        auc_score = roc_auc_score(y_test, anomaly_scores)
    except ValueError:
        # AUC could not be computed for this scenario (e.g. the test labels
        # contain a single class). Record NaN rather than silently reusing
        # the value left over from a previous scenario.
        auc_score = np.nan
    accuracy = accuracy_score(y_test, predictions)

    # append metrics to results
    results.append(
        {
            'Scenario': filename,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': fscore,
            'AUC': auc_score
        })

    # Precision-recall curve data. precision_recall_curve returns one
    # threshold fewer than precision/recall points, so the threshold column
    # is padded with NaN on the final row to keep the columns aligned.
    pr_precision, pr_recall, pr_thresholds = precision_recall_curve(
        y_test, anomaly_scores)
    precision_recall = pd.DataFrame({
        'precision': pr_precision,
        'recall': pr_recall,
        'thresholds': np.append(pr_thresholds, np.nan),
    })
    precision_recall['scenario'] = filename
    precision_recall_frames.append(precision_recall)

    print("scenario", filename, "complete")

# combine all scenarios in one pass (DataFrame.append was removed in pandas 2.0)
if precision_recall_frames:
    precision_recall_all = pd.concat(precision_recall_frames)
else:
    precision_recall_all = pd.DataFrame()



Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_95 (Dense)            (None, 28)                812       
                                                                 
 dropout_76 (Dropout)        (None, 28)                0         
                                                                 
 dense_96 (Dense)            (None, 28)                812       
                                                                 
 dropout_77 (Dropout)        (None, 28)                0         
                                                                 
 dense_97 (Dense)            (None, 3)                 87        
                                                                 
 dropout_78 (Dropout)        (None, 3)                 0         
                                                                 
 dense_98 (Dense)            (None, 3)               

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 1 complete




Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_100 (Dense)           (None, 28)                812       
                                                                 
 dropout_80 (Dropout)        (None, 28)                0         
                                                                 
 dense_101 (Dense)           (None, 28)                812       
                                                                 
 dropout_81 (Dropout)        (None, 28)                0         
                                                                 
 dense_102 (Dense)           (None, 3)                 87        
                                                                 
 dropout_82 (Dropout)        (None, 3)                 0         
                                                                 
 dense_103 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 2 complete




Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_105 (Dense)           (None, 28)                812       
                                                                 
 dropout_84 (Dropout)        (None, 28)                0         
                                                                 
 dense_106 (Dense)           (None, 28)                812       
                                                                 
 dropout_85 (Dropout)        (None, 28)                0         
                                                                 
 dense_107 (Dense)           (None, 3)                 87        
                                                                 
 dropout_86 (Dropout)        (None, 3)                 0         
                                                                 
 dense_108 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 3 complete




Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_110 (Dense)           (None, 28)                812       
                                                                 
 dropout_88 (Dropout)        (None, 28)                0         
                                                                 
 dense_111 (Dense)           (None, 28)                812       
                                                                 
 dropout_89 (Dropout)        (None, 28)                0         
                                                                 
 dense_112 (Dense)           (None, 3)                 87        
                                                                 
 dropout_90 (Dropout)        (None, 3)                 0         
                                                                 
 dense_113 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 4 complete




Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_115 (Dense)           (None, 28)                812       
                                                                 
 dropout_92 (Dropout)        (None, 28)                0         
                                                                 
 dense_116 (Dense)           (None, 28)                812       
                                                                 
 dropout_93 (Dropout)        (None, 28)                0         
                                                                 
 dense_117 (Dense)           (None, 3)                 87        
                                                                 
 dropout_94 (Dropout)        (None, 3)                 0         
                                                                 
 dense_118 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_120 (Dense)           (None, 28)                812       
                                                                 
 dropout_96 (Dropout)        (None, 28)                0         
                                                                 
 dense_121 (Dense)           (None, 28)                812       
                                                                 
 dropout_97 (Dropout)        (None, 28)                0         
                                                                 
 dense_122 (Dense)           (None, 3)                 87        
                                                                 
 dropout_98 (Dropout)        (None, 3)                 0         
                                                                 
 dense_123 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 6 complete




Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_125 (Dense)           (None, 28)                812       
                                                                 
 dropout_100 (Dropout)       (None, 28)                0         
                                                                 
 dense_126 (Dense)           (None, 28)                812       
                                                                 
 dropout_101 (Dropout)       (None, 28)                0         
                                                                 
 dense_127 (Dense)           (None, 3)                 87        
                                                                 
 dropout_102 (Dropout)       (None, 3)                 0         
                                                                 
 dense_128 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_130 (Dense)           (None, 28)                812       
                                                                 
 dropout_104 (Dropout)       (None, 28)                0         
                                                                 
 dense_131 (Dense)           (None, 28)                812       
                                                                 
 dropout_105 (Dropout)       (None, 28)                0         
                                                                 
 dense_132 (Dense)           (None, 3)                 87        
                                                                 
 dropout_106 (Dropout)       (None, 3)                 0         
                                                                 
 dense_133 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 8 complete




Model: "sequential_27"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_135 (Dense)           (None, 28)                812       
                                                                 
 dropout_108 (Dropout)       (None, 28)                0         
                                                                 
 dense_136 (Dense)           (None, 28)                812       
                                                                 
 dropout_109 (Dropout)       (None, 28)                0         
                                                                 
 dense_137 (Dense)           (None, 3)                 87        
                                                                 
 dropout_110 (Dropout)       (None, 3)                 0         
                                                                 
 dense_138 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 9 complete




Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_140 (Dense)           (None, 28)                812       
                                                                 
 dropout_112 (Dropout)       (None, 28)                0         
                                                                 
 dense_141 (Dense)           (None, 28)                812       
                                                                 
 dropout_113 (Dropout)       (None, 28)                0         
                                                                 
 dense_142 (Dense)           (None, 3)                 87        
                                                                 
 dropout_114 (Dropout)       (None, 3)                 0         
                                                                 
 dense_143 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 10 complete




Model: "sequential_29"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_145 (Dense)           (None, 28)                812       
                                                                 
 dropout_116 (Dropout)       (None, 28)                0         
                                                                 
 dense_146 (Dense)           (None, 28)                812       
                                                                 
 dropout_117 (Dropout)       (None, 28)                0         
                                                                 
 dense_147 (Dense)           (None, 3)                 87        
                                                                 
 dropout_118 (Dropout)       (None, 3)                 0         
                                                                 
 dense_148 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_150 (Dense)           (None, 28)                812       
                                                                 
 dropout_120 (Dropout)       (None, 28)                0         
                                                                 
 dense_151 (Dense)           (None, 28)                812       
                                                                 
 dropout_121 (Dropout)       (None, 28)                0         
                                                                 
 dense_152 (Dense)           (None, 3)                 87        
                                                                 
 dropout_122 (Dropout)       (None, 3)                 0         
                                                                 
 dense_153 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


scenario 12 complete




Model: "sequential_31"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_155 (Dense)           (None, 28)                812       
                                                                 
 dropout_124 (Dropout)       (None, 28)                0         
                                                                 
 dense_156 (Dense)           (None, 28)                812       
                                                                 
 dropout_125 (Dropout)       (None, 28)                0         
                                                                 
 dense_157 (Dense)           (None, 3)                 87        
                                                                 
 dropout_126 (Dropout)       (None, 3)                 0         
                                                                 
 dense_158 (Dense)           (None, 3)               

  precision_recall_all = precision_recall_all.append(precision_recall)


In [None]:
# Tabulate the per-scenario metrics, ordered by scenario number.
# 'Scenario' holds strings such as '2', '10' and '13_test'; a plain sort is
# lexicographic ('1', '10', '11', ..., '2'), so sort on the leading integer.
# Names without digits become NaN and are placed last.
results_df = pd.DataFrame(results).sort_values(
    'Scenario',
    key=lambda names: names.str.extract(r'(\d+)', expand=False).astype(float),
)
results_df

In [None]:
# write results and precision_recall to csv (uncomment the lines below to export)
# results_df.to_csv('results.csv') 
# precision_recall_all.to_csv('precision_recall.csv')

In [None]:
# one precision-vs-recall panel per scenario, laid out three panels per row
graph = sns.FacetGrid(data=precision_recall_all, col='scenario', col_wrap=3)
graph.map(sns.lineplot, 'recall', 'precision')