<a href="https://colab.research.google.com/github/ANIZAI/Machine-Learning-based-Anomaly-Detection/blob/main/cnn_anomaly_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#imports_part_1.py
from pathlib import Path # convenient way to deal w/ paths
import plotly.graph_objects as go # creates plots
import numpy as np # standard for data processing
import pandas as pd # standard for data processing
import json # we have anomalies' timestamps in json format

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#path_specification.py
# All inputs are read from the Google Drive mounted at /content/drive, so these
# absolute paths are only valid in a Colab session with that Drive layout.
# (The original NAB-repository layout is kept below for reference.)
#nab = Path.cwd()/'NAB'

# This folder contains all files w/ metrics
#data_path = nab/'data'

# JSON file w/ timestamps of anomalies for the metric files.
# NOTE(review): later cells look labels up with the full Drive path as the key
# (anomalies_timestamps[training_filename]) - confirm the JSON is keyed that way.
labels_filepath = '/content/drive/MyDrive/combined_labels.json'

# Full path to the training file
training_filename = '/content/drive/MyDrive/rds_cpu_utilization_cc0c53.csv'

# Full path to the validation file
valid_filename = '/content/drive/MyDrive/rds_cpu_utilization_e47b3b.csv'

In [None]:
#labels_loading.py
# Read the whole labels file and parse it; the context manager closes the handle.
with open(labels_filepath, 'r') as labels_file:
    anomalies_timestamps = json.loads(labels_file.read())


In [None]:
#read_data.py
# Both metric files are parsed with the same (default) CSV options.
train, valid = (
    pd.read_csv(csv_path)
    for csv_path in (training_filename, valid_filename)
)

In [None]:
train.head()

Unnamed: 0,timestamp,value
0,2014-02-14 14:30:00,6.456
1,2014-02-14 14:35:00,5.816
2,2014-02-14 14:40:00,6.268
3,2014-02-14 14:45:00,5.816
4,2014-02-14 14:50:00,5.862


In [None]:
valid.head()

Unnamed: 0,timestamp,value
0,2014-04-10 00:02:00,14.012
1,2014-04-10 00:07:00,13.334
2,2014-04-10 00:12:00,15.0
3,2014-04-10 00:17:00,13.998
4,2014-04-10 00:22:00,14.332


In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
def parse_and_standardize(df: pd.DataFrame, scaler: StandardScaler = None):
    """Parse timestamps and add a standardized copy of ``value`` to ``df`` in place.

    Args:
        df: Frame with ``timestamp`` and ``value`` columns. It is modified:
            ``timestamp`` is converted with ``pd.to_datetime`` and a
            ``stand_value`` column is written.
        scaler: Scaler to reuse. When it is falsy (the default ``None``) a new
            ``StandardScaler`` is fitted on ``df['value']``.

    Returns:
        The scaler that produced ``stand_value`` (the new one or the one passed in).
    """
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    # The scaler API works on 2-D input, hence the single-column reshape.
    raw_column = df['value'].values.reshape(-1, 1)
    if not scaler:
        scaler = StandardScaler().fit(raw_column)
    df['stand_value'] = scaler.transform(raw_column)
    return scaler

data_scaler = parse_and_standardize(train)
parse_and_standardize(valid, data_scaler)

StandardScaler(copy=True, with_mean=True, with_std=True)

## Get anomalies from the data

In [None]:
# Boolean masks: rows whose timestamp is listed in the labels for that file
train_is_labelled = train['timestamp'].isin(anomalies_timestamps[training_filename])
valid_is_labelled = valid['timestamp'].isin(anomalies_timestamps[valid_filename])
train_anomalies = train[train_is_labelled]
valid_anomalies = valid[valid_is_labelled]

In [None]:
train_anomalies

Unnamed: 0,timestamp,value,stand_value
3080,2014-02-25 07:15:00,25.1033,4.652449
3579,2014-02-27 00:50:00,19.165,3.026441


In [None]:
valid_anomalies

Unnamed: 0,timestamp,value,stand_value
946,2014-04-13 06:52:00,76.23,18.651805
2585,2014-04-18 23:27:00,25.11,4.654283






## Plot data usage with anomalies

### Training data

In [None]:
import plotly.graph_objects as go

In [None]:
# Raw training series with the labelled anomalies drawn on top as larger green markers.
layout = dict(xaxis=dict(title='Timestamp'), yaxis=dict(title='CPU Utilization'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=train['timestamp'], y=train['value'], 
                         mode='markers', name='Non-anomaly',
                         marker=dict(color='blue')))
# The last add_trace call returns the figure, which is what the cell displays.
fig.add_trace(go.Scatter(x=train_anomalies['timestamp'], y=train_anomalies['value'], 
                         mode='markers', name='Anomaly',
                         marker=dict(color='green', size=13)))

### Validation data

In [None]:
# Raw validation series with the labelled anomalies drawn on top.
# Axis titles match the training-data figure so both plots read the same way.
layout = dict(xaxis=dict(title='Timestamp'), yaxis=dict(title='CPU Utilization'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=valid['timestamp'], y=valid['value'], 
                         mode='markers', name='Non-anomaly',
                         marker=dict(color='blue')))
# The last add_trace call returns the figure, which is what the cell displays.
fig.add_trace(go.Scatter(x=valid_anomalies['timestamp'], y=valid_anomalies['value'], 
                         mode='markers', name='Anomaly',
                         marker=dict(color='green', size=13)))

## Label anomalies and non-anomalies accordingly

In [None]:
# Binary target column: 1 for labelled anomalies, 0 for everything else
train['anomaly'] = 0
train.loc[train_anomalies.index, 'anomaly'] = 1
# train_anomalies.index holds index *labels*, so select with .loc (label-based)
# rather than .iloc (position-based); they only coincide for a 0..n-1 index.
train.loc[train_anomalies.index]

Unnamed: 0,timestamp,value,stand_value,anomaly
3080,2014-02-25 07:15:00,25.1033,4.652449,1
3579,2014-02-27 00:50:00,19.165,3.026441,1


In [None]:
# Binary target column: 1 for labelled anomalies, 0 for everything else
valid['anomaly'] = 0
valid.loc[valid_anomalies.index, 'anomaly'] = 1
# valid_anomalies.index holds index *labels*, so select with .loc (label-based)
# rather than .iloc (position-based); they only coincide for a 0..n-1 index.
valid.loc[valid_anomalies.index]

Unnamed: 0,timestamp,value,stand_value,anomaly
946,2014-04-13 06:52:00,76.23,18.651805,1
2585,2014-04-18 23:27:00,25.11,4.654283,1


In [None]:
train.head()

Unnamed: 0,timestamp,value,stand_value,anomaly
0,2014-02-14 14:30:00,6.456,-0.453498,0
1,2014-02-14 14:35:00,5.816,-0.628741,0
2,2014-02-14 14:40:00,6.268,-0.504976,0
3,2014-02-14 14:45:00,5.816,-0.628741,0
4,2014-02-14 14:50:00,5.862,-0.616145,0


# Dataset Preparation for CNN





In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:
class CPUDataset(Dataset):
    """Sliding-window dataset over the ``stand_value`` column of a frame.

    Every item is a window of ``size + 1`` consecutive values: the first
    ``size`` values are the model input, the last one is the target.

    Args:
        data: Frame containing a numeric ``stand_value`` column.
        size: Number of input values per window.
        step: Offset between the starts of consecutive windows.
    """

    def __init__(self, data: pd.DataFrame, size: int, 
                 step: int = 1):
        # Convert through NumPy instead of handing the Series to the tensor
        # constructor: the Series would be read like a generic sequence
        # (item access by label), which depends on the index being 0..n-1.
        values = torch.tensor(data['stand_value'].values, dtype=torch.float32)
        # (n_windows, size + 1) -> (n_windows, 1, size + 1): one channel for Conv1d
        self.chunks = values.unfold(0, size + 1, step).unsqueeze(1)

    def __len__(self):
        """Number of windows."""
        return self.chunks.size(0)
    
    def __getitem__(self, i):
        """Return ``(x, y)`` for an int or slice ``i``.

        For an int: ``x`` has shape (1, size) and ``y`` shape (1,).
        For a slice: ``x`` has shape (k, 1, size) and ``y`` shape (k, 1).
        """
        x = self.chunks[i, :, :-1]
        # Drop the channel axis of the target, keep the trailing length-1 axis
        y = self.chunks[i, :, -1:].squeeze(1)
        return x, y

In [None]:
# Number of past observations fed to the model to predict the next one
n_factors = 10
# Each item is (n_factors inputs, 1 target) cut from the 'stand_value' column
train_ds = CPUDataset(train, n_factors)
valid_ds = CPUDataset(valid, n_factors)

# convolution layer

In [None]:
# here PyTorch has all neural net functions and activations
import torch.nn as nn

def conv_layer(in_feat, out_feat, kernel_size=3, stride=1,
               padding=1, relu=True):
    """Build a Conv1d -> BatchNorm1d (-> ReLU) block as an ``nn.Sequential``.

    Args:
        in_feat: Number of input channels.
        out_feat: Number of output channels.
        kernel_size, stride, padding: Forwarded to ``nn.Conv1d``.
        relu: Append a ``nn.ReLU`` activation when true.

    Returns:
        ``nn.Sequential`` holding the two or three modules in that order.
    """
    # The convolution is created without a bias term.
    conv = nn.Conv1d(in_feat, out_feat, kernel_size=kernel_size,
                     stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm1d(out_feat)
    modules = (conv, norm, nn.ReLU()) if relu else (conv, norm)
    return nn.Sequential(*modules)

In [None]:
class ResBlock(nn.Module):
    """Residual block: two ``conv_layer`` stages plus a skip connection.

    When the channel count changes, the skip path goes through a 1x1
    ``conv_layer`` (no activation) so both summands have ``out_feat`` channels.
    No activation is applied after the addition.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.conv1 = conv_layer(in_feat, out_feat)
        self.conv2 = conv_layer(out_feat, out_feat, relu=False)
        if self.apply_shortcut:
            # Projection on the skip path to match the channel count
            self.shortcut = conv_layer(in_feat, out_feat,
                                       kernel_size=1, padding=0,
                                       relu=False)

    @property
    def apply_shortcut(self):
        """True when input and output channel counts differ."""
        return self.in_feat != self.out_feat

    def forward(self, x):
        main_path = self.conv1(x)
        skip = self.shortcut(x) if self.apply_shortcut else x
        return skip + self.conv2(main_path)

In [None]:
class AdaptiveConcatPool1d(nn.Module):
    """Global max pooling and global average pooling, concatenated on the channel axis.

    For an input of shape (batch, channels, length) the output has shape
    (batch, 2 * channels, 1): max-pooled channels first, then averaged ones.
    """

    def __init__(self):
        super().__init__()
        self.ap = nn.AdaptiveAvgPool1d(1)
        self.mp = nn.AdaptiveMaxPool1d(1)

    def forward(self, x):
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        return torch.cat((pooled_max, pooled_avg), dim=1)

In [None]:
class CNN(nn.Module):
    """1-D residual CNN: eight ``ResBlock`` stages followed by a pooled linear head.

    Args:
        out_size: Number of outputs of the final linear layer.
    """

    def __init__(self, out_size):
        super().__init__()
        # Channel widths along the stack; consecutive pairs give the
        # (in, out) channels of each ResBlock: 1->8, 8->8, 8->16, ..., 64->64.
        widths = (1, 8, 8, 16, 16, 32, 32, 64, 64)
        self.base = nn.Sequential(
            *(ResBlock(n_in, n_out) for n_in, n_out in zip(widths, widths[1:]))
        )
        self.head = nn.Sequential(
            AdaptiveConcatPool1d(),  # shape = batch, 128, 1 (max + avg of 64 channels)
            nn.Flatten(),
            nn.Linear(128, out_size),
        )

    def forward(self, x):
        return self.head(self.base(x))

# Training of the model

In [None]:
from tqdm.notebook import tqdm
import torch.optim as opt
import math
import copy

Definition of the training loop

In [None]:
def train_model(model: 'CNN', dataloaders: dict, optimizer: opt.Optimizer, 
                scheduler, criterion, device: torch.device, epochs: int):
    """Train ``model`` and evaluate it after every epoch.

    Args:
        model: Network to optimize; moved to ``device``.
        dataloaders: Dict with ``'train'`` and ``'valid'`` loaders yielding ``(x, y)``.
        optimizer: Optimizer over the model parameters.
        scheduler: Learning-rate scheduler stepped once per training *batch*.
        criterion: Loss called as ``criterion(model(x), y)``.
        device: Device the batches are moved to.
        epochs: Number of passes over the training loader.

    Returns:
        ``{'train': [...], 'valid': [...]}`` with one sample-weighted mean loss
        per epoch (``nan`` for a phase whose loader yielded no samples).
    """
    losses_data = {'train': [], 'valid': []}
    model.to(device)
    
    # Loop over epochs
    for epoch in tqdm(range(epochs)):
        print(f'Epoch {epoch}/{epochs-1}')
        
        # Training and validation phases
        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            # train(True) == train(), train(False) == eval()
            model.train(is_training)

            running_loss = 0.
            running_total = 0
            
            # Loop over batches of data
            for x, y in tqdm(dataloaders[phase], leave=False):
                x = x.to(device)
                y = y.to(device)

                # Autograd is only needed while training
                with torch.set_grad_enabled(is_training):
                    out = model(x)
                    loss = criterion(out, y)

                    if is_training:
                        # Gradients are only touched in the training phase
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        scheduler.step()

                # Weight by batch size so a smaller last batch does not skew the mean
                running_loss += loss.item() * y.size(0)
                running_total += y.size(0)

            # Guard against an empty loader instead of raising ZeroDivisionError
            epoch_loss = running_loss / running_total if running_total else float('nan')
            print(f'{phase.capitalize()} Loss: {epoch_loss}')
            losses_data[phase].append(epoch_loss)
    return losses_data

Initialization of the model, dataloaders and training parameters

In [None]:
epochs = 50
# Peak learning rate of the one-cycle schedule
max_lr = 1e-3
cnn_model = CNN(out_size=1)
dataloaders = {
    'train': DataLoader(train_ds, batch_size=128, shuffle=True),
    'valid': DataLoader(valid_ds, batch_size=128)
}
# OneCycleLR sets the optimizer's learning rate itself (derived from max_lr), so
# the value given to Adam is never used for an update; keep it equal to max_lr
# instead of an unrelated, misleading constant.
optim = opt.Adam(cnn_model.parameters(), lr=max_lr, weight_decay=1e-3)
# The scheduler is stepped once per training batch, hence steps_per_epoch
sched = opt.lr_scheduler.OneCycleLR(optim, max_lr=max_lr, steps_per_epoch=len(dataloaders['train']), epochs=epochs)
criterion = nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Training

In [None]:
losses = train_model(cnn_model, dataloaders, optim, sched, criterion, device, epochs)


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

Epoch 0/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.5924120544794482


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 7.201112323587418
Epoch 1/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.1318229537425736


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 3.6531462460119055
Epoch 2/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.0816447966834433


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 9.588012379965102
Epoch 3/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.05954241132236784


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 6.470523613667618
Epoch 4/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.0507812244360984


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 9.968464430204854
Epoch 5/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.04870546831643326


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 4.732889224964549
Epoch 6/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.04068523391614442


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 4.7496432603546666
Epoch 7/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.04833117910081448


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 23.87640881241998
Epoch 8/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.09609839561557901


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 6.385634009960218
Epoch 9/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.0747003069972737


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 3.0302861693603727
Epoch 10/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.05256099327991164


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.7816179021303776
Epoch 11/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.044896244648927365


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.6577089151774159
Epoch 12/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.039356939983658384


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 1.1037251063539637
Epoch 13/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.0445107842398662


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.5673698670476719
Epoch 14/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.032033349225720854


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.5956492220463295
Epoch 15/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.026685807055528338


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.7559249613902748
Epoch 16/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.024521110219994865


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.6896491007298631
Epoch 17/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.024803572183837053


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.6866751931485461
Epoch 18/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.023046864167945057


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.883657482312367
Epoch 19/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.02711140286327446


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3803607985962688
Epoch 20/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.024383769168234596


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3830706990295711
Epoch 21/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.023167928657621843


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3704941401740676
Epoch 22/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.02260064113142329


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.5149371963807565
Epoch 23/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.0258024031067594


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 1.0946187275609587
Epoch 24/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.02084513703358375


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.33944788469211906
Epoch 25/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.019084623719791107


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3164568179685758
Epoch 26/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.019476147965041338


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3453165696723612
Epoch 27/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.021778410348776322


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4274134206451746
Epoch 28/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.018591624196046385


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.43132825278779396
Epoch 29/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.01946238802643127


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.574540099409728
Epoch 30/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.01891149598817989


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.47777791436870914
Epoch 31/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.020392854783215345


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.30022346729635896
Epoch 32/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.01868209573206381


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.493909530819559
Epoch 33/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.01837348410084451


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3289451617349267
Epoch 34/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.017792128940904352


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.48987514880953115
Epoch 35/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016987811406302785


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.38257146876792536
Epoch 36/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.01727243605954827


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.30255109098036453
Epoch 37/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.017167086483774525


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4422990386163578
Epoch 38/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.017098091559392903


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4202456352597798
Epoch 39/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.017163955164246153


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.39895942302588383
Epoch 40/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016414735444938586


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3994948095028404
Epoch 41/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016241174585346647


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.3545554944164477
Epoch 42/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016671351809425104


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.43387487305449585
Epoch 43/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016203601785793286


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.38371942959603594
Epoch 44/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.01618790691222927


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4505386911906869
Epoch 45/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016307903593105057


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4463647654745959
Epoch 46/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.016051493684216142


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4247415851799781
Epoch 47/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.015966346454521812


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4408451179880695
Epoch 48/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.015949694451346644


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.382096799213693
Epoch 49/49


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Train Loss: 0.015779316469782447


HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

Valid Loss: 0.4001402238829405



Plot of the training and validation losses

In [None]:
# One point per epoch for each phase; `losses` is the dict returned by train_model.
layout = dict(xaxis=dict(title='Epoch'), yaxis=dict(title='Loss'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(y=losses['train'], mode='lines', name='Train Loss',))
# The last add_trace call returns the figure, which is what the cell displays.
fig.add_trace(go.Scatter(y=losses['valid'], mode='lines', name='Valid Loss'))

In [None]:
# Lowest loss reached in each phase. Pass the list itself: unpacking with
# min(*values) raises TypeError when there is only a single epoch.
print(min(losses['train']))
print(min(losses['valid']))

0.015779316469782447
0.30022346729635896


# Results Evaluation

## Plot of the 'pure' result for train data

Computing the 'pure' result, i.e. the raw model predictions before any thresholding

In [None]:
# Inference only: switch to evaluation mode (eval() returns the model itself)
cnn_model = cnn_model.eval()

# All training windows at once; the targets are not needed for prediction
train_windows, _ = train_ds[:]
with torch.no_grad():
    res_train = cnn_model(train_windows.to(device)).cpu()



Plot of the result:


*   Blue points - real values
*   Orange points - predicted values

In [None]:
layout = dict(xaxis=dict(title='Timestamp'), yaxis=dict(title='CPU Utilization (Standardized)'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=train['timestamp'], y=train['stand_value'], 
                         mode='markers', name='Ground Truth',
                         marker=dict(color='blue')))
# Prediction i is the forecast for row i + n_factors (the first n_factors rows
# only ever serve as inputs), so skip those rows to align timestamps with
# predictions instead of plotting them n_factors samples too early.
fig.add_trace(go.Scatter(x=train['timestamp'].iloc[n_factors:], y=torch.flatten(res_train), 
                         mode='markers', name='Predicted Value', 
                         marker=dict(color='orange')))

## Plot of the 'pure' result for valid data

Computing the 'pure' result, i.e. the raw model predictions before any thresholding

In [None]:
# Predictions for all validation windows at once; targets are not needed here
valid_windows, _ = valid_ds[:]
with torch.no_grad():
    res_valid = cnn_model(valid_windows.to(device)).cpu()

Plot of the result:


*   Blue points - real values
*   Orange points - predicted values

In [None]:
layout = dict(xaxis=dict(title='Timestamp'), yaxis=dict(title='CPU Utilization (Standardized)'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=valid['timestamp'], y=valid['stand_value'], 
                         mode='markers', name='Ground Truth',
                         marker=dict(color='blue')))
# Prediction i is the forecast for row i + n_factors (the first n_factors rows
# only ever serve as inputs), so skip those rows to align timestamps with
# predictions instead of plotting them n_factors samples too early.
fig.add_trace(go.Scatter(x=valid['timestamp'].iloc[n_factors:], y=torch.flatten(res_valid), 
                         mode='markers', name='Predicted Value', 
                         marker=dict(color='orange')))

## Anomaly detection with one threshold

We apply the **three-sigma rule** to the model's prediction errors to detect anomalies

### Threshold calculation

Calculation of the prediction errors for both datasets; the static threshold is derived from the **training** errors only

In [None]:
def calculate_prediction_errors(
    model: CNN, dataset: CPUDataset, criterion, 
    device: torch.device
    ):
    """Compute the loss of every dataset item separately.

    Args:
        model: Trained network; called as-is, its train/eval mode is not changed here.
        dataset: Iterable of ``(x, y)`` items.
        criterion: Loss called as ``criterion(prediction, target)``.
        device: Device the item tensors are moved to before the forward pass.

    Returns:
        List with one CPU tensor (the ``criterion`` result) per item, in dataset order.
    """
    def sample_error(x, y):
        # [None] prepends a batch axis of length 1 to both tensors
        batch_x = x.to(device)[None]
        batch_y = y.to(device)[None]
        return criterion(model(batch_x), batch_y).cpu()

    # No gradients are needed for evaluation
    with torch.no_grad():
        return [sample_error(x, y) for x, y in tqdm(dataset)]

train_pred_errors = calculate_prediction_errors(cnn_model, train_ds, criterion, device)
valid_pred_errors = calculate_prediction_errors(cnn_model, valid_ds, criterion, device)


HBox(children=(FloatProgress(value=0.0, max=4022.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4022.0), HTML(value='')))




In [None]:
np.mean(train_pred_errors), np.std(train_pred_errors)

(0.0150372125, 0.3762829)

The threshold is calculated as the **mean** of the prediction errors **+ 3 standard deviations** of them

In [None]:
pred_error_threshold = np.mean(train_pred_errors) + 3 * np.std(train_pred_errors)
pred_error_threshold

1.1438859142363071

### Data filtering

Then, we filter results of the model according to the threshold and get the **indexes** of detected anomalies

In [None]:
from typing import Union

def detect_anomalies(
    result: torch.Tensor, dataset: 'CPUDataset', 
    threshold: Union[float, pd.Series], n_factors: int = 0,
    loss_fn=None
):
    """Return the row indexes whose prediction error exceeds the threshold.

    Args:
        result: Model predictions, one entry per dataset item.
        dataset: Indexable collection of ``(x, y)`` items; only ``y`` is used.
        threshold: Either one static limit, or a ``pd.Series`` holding one
            limit per dataset item (read by position).
        n_factors: Offset added to every detected index. A prediction belongs
            to the row right after its input window, so pass the window length
            to get indexes into the original frame.
        loss_fn: Error function called as ``loss_fn(prediction, target)``.
            Defaults to the notebook-level ``criterion``, as before.

    Returns:
        List of ``i + n_factors`` for every item ``i`` flagged as anomalous.
    """
    if loss_fn is None:
        # Backward-compatible default: the loss object defined at notebook level
        loss_fn = criterion
    # isinstance also accepts Series subclasses, unlike an exact type comparison
    is_dynamic = isinstance(threshold, pd.Series)

    anomalies_idxs = []
    for i in range(len(dataset)):
        # Positional lookup: the i-th limit belongs to the i-th item whatever
        # the index labels of the Series are
        limit = threshold.iloc[i] if is_dynamic else threshold
        if loss_fn(result[i], dataset[i][1]) > limit:
            anomalies_idxs.append(i + n_factors)
    return anomalies_idxs

In [None]:
train_anomalies_idxs = detect_anomalies(
    res_train, train_ds, pred_error_threshold, n_factors
)
train_anomalies_idxs

[3080]

Plot of the result for training data:


*   Blue points - non-anomaly data
*   Red points - detected anomaly data
*   Green points - real anomaly data

In [None]:
# Training series with labelled (green) and detected (red) anomalies overlaid.
layout = dict(xaxis=dict(title='Timestamp'), yaxis=dict(title='CPU Utilization'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=train['timestamp'], y=train['value'], 
                         mode='markers', name='Non-anomaly',
                         marker=dict(color='blue')))
fig.add_trace(go.Scatter(x=train_anomalies['timestamp'], y=train_anomalies['value'], 
                         mode='markers', name='Real Anomaly',
                         marker=dict(color='green', size=13)))
# Detected indexes are used as index labels here; this relies on `train`
# keeping the default 0..n-1 index it got from read_csv.
fig.add_trace(go.Scatter(x=train['timestamp'][train_anomalies_idxs],
                         y=train['value'][train_anomalies_idxs], 
                         mode='markers', name='Detected Anomaly',
                         marker=dict(color='red', size=7)))

In [None]:
valid_anomalies_idxs = detect_anomalies(
    res_valid, valid_ds, pred_error_threshold, n_factors
)

Plot of the result for validation data:


*   Blue points - non-anomaly data
*   Red points - detected anomaly data
*   Green points - real anomaly data

In [None]:
# Validation series with labelled (green) and detected (red) anomalies overlaid.
layout = dict(xaxis=dict(title='Timestamp'), yaxis=dict(title='CPU Utilization'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=valid['timestamp'], y=valid['value'], 
                         mode='markers', name='Non-anomaly',
                         marker=dict(color='blue')))
fig.add_trace(go.Scatter(x=valid_anomalies['timestamp'], y=valid_anomalies['value'], 
                         mode='markers', name='Real Anomaly',
                         marker=dict(color='green', size=13)))
# Detected indexes are used as index labels here; this relies on `valid`
# keeping the default 0..n-1 index it got from read_csv.
fig.add_trace(go.Scatter(x=valid['timestamp'][valid_anomalies_idxs],
                         y=valid['value'][valid_anomalies_idxs], 
                         mode='markers', name='Detected Anomaly',
                         marker=dict(color='red', size=7)))

### Metrics calculation

Finally, we calculate several metrics for the model with one threshold:


*   Confusion matrix
*   Precision
*   Recall
*   F-beta score

In [None]:
from sklearn.metrics import precision_recall_fscore_support

In [None]:
def calculate_metrics(ground_truth: pd.DataFrame, anomalies_idxs: list):
    """Compare detected anomalies with the ``anomaly`` labels of ``ground_truth``.

    Args:
        ground_truth: Frame with a binary ``anomaly`` column.
        anomalies_idxs: Row *positions* in ``ground_truth`` flagged as anomalies.

    Returns:
        ``(confusion_matrix, precision, recall, f_beta)`` where the confusion
        matrix is a ``pd.crosstab`` with margins and ``f_beta`` is the F-beta
        score with ``beta=2`` (an F2 score, not F1).
    """
    labels = ground_truth['anomaly']
    # Reuse the ground-truth index so crosstab aligns the two series row by row
    # even when the frame is not indexed 0..n-1; the detections stay positional.
    predictions = pd.Series(0, index=ground_truth.index, name='predicted_anomaly')
    predictions.iloc[anomalies_idxs] = 1
    
    confusion_matrix = pd.crosstab(labels, predictions, margins=True)
    precision, recall, f_beta, _ = precision_recall_fscore_support(
        labels, predictions, beta=2., average='binary'
    )
    return confusion_matrix, precision, recall, f_beta

In [None]:
train_conf_matrix, *train_metrics = calculate_metrics(
    train, train_anomalies_idxs
)
train_conf_matrix

predicted_anomaly,0,1,All
anomaly,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,4030,0,4030
1,1,1,2
All,4031,1,4032


In [None]:
# calculate_metrics uses beta=2, so the third metric is an F2 score, not F1
print(f'Train:\n Precision: {train_metrics[0]:.3f}\n Recall: {train_metrics[1]:.3f}\n F2 score: {train_metrics[2]:.3f}')

Train:
 Precision: 1.000
 Recall: 0.500
 F1 score: 0.556


In [None]:
valid_conf_matrix, *valid_metrics = calculate_metrics(
    valid, valid_anomalies_idxs
)
valid_conf_matrix

predicted_anomaly,0,1,All
anomaly,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3796,234,4030
1,0,2,2
All,3796,236,4032


In [None]:
# calculate_metrics uses beta=2, so the third metric is an F2 score, not F1
print(f'Valid:\n Precision: {valid_metrics[0]:.3f}\n Recall: {valid_metrics[1]:.3f}\n F2 score: {valid_metrics[2]:.3f}')

Valid:
 Precision: 0.008
 Recall: 1.000
 F1 score: 0.041


## Anomaly detection with dynamic threshold

The dynamic threshold is calculated for each point from the mean and standard deviation of the prediction errors in a trailing window ending at this point

### Threshold calculation

Definition of the window and coefficient for standard deviation, based on which the threshold is calculated

In [None]:
# Length, in samples, of the rolling window over the prediction errors
window = 40
# Number of rolling standard deviations added to the rolling mean
std_coef = 6

Calculation of the dynamic threshold using the prediction errors for **training** data

In [None]:
# Rolling statistics of the training errors; min_periods=1 lets the window
# start producing values from the very first sample
train_pred_errors_windowed = pd.Series(train_pred_errors).rolling(window=window, min_periods=1)
train_rolling_mean = train_pred_errors_windowed.mean()
train_rolling_std = train_pred_errors_windowed.std()
# Dynamic threshold for the training data: one limit per sample
train_dynamic_threshold = train_rolling_mean + std_coef * train_rolling_std

Calculation of the dynamic threshold using the prediction errors for **validation** data

In [None]:
# We use Series from pandas to calculate windowed errors.
# This cell is for the validation data: it previously recomputed the training
# threshold a second time (copy-paste of the cell above).
valid_pred_errors_windowed = pd.Series(valid_pred_errors).rolling(window=window, min_periods=1)
# Dynamic threshold for the validation data
valid_dynamic_threshold = valid_pred_errors_windowed.mean() + std_coef * valid_pred_errors_windowed.std()

### Data filtering

Then, we filter results of the model according to the thresholds and get the **indexes** of detected anomalies

In [None]:
from typing import Union

def detect_anomalies(
    result: torch.Tensor, dataset: 'CPUDataset', 
    threshold: Union[float, pd.Series], n_factors: int = 0,
    loss_fn=None
):
    """Return the row indexes whose prediction error exceeds the threshold.

    Args:
        result: Model predictions, one entry per dataset item.
        dataset: Indexable collection of ``(x, y)`` items; only ``y`` is used.
        threshold: Either one static limit, or a ``pd.Series`` holding one
            limit per dataset item (read by position).
        n_factors: Offset added to every detected index. A prediction belongs
            to the row right after its input window, so pass the window length
            to get indexes into the original frame.
        loss_fn: Error function called as ``loss_fn(prediction, target)``.
            Defaults to the notebook-level ``criterion``, as before.

    Returns:
        List of ``i + n_factors`` for every item ``i`` flagged as anomalous.
    """
    if loss_fn is None:
        # Backward-compatible default: the loss object defined at notebook level
        loss_fn = criterion
    # isinstance also accepts Series subclasses, unlike an exact type comparison
    is_dynamic = isinstance(threshold, pd.Series)

    anomalies_idxs = []
    for i in range(len(dataset)):
        # Positional lookup: the i-th limit belongs to the i-th item whatever
        # the index labels of the Series are
        limit = threshold.iloc[i] if is_dynamic else threshold
        if loss_fn(result[i], dataset[i][1]) > limit:
            anomalies_idxs.append(i + n_factors)
    return anomalies_idxs

In [None]:
# We are going to use this handy function for all metrics at once 
# except confusion matrix
from sklearn.metrics import precision_recall_fscore_support

def calculate_metrics(
    ground_truth: pd.DataFrame, anomalies_idxs: list
    ):
    """Compare detected anomalies with the labels stored in `ground_truth`.

    NOTE: a later cell of this notebook defines `calculate_metrics` again
    (reading the 'anomaly' column); once that cell runs it replaces this
    definition.

    Args:
        ground_truth: DataFrame with an 'anomaly_label' column.
        anomalies_idxs: Positional row indexes that were detected as anomalies.

    Returns:
        Tuple `(confusion_matrix, precision, recall, f_beta)`; the F-score is
        computed with beta=2, i.e. it is the F2 score rather than F1.
    """
    labels = ground_truth.loc[:, 'anomaly_label']
    # Reuse the index of the ground truth: pd.crosstab aligns its inputs on
    # the index, so a fresh 0..n-1 index would break the confusion matrix
    # whenever `ground_truth` is not indexed that way
    predicted = pd.Series(0, index=ground_truth.index, name='anomaly_predicted')
    predicted.iloc[anomalies_idxs] = 1

    # Calculation of the confusion matrix can be done using pandas
    confusion_matrix = pd.crosstab(labels, predicted, margins=True)
    precision, recall, f_beta, _ = precision_recall_fscore_support(
        labels,
        predicted,
        beta=2.,
        average='binary'
    )
    return confusion_matrix, precision, recall, f_beta

In [None]:
# Indexes of the training points whose error exceeds the dynamic threshold
train_anomalies_dynamic_idxs = detect_anomalies(
    result=res_train,
    dataset=train_ds,
    threshold=train_dynamic_threshold,
    n_factors=n_factors,
)
train_anomalies_dynamic_idxs

[1649, 3080]

In [None]:
# Rolling view over the validation prediction errors; min_periods=1 allows
# the leading windows to be evaluated on fewer than `window` values
valid_pred_errors_windowed = pd.Series(valid_pred_errors).rolling(window, min_periods=1)
# Dynamic threshold for the validation data:
# rolling mean plus `std_coef` rolling standard deviations
valid_dynamic_threshold = (
    valid_pred_errors_windowed.mean()
    + valid_pred_errors_windowed.std() * std_coef
)

In [None]:
# Indexes of the validation points whose error exceeds the dynamic threshold
valid_anomalies_dynamic_idxs = detect_anomalies(
    result=res_valid,
    dataset=valid_ds,
    threshold=valid_dynamic_threshold,
    n_factors=n_factors,
)
valid_anomalies_dynamic_idxs

[946, 2585]

Plot of the result for training data:


*   Blue points - non-anomaly data
*   Red points - detected anomaly data
*   Green points - real anomaly data

In [None]:
# Training data: all points (blue), labelled anomalies (green) and the
# anomalies detected with the dynamic threshold (red)
layout = {
    'xaxis': {'title': 'Timestamp'},
    'yaxis': {'title': 'CPU Utilization'},
}
fig = go.Figure(
    data=[
        go.Scatter(
            x=train['timestamp'], y=train['value'],
            mode='markers', name='Non-anomaly',
            marker={'color': 'blue'},
        ),
        go.Scatter(
            x=train_anomalies['timestamp'], y=train_anomalies['value'],
            mode='markers', name='Real Anomaly',
            marker={'color': 'green', 'size': 13},
        ),
        go.Scatter(
            x=train['timestamp'][train_anomalies_dynamic_idxs],
            y=train['value'][train_anomalies_dynamic_idxs],
            mode='markers', name='Detected Anomaly',
            marker={'color': 'red', 'size': 7},
        ),
    ],
    layout=layout,
)
# Last expression of the cell, so the figure is rendered
fig

Plot of the result for validation data:


*   Blue points - non-anomaly data
*   Red points - detected anomaly data
*   Green points - real anomaly data

In [None]:
# Validation data: all points (blue), labelled anomalies (green) and the
# anomalies detected with the dynamic threshold (red)
layout = {
    'xaxis': {'title': 'Timestamp'},
    'yaxis': {'title': 'CPU Utilization'},
}
fig = go.Figure(
    data=[
        go.Scatter(
            x=valid['timestamp'], y=valid['value'],
            mode='markers', name='Non-anomaly',
            marker={'color': 'blue'},
        ),
        go.Scatter(
            x=valid_anomalies['timestamp'], y=valid_anomalies['value'],
            mode='markers', name='Real Anomaly',
            marker={'color': 'green', 'size': 13},
        ),
        go.Scatter(
            x=valid['timestamp'][valid_anomalies_dynamic_idxs],
            y=valid['value'][valid_anomalies_dynamic_idxs],
            mode='markers', name='Detected Anomaly',
            marker={'color': 'red', 'size': 7},
        ),
    ],
    layout=layout,
)
# Last expression of the cell, so the figure is rendered
fig

### Metrics calculation

Finally, we calculate several metrics for the model with dynamic threshold:


*   Confusion matrix
*   Precision
*   Recall
*   F-beta score (with β = 2, which weights recall higher than precision)

Metrics for training data

In [None]:
# Detections obtained with `pred_error_threshold`, which is defined earlier
# in the notebook (presumably the static threshold - verify)
train_anomalies_idxs = detect_anomalies(
    result=res_train,
    dataset=train_ds,
    threshold=pred_error_threshold,
    n_factors=n_factors,
)
valid_anomalies_idxs = detect_anomalies(
    result=res_valid,
    dataset=valid_ds,
    threshold=pred_error_threshold,
    n_factors=n_factors,
)

In [None]:
def calculate_metrics(ground_truth: pd.DataFrame, anomalies_idxs: list):
    """Compare detected anomalies with the labels stored in `ground_truth`.

    Args:
        ground_truth: DataFrame with an 'anomaly' column.
        anomalies_idxs: Positional row indexes that were detected as anomalies.

    Returns:
        Tuple `(confusion_matrix, precision, recall, f_beta)`; the F-score is
        computed with beta=2, i.e. it is the F2 score rather than F1.
    """
    labels = ground_truth.loc[:, 'anomaly']
    # Reuse the index of the ground truth: pd.crosstab aligns its inputs on
    # the index, so a fresh 0..n-1 index would break the confusion matrix
    # whenever `ground_truth` is not indexed that way
    predicted = pd.Series(0, index=ground_truth.index, name='predicted_anomaly')
    predicted.iloc[anomalies_idxs] = 1

    confusion_matrix = pd.crosstab(labels, predicted, margins=True)
    precision, recall, f_beta, _ = precision_recall_fscore_support(
        labels, predicted, beta=2., average='binary'
    )
    return confusion_matrix, precision, recall, f_beta

In [None]:
# This section reports the model with the dynamic threshold, so we score the
# detections obtained with `train_dynamic_threshold` (as the validation
# metrics do) instead of the ones obtained with `pred_error_threshold`
train_conf_matrix, *train_metrics = calculate_metrics(
    train, train_anomalies_dynamic_idxs
)
train_conf_matrix

predicted_anomaly,0,1,All
anomaly,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,4030,0,4030
1,1,1,2
All,4031,1,4032


In [None]:
# calculate_metrics computes the F-score with beta=2,
# so the third metric is the F2 score (not F1)
print(
    'Train:\n'
    f' Precision: {train_metrics[0]:.3f}\n'
    f' Recall: {train_metrics[1]:.3f}\n'
    f' F2 score: {train_metrics[2]:.3f}'
)

Train:
 Precision: 1.000
 Recall: 0.500
 F1 score: 0.556


Metrics for validation data

In [None]:
# Confusion matrix first, the remaining metrics are collected in a list
valid_conf_matrix, *valid_metrics = calculate_metrics(
    ground_truth=valid,
    anomalies_idxs=valid_anomalies_dynamic_idxs,
)
valid_conf_matrix

predicted_anomaly,0,1,All
anomaly,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,4030,0,4030
1,0,2,2
All,4030,2,4032


In [None]:
# calculate_metrics computes the F-score with beta=2,
# so the third metric is the F2 score (not F1)
print(
    'Valid:\n'
    f' Precision: {valid_metrics[0]:.3f}\n'
    f' Recall: {valid_metrics[1]:.3f}\n'
    f' F2 score: {valid_metrics[2]:.3f}'
)

Valid:
 Precision: 1.000
 Recall: 1.000
 F1 score: 1.000
