In [41]:
import numpy as np              # NumPy, for working with arrays/tensors 
import time                     # For measuring time
import random                   # Python's random library
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# PyTorch libraries:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.preprocessing import normalize

if torch.cuda.is_available():
  print("Cuda (GPU support) is available and enabled!")
  device = torch.device("cuda")
else:
  print("Cuda (GPU support) is not available :(")
  device = torch.device("cpu")


Cuda (GPU support) is not available :(


In [42]:
LABELS_FILEPATH = "./SMAP MSL/labeled_anomalies.csv"
TRAINSET_FILEPATH = "./SMAP MSL/data/data/train"
TESTSET_FILEPATH = "./SMAP MSL/data/data/test"

In [43]:
labels = pd.read_csv(LABELS_FILEPATH)

In [44]:
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82 entries, 0 to 81
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   chan_id            82 non-null     object
 1   spacecraft         82 non-null     object
 2   anomaly_sequences  82 non-null     object
 3   class              82 non-null     object
 4   num_values         82 non-null     int64 
dtypes: int64(1), object(4)
memory usage: 3.3+ KB


In [45]:
labels.head(50)

Unnamed: 0,chan_id,spacecraft,anomaly_sequences,class,num_values
0,P-1,SMAP,"[[2149, 2349], [4536, 4844], [3539, 3779]]","[contextual, contextual, contextual]",8505
1,S-1,SMAP,"[[5300, 5747]]",[point],7331
2,E-1,SMAP,"[[5000, 5030], [5610, 6086]]","[contextual, contextual]",8516
3,E-2,SMAP,"[[5598, 6995]]",[point],8532
4,E-3,SMAP,"[[5094, 8306]]",[point],8307
5,E-4,SMAP,"[[5450, 8261]]",[point],8354
6,E-5,SMAP,"[[5600, 5920]]",[point],8294
7,E-6,SMAP,"[[5610, 5675]]",[point],8300
8,E-7,SMAP,"[[5394, 5674]]",[point],8310
9,E-8,SMAP,"[[5400, 6022]]",[point],8532


In [46]:
#training params
BATCH_SIZE = 128
INPUT_LENGTH = 100
ALPHA  =0.1
DIMENSIONS  = 25 
lr = 0.005
#optimizer = torch.optim.Adam(lr = lr )
loss = nn.MSELoss()

In [47]:
#data loading 
import os
train_datas = {}
test_datas = {}
arranged_train_datas = {}
arranged_test_datas = {}
# Iterate directory
for path in os.listdir(TRAINSET_FILEPATH):
    # check if current path is a file
    if os.path.isfile(os.path.join(TRAINSET_FILEPATH, path)):
        train_data = np.load(os.path.join(TRAINSET_FILEPATH, path))
        train_datas[path] = train_data
for path in os.listdir(TESTSET_FILEPATH):
    # check if current path is a file
    if os.path.isfile(os.path.join(TESTSET_FILEPATH, path)):
        test_data = np.load(os.path.join(TESTSET_FILEPATH, path))
        test_datas[path] = test_data

for train_data in train_datas:
    print(train_datas[train_data ].shape)



(2880, 25)
(2648, 25)
(2736, 25)
(2690, 25)
(705, 25)
(682, 25)
(2879, 25)
(762, 25)
(762, 25)
(2435, 25)
(2158, 55)
(764, 55)
(2849, 25)
(2611, 25)
(312, 25)
(1490, 25)
(3675, 55)
(2074, 55)
(1451, 55)
(2880, 25)
(2880, 25)
(2833, 25)
(2561, 25)
(2594, 25)
(2583, 25)
(2602, 25)
(2583, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2880, 25)
(2769, 25)
(2880, 25)
(2880, 25)
(2869, 25)
(2861, 25)
(2880, 25)
(2244, 55)
(2598, 55)
(2511, 55)
(3342, 55)
(2820, 25)
(2478, 25)
(2624, 25)
(2551, 25)
(2881, 25)
(2446, 25)
(2209, 55)
(2208, 55)
(2037, 55)
(2076, 55)
(2032, 55)
(1565, 55)
(1587, 55)
(2872, 25)
(4308, 55)
(3969, 55)
(2880, 55)
(3682, 55)
(2821, 25)
(2855, 25)
(2609, 25)
(2853, 25)
(2874, 25)
(2818, 25)
(926, 55)
(2875, 25)
(425, 55)
(1145, 55)
(1145, 55)
(2855, 25)
(2876, 25)
(2272, 55)
(2272, 55)
(748, 55)
(439, 55)


In [48]:
#4.1 data normalization 
#1 - training normalization 
for train_data in train_datas:
    train_datas[train_data] = normalize(train_datas[train_data])
    train_datas[train_data] =  train_datas[train_data] * 2 - 1
#2 - test normalization
test_means = {}
test_variances = {}

for test_data in test_datas:
    mean = 0
    variance = 1
    # generate expected value of for discrete datas
    expected_values = {}
    for row in test_datas[test_data]:
        for element in row:
            if element in expected_values:
                expected_values[element] += 1
            else:
                expected_values[element] = 0
                
    sum = 0
    for keys in expected_values:
        sum += expected_values[keys] 

    for keys in expected_values:
        expected_values[keys] /= sum

    #now we got expected values
    #iterate through data
    for row in test_datas[test_data]:
        for element in row:
            mean = (1 - ALPHA) * mean + ALPHA * expected_values[element]
            e_x_2 = 0
            if (element*element) in expected_values:
                e_x_2 = expected_values[element*element]
            variance = (1 - ALPHA) * variance + ALPHA * ( e_x_2 - expected_values[element] * expected_values[element]  )
            test_means[test_data + "_" + str(row) + "_" + str(element)] = mean 
            test_variances[test_data + "_" + str(row) + "_" + str(element)] = variance 
    



                 
            



KeyError: 0.5068334934640698

In [None]:
test_datas

{'A-1.npy': array([[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]]),
 'A-2.npy': array([[-0.71192239,  1.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.71192239,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.71192239,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [-0.11863311,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.11863311,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.11863311,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]]),
 'A-3.npy': array([[0.40241647, 1.        , 0.        , ..., 0.        , 0.        ,
         0.      

In [None]:
#4.2 temporal correlation
#inout sub block 


class InputSubBlock(nn.Module):

    def __init__(self):
        super(InputSubBlock, self).__init__()
        self.fc1 = nn.Linear(16 * 5 * 5, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 50)
        self.fc4 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
    
#cascade sub block 
class CascadeSubBlock(nn.Module):

    def __init__(self):
        super(CascadeSubBlock, self).__init__()
        self.fc1 = nn.Linear(16 * 5 * 5, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
#forecasting sub block
class ForecastingSubBlock(nn.Module):

    def __init__(self):
        super(ForecastingSubBlock, self).__init__()
        self.fc1 = nn.Linear(16 * 5 * 5, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, 24)
        self.fc3 = nn.Linear(24, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
#MLPBlock
class MLPBlock(nn.Module):

    def __init__(self):
        super(MLPBlock, self).__init__()
        self.input = InputSubBlock()
        self.cascade = CascadeSubBlock()
        self.forecasting = ForecastingSubBlock()

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

s = MLPBlock()