In [1]:
import numpy as np              # NumPy, for working with arrays/tensors 
import time                     # For measuring time
import random                   # Python's random library
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# PyTorch libraries:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt

if torch.cuda.is_available():
  print("Cuda (GPU support) is available and enabled!")
  device = torch.device("cuda")
else:
  print("Cuda (GPU support) is not available :(")
  device = torch.device("cpu")


Cuda (GPU support) is not available :(


In [2]:
LABELS_FILEPATH = "./SMAP MSL/labeled_anomalies.csv"
TRAINSET_FILEPATH = "./SMAP MSL/data/data/train"
TESTSET_FILEPATH = "./SMAP MSL/data/data/test"

In [3]:
labels = pd.read_csv(LABELS_FILEPATH)

In [4]:
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82 entries, 0 to 81
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   chan_id            82 non-null     object
 1   spacecraft         82 non-null     object
 2   anomaly_sequences  82 non-null     object
 3   class              82 non-null     object
 4   num_values         82 non-null     int64 
dtypes: int64(1), object(4)
memory usage: 3.3+ KB


In [5]:
labels.head(50)

Unnamed: 0,chan_id,spacecraft,anomaly_sequences,class,num_values
0,P-1,SMAP,"[[2149, 2349], [4536, 4844], [3539, 3779]]","[contextual, contextual, contextual]",8505
1,S-1,SMAP,"[[5300, 5747]]",[point],7331
2,E-1,SMAP,"[[5000, 5030], [5610, 6086]]","[contextual, contextual]",8516
3,E-2,SMAP,"[[5598, 6995]]",[point],8532
4,E-3,SMAP,"[[5094, 8306]]",[point],8307
5,E-4,SMAP,"[[5450, 8261]]",[point],8354
6,E-5,SMAP,"[[5600, 5920]]",[point],8294
7,E-6,SMAP,"[[5610, 5675]]",[point],8300
8,E-7,SMAP,"[[5394, 5674]]",[point],8310
9,E-8,SMAP,"[[5400, 6022]]",[point],8532


In [6]:
#training params
BATCH_SIZE = 128
INPUT_LENGTH = 100
INTERMEDIATE_LENGTH = 24
ALPHA  =0.1
DIMENSIONS  = 25 
lr = 0.005
#optimizer = torch.optim.Adam(lr = lr )
loss = nn.MSELoss()

In [7]:
#data loading 
import os
train_datas = {}
test_datas = {}
arranged_train_datas = {}
arranged_test_datas = {}
# Iterate directory
for path in os.listdir(TRAINSET_FILEPATH):
    # check if current path is a file
    if os.path.isfile(os.path.join(TRAINSET_FILEPATH, path)):
        train_data = np.load(os.path.join(TRAINSET_FILEPATH, path))
        train_datas[path] = train_data
for path in os.listdir(TESTSET_FILEPATH):
    # check if current path is a file
    if os.path.isfile(os.path.join(TESTSET_FILEPATH, path)):
        test_data = np.load(os.path.join(TESTSET_FILEPATH, path))
        test_datas[path] = test_data

# for train_data in train_datas:
#     print(train_datas[train_data ].shape)
    

# for test_data in test_datas:
#     print(test_datas[test_data ].shape)



In [8]:
wL = INPUT_LENGTH     # window length
zeta = 1    # horizon length (future length to predict)
#4.1 data normalization 
#1 - training normalization 
train_means = {}
train_variances = {}
windowedTrain = {}      # Dividing the data with windows
futuredTrain = {}       # Creating arrays to be futured
for train_data in train_datas:
    train_datas[train_data] = normalize(train_datas[train_data])
    train_datas[train_data] = train_datas[train_data] * 2 - 1  # variance 1 olacak teyit et
    train_means[train_data] = np.mean(train_datas[train_data],axis=0)
    train_variances[train_data] = np.var(train_datas[train_data],axis=0)

    windowedTrain[train_data] = np.zeros((train_datas[train_data].shape[0]-wL-zeta,wL,train_datas[train_data].shape[1]))
    futuredTrain[train_data]  = np.zeros((train_datas[train_data].shape[0]-wL-zeta,zeta,train_datas[train_data].shape[1]))
    for i in range(0,train_datas[train_data].shape[0]-wL-zeta):
        windowedTrain[train_data][i] = train_datas[train_data][i:i+wL,:]
        futuredTrain[train_data][i]  = train_datas[train_data][i+wL:i+wL+zeta,:]

In [9]:
#2 - test normalization
windowedTest = {}
futuredTest = {}
test_means = {}
test_variances = {}
expected_values = {}
expected2_values = {}
mean_zero = train_means.copy()
variance_zero = train_variances.copy()

In [10]:
# range'i 1'den baslat, test_means[test_data+1] falan, ilk eleman 0 ve 1 gbi normalize edilmiş mean ve variance değgeri olacak.
for test_data in test_datas:
    # generate expected value of for discrete datas
    windowedTest[test_data] = np.zeros((test_datas[test_data].shape[0]-wL-zeta,wL,test_datas[test_data].shape[1]))
    futuredTest[test_data]  = np.zeros((test_datas[test_data].shape[0]-wL-zeta,zeta,test_datas[test_data].shape[1]))
    expected_values[test_data]  = np.zeros((test_datas[test_data].shape[0]-wL-zeta,1,test_datas[test_data].shape[1]))
    expected2_values[test_data] = np.zeros((test_datas[test_data].shape[0]-wL-zeta,1,test_datas[test_data].shape[1]))

    for i in range(1,test_datas[test_data].shape[0]-wL-zeta):
        windowedTest[test_data][i] = test_datas[test_data][i:i+wL,:]
        futuredTest[test_data][i]  = test_datas[test_data][i+wL:i+wL+zeta,:]
        expected_values[test_data] = np.mean(test_datas[test_data],axis=0)
        expected2_values[test_data] = np.mean(test_datas[test_data]**2,axis=0)
        test_means[test_data] = (1-ALPHA)*mean_zero[test_data] + ALPHA*expected_values[test_data] 
        test_variances[test_data] = (1-ALPHA)*variance_zero[test_data]+ALPHA*(expected2_values[test_data]-expected_values[test_data]**2)


In [11]:
print(train_means["A-1.npy"].shape)

(25,)


In [24]:
bbb = np.random.randint(0,10,(2,3))
print(bbb)
print(bbb**2)

[[7 9 4]
 [4 2 4]]
[[49 81 16]
 [16  4 16]]


In [25]:
print(train_datas["P-1.npy"].shape)
print(windowedTrain["P-1.npy"].shape)
print(futuredTrain["P-1.npy"].shape)

print(test_datas["P-1.npy"].shape)
print(windowedTest["P-1.npy"].shape)
print(futuredTest["P-1.npy"].shape)

(2872, 25)
(2771, 100, 25)
(2771, 1, 25)
(8505, 25)
(8404, 100, 25)
(8404, 1, 25)


In [26]:
#4.2 temporal correlation
#inout sub block 


class InputSubBlock(nn.Module):

    def __init__(self):
        super(InputSubBlock, self).__init__()
        self.fc1 = nn.Linear(INPUT_LENGTH, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 50)
        self.fc4 = nn.Linear(50, 50)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
    
#cascade sub block 
class CascadeSubBlock(nn.Module):

    def __init__(self):
        super(CascadeSubBlock, self).__init__()
        self.fc1 = nn.Linear(INPUT_LENGTH, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, INPUT_LENGTH)
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
#forecasting sub block
class ForecastingSubBlock(nn.Module):

    def __init__(self):
        super(ForecastingSubBlock, self).__init__()
        self.fc1 = nn.Linear(INPUT_LENGTH, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, INTERMEDIATE_LENGTH)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
#MLPBlock
class MLPBlock(nn.Module):

    def __init__(self):
        super(MLPBlock, self).__init__()
        self.input = InputSubBlock()
        self.cascade = CascadeSubBlock()
        self.forecasting = ForecastingSubBlock()

    def forward(self, x):
        x = self.input(x)
        cascade = self.cascade(x)
        forecast = self.forecasting(x)
        return cascade , forecast
class TemporalModule(nn.Module):
    def __init__(self):
        super(TemporalModule, self).__init__()
        self.mlp1 = MLPBlock() 
        self.mlp2 = MLPBlock() 

    def forward(self, input):
        mlp1_out , forecast = self.mlp1(input)
        new_input = mlp1_out - input 
        mlp2_out , forecast_2 = self.mlp2(new_input)
        return (forecast + forecast_2)

s = TemporalModule()

In [64]:
tWindowed   =  torch.tensor(windowedTrain["A-1.npy"][0:BATCH_SIZE,:,0])
tWindowed = tWindowed.to(torch.float32)


In [65]:
tWindowed.shape

torch.Size([128, 100])

In [66]:
print(tWindowed.dtype)

torch.float32


In [67]:
s(tWindowed)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x50 and 100x50)