# Get Multi-Variate Time Series Data

In [1]:
# Download the zipped dataset archive (data.zip) into the current working directory.
!wget https://s3-us-west-2.amazonaws.com/telemanom/data.zip

--2023-05-22 15:38:03--  https://s3-us-west-2.amazonaws.com/telemanom/data.zip
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.222.24, 52.92.243.144, 52.218.197.48, ...
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.222.24|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 85899803 (82M) [application/zip]
Saving to: ‘data.zip’


2023-05-22 15:38:09 (17.0 MB/s) - ‘data.zip’ saved [85899803/85899803]



In [2]:
# Extract the archive quietly; -o overwrites files that already exist so that
# re-running this cell never stops at unzip's interactive "replace?" prompt.
!unzip -qq -o data.zip

In [3]:
# Show the top-level contents of the data directory.
!ls data

2018-05-19_15.00.10  test  train


In [4]:
# List the per-channel .npy files available in the training split.
!ls data/train

A-1.npy  C-2.npy   D-5.npy   E-3.npy  F-5.npy  M-3.npy	 P-3.npy   T-3.npy
A-2.npy  D-11.npy  D-6.npy   E-4.npy  F-7.npy  M-4.npy	 P-4.npy   T-4.npy
A-3.npy  D-12.npy  D-7.npy   E-5.npy  F-8.npy  M-5.npy	 P-7.npy   T-5.npy
A-4.npy  D-13.npy  D-8.npy   E-6.npy  G-1.npy  M-6.npy	 R-1.npy   T-8.npy
A-5.npy  D-14.npy  D-9.npy   E-7.npy  G-2.npy  M-7.npy	 S-1.npy   T-9.npy
A-6.npy  D-15.npy  E-10.npy  E-8.npy  G-3.npy  P-10.npy  S-2.npy
A-7.npy  D-16.npy  E-11.npy  E-9.npy  G-4.npy  P-11.npy  T-10.npy
A-8.npy  D-1.npy   E-12.npy  F-1.npy  G-6.npy  P-14.npy  T-12.npy
A-9.npy  D-2.npy   E-13.npy  F-2.npy  G-7.npy  P-15.npy  T-13.npy
B-1.npy  D-3.npy   E-1.npy   F-3.npy  M-1.npy  P-1.npy	 T-1.npy
C-1.npy  D-4.npy   E-2.npy   F-4.npy  M-2.npy  P-2.npy	 T-2.npy


In [5]:
# List the per-channel .npy files available in the test split.
!ls data/test

A-1.npy  C-2.npy   D-5.npy   E-3.npy  F-5.npy  M-3.npy	 P-3.npy   T-3.npy
A-2.npy  D-11.npy  D-6.npy   E-4.npy  F-7.npy  M-4.npy	 P-4.npy   T-4.npy
A-3.npy  D-12.npy  D-7.npy   E-5.npy  F-8.npy  M-5.npy	 P-7.npy   T-5.npy
A-4.npy  D-13.npy  D-8.npy   E-6.npy  G-1.npy  M-6.npy	 R-1.npy   T-8.npy
A-5.npy  D-14.npy  D-9.npy   E-7.npy  G-2.npy  M-7.npy	 S-1.npy   T-9.npy
A-6.npy  D-15.npy  E-10.npy  E-8.npy  G-3.npy  P-10.npy  S-2.npy
A-7.npy  D-16.npy  E-11.npy  E-9.npy  G-4.npy  P-11.npy  T-10.npy
A-8.npy  D-1.npy   E-12.npy  F-1.npy  G-6.npy  P-14.npy  T-12.npy
A-9.npy  D-2.npy   E-13.npy  F-2.npy  G-7.npy  P-15.npy  T-13.npy
B-1.npy  D-3.npy   E-1.npy   F-3.npy  M-1.npy  P-1.npy	 T-1.npy
C-1.npy  D-4.npy   E-2.npy   F-4.npy  M-2.npy  P-2.npy	 T-2.npy


In [6]:
# Fetch the mtad-gat-pytorch repository for its dataset metadata files.
# --depth 1 skips the repository history (the working tree is still complete),
# and the directory test makes the cell safe to re-run after a first clone.
!test -d mtad-gat-pytorch || git clone --depth 1 https://github.com/ML4ITS/mtad-gat-pytorch.git

Cloning into 'mtad-gat-pytorch'...
remote: Enumerating objects: 6189, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 6189 (delta 11), reused 0 (delta 0), pack-reused 6161[K
Receiving objects: 100% (6189/6189), 920.67 MiB | 24.57 MiB/s, done.
Resolving deltas: 100% (2708/2708), done.
Updating files: 100% (158/158), done.


In [7]:
# List the metadata files that ship inside the cloned repository.
!ls mtad-gat-pytorch/datasets/data

labeled_anomalies.csv  msl_train_md.csv  smap_train_md.csv


# Import Libraries

In [8]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
import torch.nn as nn
from tqdm.notebook import tqdm


In [9]:
# Read the SMAP channel metadata table from the cloned repository,
# report how many channels it lists, and preview the first rows.
smap_channels = pd.read_csv(
    'mtad-gat-pytorch/datasets/data/smap_train_md.csv'
)
print("Number of Channels : ", smap_channels.shape[0])
smap_channels.head(5)

Number of Channels :  53


Unnamed: 0,chan_id,num_values
0,A-1,2880
1,A-2,2648
2,A-3,2736
3,A-4,2690
4,A-5,705


In [10]:
# Keep only the channel identifiers, as a plain Python list.
# NOTE: this rebinds smap_channels from a DataFrame to a list, so the cell
# cannot be re-run without first re-running the cell that loads the CSV.
smap_channels = [chan_id for chan_id in smap_channels['chan_id'].values]
len(smap_channels)

53

# Create SMAP Data from Time Series across all the Channel IDs - Each Time Step has 25 features

In [12]:
# Load every channel's training array and stack them along the first axis.
# Whole arrays are collected and concatenated once, instead of extending a
# Python list row by row and converting it back to an array afterwards.
channel_arrays = []

for smap_channel in smap_channels:
  tmp_data = np.load(os.path.join('data/train/',smap_channel+'.npy'))
  channel_arrays.append(tmp_data)

smap_data = np.concatenate(channel_arrays, axis=0)
print("Shape of SMAP Data : ", smap_data.shape)

Shape of SMAP Data :  (135183, 25)


In [14]:
# Display the stacked array; NumPy abbreviates the printed rows and columns.
smap_data

array([[0.999     , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.999     , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.999     , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.98775593, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.98417906, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.98417906, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [15]:
# Next, the data is scaled using a Min-Max scaler.

In [16]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [17]:
def normalize_data(data, scaler=None):
  """Convert ``data`` to float32, replace NaNs and rescale it.

  Args:
    data: Array-like input; converted to a ``float32`` NumPy array.
    scaler: Optional object exposing ``transform``. When ``None``, a new
      ``MinMaxScaler`` is created and fitted on ``data``.

  Returns:
    A ``(normalized_data, scaler)`` tuple. ``scaler`` is the object that was
    used for the transform, so it can be passed back in for other data.
  """
  data = np.asarray(data, dtype=np.float32)
  # Vectorised NaN check. The built-in sum() previously used here iterated
  # over the first axis in Python before the result reached np.any.
  if np.any(np.isnan(data)):
    # nan_to_num is only applied when at least one NaN is present.
    data = np.nan_to_num(data)

  if scaler is None:
    scaler = MinMaxScaler()
    scaler.fit(data)
  data = scaler.transform(data)
  print("Data normalized")

  return data, scaler

In [18]:
# No scaler is passed, so normalize_data fits a new MinMaxScaler on smap_data
# and returns it together with the scaled array.
smap_data_norm, scaler = normalize_data(smap_data)

Data normalized


In [19]:
# Build the tensor from the min-max scaled array rather than the raw one, so
# the normalization step actually reaches the dataset built from this tensor.
smap_data_pt = torch.from_numpy(smap_data_norm)
smap_data_pt.size()

torch.Size([135183, 25])

# Create Sliding Window Dataset - Breaking down the entire time series into small temporal segments which will be used to train the model.

In [20]:
class SlidingWindowDataset(Dataset):
  """Serve (input window, target) pairs sliced from a sequence.

  Item ``i`` is ``(data[i : i + window], data[i + window : i + window + horizon])``.

  Args:
    data: Sliceable sequence supporting ``len()``.
    window: Number of consecutive elements in each input slice.
    target_dim: Stored on the instance; not used by this class itself.
    horizon: Number of elements following the window returned as the target.
  """

  def __init__(self, data, window, target_dim=None, horizon=1):
    self.data = data
    self.window = window
    self.target_dim = target_dim
    self.horizon = horizon

  def __getitem__(self, index):
    # Slicing never fails on its own, so reject out-of-range indices
    # explicitly; this also lets plain ``for`` iteration terminate.
    if not 0 <= index < len(self):
      raise IndexError("SlidingWindowDataset index out of range")
    start_of_target = index + self.window
    x = self.data[index:start_of_target]
    y = self.data[start_of_target:start_of_target + self.horizon]
    return x, y

  def __len__(self):
    # Count only positions where a full window AND a full horizon fit. For
    # horizon == 1 this equals len(data) - window. Clamp at zero because
    # __len__ must not return a negative number.
    return max(0, len(self.data) - self.window - self.horizon + 1)

In [21]:
# Window length, presumably in time steps, for the sliding-window dataset — TODO confirm usage.
Window=100
# Batch size; presumably consumed by a DataLoader defined later — TODO confirm.
BatchSZ=256
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [22]:
# Use the configured Window constant instead of repeating the literal 100,
# so changing the window length in the config cell takes effect here.
smap_x_y = SlidingWindowDataset(smap_data_pt, Window)

In [23]:
# Display the dataset object; it defines no custom repr, so only the default object repr is shown.
smap_x_y

<__main__.SlidingWindowDataset at 0x7f68821fcac0>