In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

# Load the TensorBoard notebook extension
%load_ext tensorboard


In [None]:
# Load the attack-free recording. Header names are stripped of surrounding
# whitespace so both CSVs expose identical feature names; without this the
# scaler fitted on `normal` reports "Feature names unseen at fit time" when it
# is later applied to `attack` (" AIT201" vs "AIT201").
df = pd.read_csv("SWaT_Normal.csv").rename(columns=str.strip)
normal = df.drop(["Timestamp", "Normal/Attack"], axis=1)
normal.shape

# Load the recording that contains attacks; the label column is kept aside in
# `y_test1` before it is dropped from the feature frame.
df_attack = pd.read_csv("SWaT_Attack.csv").rename(columns=str.strip)
y_test1 = df_attack["Normal/Attack"]
attack = df_attack.drop(["Timestamp", "Normal/Attack"], axis=1)
attack.shape


(449919, 51)

In [None]:
# Drop every row with at least one missing value. `thresh` is deliberately not
# passed: recent pandas raises TypeError when `how` and `thresh` are both given.
normal = normal.dropna(axis=0, how='any')

In [None]:
from sklearn import preprocessing
# Earlier, disabled MinMax-scaling attempt. It is wrapped in a string literal,
# so none of it executes; as the cell's last expression the string is simply
# echoed back as the cell output.
'''
min_max_scaler = preprocessing.MinMaxScaler()
x = normal.values
x_scaled = min_max_scaler.fit_transform(x)
normal = pd.DataFrame(x_scaled)
from sklearn import preprocessing

x = attack.values 
x_scaled = min_max_scaler.transform(x)
attack = pd.DataFrame(x_scaled)
'''


'\nmin_max_scaler = preprocessing.MinMaxScaler()\nx = normal.values\nx_scaled = min_max_scaler.fit_transform(x)\nnormal = pd.DataFrame(x_scaled)\nfrom sklearn import preprocessing\n\nx = attack.values \nx_scaled = min_max_scaler.transform(x)\nattack = pd.DataFrame(x_scaled)\n'

In [None]:
#generating normalized train/test datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.utils import shuffle

def fit_scaler(data, scaler_type=MinMaxScaler):
  """Build a scaler of class `scaler_type` (no constructor arguments), fit it on `data`, and return it."""
  fitted = scaler_type()
  fitted.fit(data)
  return fitted

def generate_datasets_for_training(data, window_size, scaler):
  """Scale `data` and slice it into overlapping windows for autoencoder training.

  Windows advance one row at a time, so consecutive samples share
  `window_size - 1` rows. Targets are copies of the inputs because the model
  is an autoencoder.

  Args:
    data: 2-D table (rows = time steps, columns = features) accepted by
      `scaler.transform`.
    window_size: number of consecutive rows per window; must be positive.
    scaler: already-fitted object exposing `transform(data)`.

  Returns:
    Tuple `(n_features, X_train, X_test, Y_train, Y_test)`. The first 70% of
    the windows, in their original order (no shuffling), form the train arrays.

  Raises:
    ValueError: if `window_size` is not positive.
  """
  if window_size <= 0:
    raise ValueError(f"window_size must be positive, got {window_size}")
  _l = len(data)
  #normalizing values
  data = scaler.transform(data)
  # The last valid start index is _l - window_size (inclusive): that window
  # ends exactly on the final row, hence the +1 on the exclusive range bound.
  windows = [data[i:i + window_size] for i in range(_l - window_size + 1)]
  # Inputs double as targets, so the same window list is split for both; each
  # returned part is materialised into its own array.
  X_train, X_test, Y_train, Y_test = [
      np.array(part)
      for part in train_test_split(windows, windows, train_size=0.7, shuffle=False)
  ]
  # Sanity check: windowing must preserve the feature dimension.
  assert X_train.shape[2] == X_test.shape[2] == np.shape(data)[1]
  return (X_train.shape[2], X_train, X_test, Y_train, Y_test)

In [None]:
#prep
epochs = 50
batch_size = 128 #originally 32
window_size = 60
from keras import metrics
import keras
import tensorflow as tf
import os

# Early-stopping callback: monitors `val_loss` with patience=5 and
# min_delta=1e-2, and restores the best weights when training is stopped.
# NOTE(review): in Keras, min_delta is the smallest change in the monitored
# value that counts as an improvement, not an absolute error threshold --
# confirm 1e-2 is appropriate for an MSE loss on MinMax-scaled data.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=1e-2, patience=5, verbose=0, mode='auto',
    baseline=None, restore_best_weights=True)

# Fit the scaler on the `normal` frame, then build the windowed arrays:
# feats = feature count, X / Y = training inputs / targets,
# XX / YY = held-out inputs / targets (used as validation data by the fit call).
model_scaler = fit_scaler(normal)
feats, X, XX, Y, YY = generate_datasets_for_training(data=normal, window_size=window_size, scaler=model_scaler)

In [None]:
X[0][0]

array([0.00000000e+00, 5.29434115e-03, 5.00000000e-01, 0.00000000e+00,
       0.00000000e+00, 1.22914080e-02, 7.50992639e-02, 2.00908339e-03,
       0.00000000e+00, 5.00000000e-01, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       1.21377629e-01, 1.08624650e-04, 6.44921706e-03, 5.00000000e-01,
       5.00000000e-01, 5.00000000e-01, 5.00000000e-01, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 1.88673672e-01, 0.00000000e+00,
       3.96167160e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 6.79975314e-02, 4.35223511e-01,
       2.75615527e-01, 5.27549303e-01, 1.45942525e-04, 5.64546374e-04,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       8.14383482e-04, 0.00000000e+00, 1.21664406e-03, 1.46783947e-04,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00])

In [None]:
#defining and training the model
# LSTM autoencoder: three stacked LSTM layers compress each
# (window_size, feats) window into a 16-dimensional vector, which is repeated
# window_size times and decoded back to a (window_size, feats) sequence.

model = keras.Sequential()

#encoder layers
model.add(keras.layers.LSTM(64, kernel_initializer='he_uniform', batch_input_shape=(None, window_size, feats), return_sequences=True, name='encoder_1'))
model.add(keras.layers.LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='encoder_2'))
# return_sequences=False: only the final hidden state is kept as the encoding
model.add(keras.layers.LSTM(16, kernel_initializer='he_uniform', return_sequences=False, name='encoder_3'))

#repeat vector: formats encoded vector so it's readable to the decoder
model.add(keras.layers.RepeatVector(window_size, name='encoder_decoder_bridge'))

#decoder
model.add(keras.layers.LSTM(16, kernel_initializer='he_uniform', return_sequences=True, name='decoder_1'))
model.add(keras.layers.LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='decoder_2'))
model.add(keras.layers.LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='decoder_3'))

#dense layer produces sequence similar to input
model.add(keras.layers.TimeDistributed(keras.layers.Dense(feats)))

model.compile(loss="mse",optimizer='adam')
model.build()
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

# NOTE(review): epochs is hard-coded to 2 although the prep cell defines
# `epochs`; with patience=5 the early-stopping callback can never fire in two
# epochs. Confirm whether `epochs=epochs` was intended.
#model.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), epochs=epochs, batch_size=batch_size, shuffle=True, callbacks=[early_stop])
history = model.fit(x=X, y=Y, validation_data=(XX, YY), epochs=2, batch_size=batch_size, shuffle=True, callbacks=[early_stop])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_1 (LSTM)            (None, 60, 64)            29696     
                                                                 
 encoder_2 (LSTM)            (None, 60, 32)            12416     
                                                                 
 encoder_3 (LSTM)            (None, 16)                3136      
                                                                 
 encoder_decoder_bridge (Rep  (None, 60, 16)           0         
 eatVector)                                                      
                                                                 
 decoder_1 (LSTM)            (None, 60, 16)            2112      
                                                                 
 decoder_2 (LSTM)            (None, 60, 32)            6272      
                                                        

In [None]:
#generating normalized train/test datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.utils import shuffle

def fit_scaler(data, scaler_type=MinMaxScaler):
  """Create a `scaler_type()` instance, fit it to `data`, and hand it back.

  NOTE(review): an identical `fit_scaler` is already defined in an earlier
  cell of this notebook; this definition shadows it.
  """
  new_scaler = scaler_type()
  new_scaler.fit(data)
  return new_scaler

def generate_datasets_for_training(data, window_size, scaler):
  """Scale `data` and cut it into non-overlapping windows for autoencoder training.

  Windows start every `window_size` rows; trailing rows that do not fill a
  complete window are left out. Targets are copies of the inputs because the
  model is an autoencoder.

  NOTE(review): an earlier cell defines a function with this same name that
  uses a stride of 1; whichever cell ran last wins.

  Args:
    data: 2-D table (rows = time steps, columns = features) accepted by
      `scaler.transform`.
    window_size: rows per window and also the step between windows; must be
      positive.
    scaler: already-fitted object exposing `transform(data)`.

  Returns:
    Tuple `(n_features, X_train, X_test, Y_train, Y_test)`. The first 70% of
    the windows, in their original order (no shuffling), form the train arrays.

  Raises:
    ValueError: if `window_size` is not positive (the previous manual counter
      loop never terminated in that case).
  """
  if window_size <= 0:
    raise ValueError(f"window_size must be positive, got {window_size}")
  _l = len(data)
  #normalizing values
  data = scaler.transform(data)
  # Start indices 0, window_size, 2*window_size, ... up to and including
  # _l - window_size, i.e. the same positions the while-loop visited.
  windows = [data[i:i + window_size] for i in range(0, _l - window_size + 1, window_size)]
  # Inputs double as targets, so the same window list is split for both.
  X_train, X_test, Y_train, Y_test = [
      np.array(part)
      for part in train_test_split(windows, windows, train_size=0.7, shuffle=False)
  ]
  # Sanity check: windowing must preserve the feature dimension.
  assert X_train.shape[2] == X_test.shape[2] == np.shape(data)[1]
  return (X_train.shape[2], X_train, X_test, Y_train, Y_test)


In [None]:
def format_dataset_for_testing(data, window_size, scaler):
  """Scale `data` and cut it into non-overlapping windows for inference.

  Args:
    data: table (rows = time steps) accepted by `scaler.transform`.
    window_size: rows per window and also the step between windows; must be
      positive.
    scaler: already-fitted object exposing `transform(data)`.

  Returns:
    Array stacking every complete window in order. Trailing rows that do not
    fill a whole window are left out; if no window fits, the array is empty.

  Raises:
    ValueError: if `window_size` is not positive (the previous manual counter
      loop never terminated in that case).
  """
  if window_size <= 0:
    raise ValueError(f"window_size must be positive, got {window_size}")
  _l = len(data)
  #normalizing values
  data = scaler.transform(data)
  # Window starts: 0, window_size, 2*window_size, ... up to _l - window_size inclusive.
  starts = range(0, _l - window_size + 1, window_size)
  return np.array([data[i:i + window_size] for i in starts])


In [None]:
#format test data using scaler from training
X_test = format_dataset_for_testing(data=attack, window_size=window_size, scaler=model_scaler)


Feature names unseen at fit time:
-  AIT201
-  MV101
-  MV201
-  MV303
-  P201
- ...
Feature names seen at fit time, yet now missing:
- AIT201
- MV101
- MV201
- MV303
- P201
- ...



In [None]:
pred_X_raw = model.predict(X_test)



In [None]:
# evaluate(x, y) runs the model on x and compares its output with y. For the
# reconstruction loss the windows themselves must be both input and target;
# passing the predictions as x fed already-reconstructed data through the model again.
score = model.evaluate(X_test, X_test, batch_size=batch_size, verbose=1)



In [None]:
# NOTE(review): neither `dnn` nor lowercase `x_test` is assigned in any cell
# visible in this notebook (the autoencoder is `model`, its windows `X_test`);
# this cell presumably relies on leftover kernel state -- confirm or remove.
y_pred_dnn_sigmoid  = dnn.predict(x_test)



In [None]:
pred_X_raw[0][0]

array([ 0.10392685,  0.11519286,  0.12188097,  0.08695778,  0.0088117 ,
        0.10568662,  0.03748055,  0.01497934,  0.10604897,  0.16813393,
       -0.0027255 ,  0.01606499,  0.08006833,  0.03575063,  0.15675598,
        0.0172257 ,  0.03483454,  0.14730209,  0.15896496,  0.13002871,
        0.16713558,  0.09007151,  0.10421488,  0.00551953,  0.14206356,
        0.13431819,  0.07423939,  0.13465554,  0.1497671 , -0.01918647,
        0.14794129,  0.00980964, -0.0059814 ,  0.13451864,  0.14368401,
        0.06272632,  0.10939229,  0.01518664,  0.15880197,  0.15224233,
        0.17693593,  0.18606102,  0.1947558 ,  0.00115978,  0.17024875,
        0.12163817,  0.16605915,  0.02333777, -0.03909359,  0.03619824,
       -0.01641725], dtype=float32)

In [None]:
# Build length-`window_size` windows over `normal.values` in a single indexing
# step: a (1, window_size) row of offsets is broadcast against an
# (n_rows - window_size, 1) column of start positions, giving a 2-D grid of
# row indices (one grid row per window).
# NOTE(review): the start range stops one short of the last window that would
# still fit -- confirm that is intended.
windows_normal=normal.values[np.arange(window_size)[None, :] + np.arange(normal.shape[0]-window_size)[:, None]]
windows_normal.shape

In [None]:
import abc
import logging
import random

import numpy as np
import torch
from torch.autograd import Variable

class Algorithm(metaclass=abc.ABCMeta):
    """Abstract base for detectors: stores naming/seed metadata and requires `fit` and `predict`."""

    def __init__(self, module_name, name, seed, details=False):
        """Record the metadata and, when `seed` is not None, seed `random` and `numpy.random`."""
        self.logger = logging.getLogger(module_name)
        self.name, self.seed, self.details = name, seed, details
        self.prediction_details = {}

        if self.seed is None:
            return
        # Seed the stdlib generator first, then NumPy's global generator.
        for seed_fn in (random.seed, np.random.seed):
            seed_fn(seed)

    def __str__(self):
        """Return the algorithm's name."""
        return self.name

    @abc.abstractmethod
    def fit(self, X):
        """
        Fit the algorithm to the dataset `X`.
        """

    @abc.abstractmethod
    def predict(self, X):
        """
        :return anomaly score for `X`
        """


class PyTorchUtils(metaclass=abc.ABCMeta):
    """Mixin holding a seed and GPU index plus helpers that move tensors/models to the chosen device."""

    def __init__(self, seed, gpu):
        """Store `gpu` and `seed`; when `seed` is not None, pass it to torch's seeding functions."""
        self.gpu = gpu
        self.seed = seed
        if self.seed is not None:
            for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
                seed_fn(self.seed)
        self.framework = 0

    @property
    def device(self):
        """`cuda:<gpu>` when CUDA is available and a GPU index was given, otherwise `cpu`."""
        if torch.cuda.is_available() and self.gpu is not None:
            return torch.device(f'cuda:{self.gpu}')
        return torch.device('cpu')

    def to_var(self, t, **kwargs):
        """Move tensor `t` to `self.device` and wrap it in `Variable` with the given keyword arguments."""
        # ToDo: check whether cuda Variable.
        on_device = t.to(self.device)
        return Variable(on_device, **kwargs)

    def to_device(self, model):
        """Move `model` to `self.device`; nothing is returned."""
        model.to(self.device)

In [None]:
pip install torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-geometric
  Downloading torch_geometric-2.2.0.tar.gz (564 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m565.0/565.0 KB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting psutil>=5.8.0
  Downloading psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (280 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.2/280.2 KB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.2.0-py3-none-any.whl size=773302 sha256=0f6d6ae37b86a6c77f9793dcdb4d0bfea0a96f7d85fd4b98f40b1724939b3b5f
  Stored in directory: /root/.cache/pip/wheels/59/a3/20/198928106d

In [None]:
pip install torch-sparse

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-sparse
  Downloading torch_sparse-0.6.16.tar.gz (208 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m208.2/208.2 KB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torch-sparse
  Building wheel for torch-sparse (setup.py) ... [?25l[?25hcanceled
[31mERROR: Operation cancelled by user[0m[31m
[0mTraceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/base_command.py", line 167, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/req_command.py", line 199, in wrapper
    return func(self, options, args)
  File "/usr/local/lib/python3.8/dist-packages/pip/_internal/commands/install.py", line 361, in run
    _, build_failures = build(
  File "/usr/local/l

In [None]:
import torch.nn as nn
from torch.autograd import Variable
import torch
from torch_geometric.nn import GCNConv, GATConv, GraphConv
import torch.nn.functional as F
from torch_geometric.data import Data, Batch



class GCNLSTMCell(nn.Module, PyTorchUtils):
    """LSTM cell whose four gate pre-activations come from one GCNConv over [input, hidden]."""

    def __init__(self, nodes_num, input_dim, hidden_dim, bias=True, seed: int=0, gpu: int=None):
        """
        Initialize GCNLSTM cell.
        
        Parameters
        ----------
        nodes_num: int
            Number of nodes.
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        bias: bool
            Whether or not to add the bias.
        seed: int
            Forwarded to PyTorchUtils.__init__.
        gpu: int
            Forwarded to PyTorchUtils.__init__.
        """

        super(GCNLSTMCell, self).__init__()
        PyTorchUtils.__init__(self, seed, gpu)

        self.nodes_num = nodes_num
        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim

        self.bias = bias
        
        # One convolution produces all four gates at once (hence 4 * hidden_dim outputs).
        self.gconv = GCNConv(in_channels=self.input_dim + self.hidden_dim,
                             out_channels=4 * self.hidden_dim,
                             bias=self.bias,
                             improved = True)

    def forward(self, input_tensor, cur_state, edge_index):
        '''
        Advance the cell by one time step.

        input_tensor:(b,n,i)
        cur_state:[(b,n,h),(b,n,h)]
        edge_index: graph connectivity, reused for every sample in the batch

        Returns (h_next, c_next), each of shape (b,n,h).
        '''
        h_cur, c_cur = cur_state
        
        combined = torch.cat([input_tensor, h_cur], dim=2)  # concatenate along hidden axis
        # Pack the per-sample graphs into one Batch so a single conv call covers the whole batch.
        batch = Batch.from_data_list([Data(x=combined[i], edge_index=edge_index) for i in range(combined.shape[0])])
        
        combined_conv = self.gconv(batch.x, batch.edge_index)
        # Restore the (b, n, 4h) layout after the flattened batch convolution.
        combined_conv = combined_conv.reshape(combined.shape[0],combined.shape[1],-1)

        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=2) 
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)

        # output: (b,n,h),(b,n,h)
        return h_next, c_next

    def init_hidden(self, batch_size):
        """Return zero-filled (h, c) states of shape (batch_size, nodes_num, hidden_dim) on self.device."""
        # Allocate directly on the target device; the deprecated Variable
        # wrapper (previously applied twice) adds nothing to a plain tensor.
        shape = (batch_size, self.nodes_num, self.hidden_dim)
        return (torch.zeros(shape, device=self.device),
                torch.zeros(shape, device=self.device))

class GATLSTMCell(nn.Module, PyTorchUtils):
    """LSTM cell whose four gate pre-activations come from one GATConv over [input, hidden]."""

    def __init__(self, nodes_num, input_dim, hidden_dim, head=1, dropout=0, bias=True, seed: int=0, gpu: int=None):
        """
        Initialize GATLSTM cell.
        
        Parameters
        ----------
        nodes_num: int
            Number of nodes.
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        head: int
            Number of multi-head-attentions.
        dropout: float
            Dropout probability of the normalized attention coefficients.
        bias: bool
            Whether or not to add the bias.
        seed: int
            Forwarded to PyTorchUtils.__init__.
        gpu: int
            Forwarded to PyTorchUtils.__init__.
        """

        super(GATLSTMCell, self).__init__()
        PyTorchUtils.__init__(self, seed, gpu)

        self.nodes_num = nodes_num
        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim
        self.head = head
        self.dropout = dropout
        self.bias = bias
        
        # One convolution produces all four gates at once (hence 4 * hidden_dim
        # outputs); concat=False keeps that width independent of the head count.
        self.gconv = GATConv(in_channels=self.input_dim + self.hidden_dim,
                             out_channels=4 * self.hidden_dim,
                             heads=self.head,
                             concat = False,
                             dropout=self.dropout,
                             bias=self.bias)

    def forward(self, input_tensor, cur_state, edge_index):
        '''
        Advance the cell by one time step.

        input_tensor:(b,n,i)
        cur_state:[(b,n,h),(b,n,h)]
        edge_index: graph connectivity, reused for every sample in the batch

        Returns (h_next, c_next), each of shape (b,n,h).
        '''
        h_cur, c_cur = cur_state

        combined = torch.cat([input_tensor, h_cur], dim=2)  # concatenate along hidden axis
        # Pack the per-sample graphs into one Batch so a single conv call covers the whole batch.
        batch = Batch.from_data_list([Data(x=combined[i], edge_index=edge_index) for i in range(combined.shape[0])])
        
        combined_conv = self.gconv(batch.x, batch.edge_index)
        # Restore the (b, n, 4h) layout after the flattened batch convolution.
        combined_conv = combined_conv.reshape(combined.shape[0],combined.shape[1],-1)

        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=2) 
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)
        
        # output: (b,n,h),(b,n,h)
        return h_next, c_next

    def init_hidden(self, batch_size):
        """Return zero-filled (h, c) states of shape (batch_size, nodes_num, hidden_dim) on self.device."""
        # Allocate directly on the target device; the deprecated Variable
        # wrapper (previously applied twice) adds nothing to a plain tensor.
        shape = (batch_size, self.nodes_num, self.hidden_dim)
        return (torch.zeros(shape, device=self.device),
                torch.zeros(shape, device=self.device))

class WL1LSTMCell(nn.Module, PyTorchUtils):
    """LSTM cell whose four gate pre-activations come from one mean-aggregating GraphConv over [input, hidden]."""

    def __init__(self, nodes_num, input_dim, hidden_dim, bias=True, seed: int=0, gpu: int=None):
        """
        Initialize WL1LSTM cell.
        
        Parameters
        ----------
        nodes_num: int
            Number of nodes.
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        bias: bool
            Whether or not to add the bias.
        seed: int
            Forwarded to PyTorchUtils.__init__.
        gpu: int
            Forwarded to PyTorchUtils.__init__.
        """

        super(WL1LSTMCell, self).__init__()
        PyTorchUtils.__init__(self, seed, gpu)

        self.nodes_num = nodes_num
        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim

        self.bias = bias
        
        # One convolution produces all four gates at once (hence 4 * hidden_dim outputs).
        self.gconv = GraphConv(in_channels=self.input_dim + self.hidden_dim,
                               out_channels=4 * self.hidden_dim,
                               aggr = 'mean',
                               bias=self.bias)

    def forward(self, input_tensor, cur_state, edge_index):
        '''
        Advance the cell by one time step.

        input_tensor:(b,n,i)
        cur_state:[(b,n,h),(b,n,h)]
        edge_index: graph connectivity, reused for every sample in the batch

        Returns (h_next, c_next), each of shape (b,n,h).
        '''
        h_cur, c_cur = cur_state
        
        combined = torch.cat([input_tensor, h_cur], dim=2)  # concatenate along hidden axis
        # Pack the per-sample graphs into one Batch so a single conv call covers the whole batch.
        batch = Batch.from_data_list([Data(x=combined[i], edge_index=edge_index) for i in range(combined.shape[0])])
        
        combined_conv = self.gconv(batch.x, batch.edge_index)
        # Restore the (b, n, 4h) layout after the flattened batch convolution.
        combined_conv = combined_conv.reshape(combined.shape[0],combined.shape[1],-1)

        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=2) 
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)

        # output: (b,n,h),(b,n,h)
        return h_next, c_next

    def init_hidden(self, batch_size):
        """Return zero-filled (h, c) states of shape (batch_size, nodes_num, hidden_dim) on self.device."""
        # Allocate directly on the target device; the deprecated Variable
        # wrapper (previously applied twice) adds nothing to a plain tensor.
        shape = (batch_size, self.nodes_num, self.hidden_dim)
        return (torch.zeros(shape, device=self.device),
                torch.zeros(shape, device=self.device))

class LSTMCell(nn.Module, PyTorchUtils):
    """Graph-free LSTM cell: the four gate pre-activations come from one Linear layer over [input, hidden]."""

    def __init__(self, nodes_num, input_dim, hidden_dim, bias=True, seed: int=0, gpu: int=None):
        """
        Initialize LSTM cell.
        
        Parameters
        ----------
        nodes_num: int
            Number of nodes.
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        bias: bool
            Whether or not to add the bias.
        seed: int
            Forwarded to PyTorchUtils.__init__.
        gpu: int
            Forwarded to PyTorchUtils.__init__.
        """

        super(LSTMCell, self).__init__()
        PyTorchUtils.__init__(self, seed, gpu)

        self.nodes_num = nodes_num
        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim

        self.bias = bias
        
        # Named `gconv` like in the graph-based cells, but this is a plain
        # Linear layer; it produces all four gates at once (4 * hidden_dim outputs).
        self.gconv = nn.Linear(self.input_dim + self.hidden_dim,
                               4 * self.hidden_dim,
                               bias=self.bias)

    def forward(self, input_tensor, cur_state, edge_index):
        '''
        Advance the cell by one time step.

        input_tensor:(b,n,i)
        cur_state:[(b,n,h),(b,n,h)]
        edge_index: accepted so the signature matches the graph-based cells; not used here

        Returns (h_next, c_next), each of shape (b,n,h).
        '''
        h_cur, c_cur = cur_state
        
        combined = torch.cat([input_tensor, h_cur], dim=2)  # concatenate along hidden axis

        # Linear acts on the last axis, so no batching/reshaping step is needed.
        combined_conv = self.gconv(combined)

        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=2) 
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)

        # output: (b,n,h),(b,n,h)
        return h_next, c_next

    def init_hidden(self, batch_size):
        """Return zero-filled (h, c) states of shape (batch_size, nodes_num, hidden_dim) on self.device."""
        # Allocate directly on the target device; the deprecated Variable
        # wrapper (previously applied twice) adds nothing to a plain tensor.
        shape = (batch_size, self.nodes_num, self.hidden_dim)
        return (torch.zeros(shape, device=self.device),
                torch.zeros(shape, device=self.device))

class GraphLSTM(nn.Module, PyTorchUtils):
    """Stack of graph-LSTM cells unrolled over a time-major input sequence.

    `kind` selects the cell class used for every layer:
    'GCN' -> GCNLSTMCell, 'GAT' -> GATLSTMCell, 'WL1' -> WL1LSTMCell,
    'LIN' -> LSTMCell.
    """

    def __init__(self, nodes_num, input_dim, hidden_dim, num_layers, head=1, dropout=0, kind='GCN',
                 batch_first=False, bias=True, return_all_layers=True, seed: int=0, gpu: int=None):
        """
        Build `num_layers` cells of the requested kind.

        Parameters
        ----------
        nodes_num: int
            Number of nodes.
        input_dim: int
            Number of channels of the input tensor.
        hidden_dim: int or list of int
            Hidden channels per layer; a scalar is repeated for every layer.
        num_layers: int
            Number of stacked cells.
        head: int or list of int
            Attention heads per layer (only passed to 'GAT' cells); a scalar is repeated.
        dropout: float
            Dropout passed to 'GAT' cells.
        kind: str
            One of 'GCN', 'GAT', 'WL1', 'LIN'.
        batch_first: bool
            Stored only; `forward` does not permute its input (see note there).
        bias: bool
            Whether the cells use a bias.
        return_all_layers: bool
            If False, `forward` returns only the last layer's outputs/states.
        seed, gpu:
            Forwarded to PyTorchUtils.__init__ and to every cell.

        Raises
        ------
        ValueError
            If `hidden_dim` / `head` lists do not have `num_layers` entries.
        NotImplementedError
            If `kind` is not one of the supported values.
        """
        super(GraphLSTM, self).__init__()
        PyTorchUtils.__init__(self, seed, gpu)

        # Make sure that `hidden_dim` and `head` are lists having len == num_layers
        hidden_dim  = self._extend_for_multilayer(hidden_dim, num_layers)
        head = self._extend_for_multilayer(head, num_layers)
        if not len(hidden_dim) == len(head) == num_layers:
            raise ValueError('Inconsistent list length.')

        self.nodes_num = nodes_num
        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.head = head
        self.dropout = dropout
        self.kind = kind
        self.batch_first = batch_first
        self.bias = bias
        self.return_all_layers = return_all_layers

        cell_list = []
        for i in range(0, self.num_layers):
            # The first layer consumes the raw input; deeper layers consume the previous layer's hidden state.
            cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i-1]

            # Constructor arguments shared by every cell kind.
            shared = dict(nodes_num=nodes_num,
                          input_dim=cur_input_dim,
                          hidden_dim=self.hidden_dim[i],
                          bias=self.bias,
                          seed=self.seed,
                          gpu=self.gpu)

            if self.kind == 'GCN':
                cell = GCNLSTMCell(**shared)
            elif self.kind == 'GAT':
                cell = GATLSTMCell(head=self.head[i], dropout=self.dropout, **shared)
            elif self.kind == 'WL1':
                cell = WL1LSTMCell(**shared)
            elif self.kind == 'LIN':
                cell = LSTMCell(**shared)
            else:
                raise NotImplementedError(
                    f"Unsupported kind {self.kind!r}; expected 'GCN', 'GAT', 'WL1' or 'LIN'.")
            cell_list.append(cell)

        self.cell_list = nn.ModuleList(cell_list)

    def forward(self, input_tensor, edge_index, hidden_state=None):
        """
        Run every layer over the whole sequence.

        Parameters
        ----------
        input_tensor: 4-D Tensor of shape (t, b, n, c) -- time-major
        edge_index: graph connectivity handed to every cell
        hidden_state: list [[(b, n, h), (b, n, h)]] * num_layers, or None for zero states
            
        Returns
        -------
        layer_output_list: [(t, b, n, h)] * num_layers (only the last layer if not return_all_layers),
        last_state_list: [[(b, n, h), (b, n, h)]] * num_layers (only the last layer if not return_all_layers)
        """
        # The input must already be (t, b, n, c). The batch_first permute is not
        # applied here; per the original note it is done before this call:
        #   (b, t, n, c) -> (t, b, n, c)
        #   input_tensor = input_tensor.permute(1, 0, 2, 3).contiguous()

        # Implement stateful GraphLSTM
        if hidden_state is None:
            # [[(b, n, h), (b, n, h)]] * num_layers; axis 1 of the input is the batch axis
            hidden_state = self._init_hidden(input_tensor.size(1))

        layer_output_list = []
        last_state_list   = []

        seq_len = input_tensor.size(0)
        cur_layer_input = input_tensor

        for layer_idx in range(self.num_layers):

            h, c = hidden_state[layer_idx]
            output_inner = []
            for t in range(seq_len):

                h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[t],
                                                 edge_index = edge_index, cur_state=[h, c])
                output_inner.append(h)

            # Stack along time so this layer's output is again (t, b, n, h) and feeds the next layer.
            layer_output = torch.stack(output_inner, dim=0)
            cur_layer_input = layer_output

            layer_output_list.append(layer_output)
            last_state_list.append([h, c])

        if not self.return_all_layers:
            layer_output_list = layer_output_list[-1:]
            last_state_list   = last_state_list[-1:]

        return layer_output_list, last_state_list

    def _init_hidden(self, batch_size):
        """Collect each cell's zero (h, c) state for the given batch size."""
        init_states = []
        for i in range(self.num_layers):
            init_states.append(self.cell_list[i].init_hidden(batch_size))
        return init_states

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        """Repeat a non-list `param` `num_layers` times; lists are returned unchanged."""
        if not isinstance(param, list):
            param = [param] * num_layers
        return param

ModuleNotFoundError: ignored

In [None]:
df = pd.read_csv("https://raw.githubusercontent.com/QAZASDEDC/TopoMAD/master/DatasetUpdate/MBD%20(1).csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
df = df.dropna()

In [None]:
df["label"].value_counts()

0.0    8074
1.0     566
Name: label, dtype: int64

In [None]:
for i in df["label"]:
  print(i)

In [None]:
!wget https://s3-us-west-2.amazonaws.com/telemanom/data.zip && unzip data.zip && rm data.zip



--2023-02-02 00:27:07--  https://s3-us-west-2.amazonaws.com/telemanom/data.zip
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.92.160.40, 52.92.212.24, 52.218.218.216, ...
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.92.160.40|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 85899803 (82M) [application/zip]
Saving to: ‘data.zip’


2023-02-02 00:27:14 (12.6 MB/s) - ‘data.zip’ saved [85899803/85899803]

Archive:  data.zip
   creating: data/2018-05-19_15.00.10/
   creating: data/2018-05-19_15.00.10/models/
  inflating: data/2018-05-19_15.00.10/models/A-1.h5  
  inflating: data/2018-05-19_15.00.10/models/A-2.h5  
  inflating: data/2018-05-19_15.00.10/models/A-3.h5  
  inflating: data/2018-05-19_15.00.10/models/A-4.h5  
  inflating: data/2018-05-19_15.00.10/models/A-5.h5  
  inflating: data/2018-05-19_15.00.10/models/A-6.h5  
  inflating: data/2018-05-19_15.00.10/models/A-7.h5  
  inflating: data/2018-05-19_15.00.1

In [None]:
pip install  ts_datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Could not find a version that satisfies the requirement ts_datasets (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for ts_datasets[0m[31m
[0m

In [None]:

import scipy.io as sio
test = sio.loadmat('/content/shuttle.mat')

In [None]:
y = test["y"]

In [None]:
x = test["X"]

In [None]:
# scipy.io.loadmat stores the file header under the dunder key "__header__";
# there is no plain "header" entry, which is what raised the KeyError.
x = test["__header__"]

KeyError: ignored

In [None]:
test['X']
# load .mat file into python pandas dataframe
def loadmat(filename, key='X'):
    """Read a MATLAB .mat file and return one of its variables as a DataFrame.

    Parameters
    ----------
    filename: path of the .mat file, passed to `scipy.io.loadmat`.
    key: name of the stored variable to convert; defaults to 'X', the
        variable this function always read before the parameter existed.

    Returns
    -------
    pandas.DataFrame built from the selected variable.

    Raises
    ------
    KeyError if the file has no variable called `key`.
    """
    data = sio.loadmat(filename)
    return pd.DataFrame(data[key])

In [None]:
x = loadmat('/content/shuttle.mat')

In [None]:
x

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,50,21,77,0,28,0,27,48,22
1,53,0,82,0,52,-5,29,30,2
2,37,0,76,0,28,18,40,48,8
3,37,0,79,0,34,-26,43,46,2
4,85,0,88,-4,6,1,3,83,80
...,...,...,...,...,...,...,...,...,...
49092,39,-2,80,-4,38,0,41,41,0
49093,43,0,81,1,42,-9,37,39,2
49094,49,0,87,0,46,-12,38,41,2
49095,80,0,84,0,-36,-29,4,120,116


In [None]:
import pandas as pd 
# An empty separator cannot be parsed (it produced the error shown below the
# cell); split on runs of whitespace instead.
# NOTE(review): assumes the .txt file is whitespace-delimited -- confirm against the file.
x = pd.read_csv("/content/ECG5000_TRAIN.txt", header=None, sep=r"\s+")

ParserError: ignored

In [None]:
# Download the dataset (read without a header row, so columns are numbered)
dataframe = pd.read_csv('http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv', header=None)
raw_data = dataframe.values
# Keep the first 500 rows.
# NOTE(review): the two assignments below are identical; the second is redundant.
X_train = dataframe[0:500]
X_train = dataframe[0:500]
# NOTE(review): `X` is not assigned in this cell; the only visible assignment
# is the windowed training array from the LSTM-autoencoder section.
# `X_train` or `dataframe` was presumably intended -- confirm.
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,131,132,133,134,135,136,137,138,139,140
0,-0.112522,-2.827204,-3.773897,-4.349751,-4.376041,-3.474986,-2.181408,-1.818286,-1.250522,-0.477492,...,0.792168,0.933541,0.796958,0.578621,0.257740,0.228077,0.123431,0.925286,0.193137,1.0
1,-1.100878,-3.996840,-4.285843,-4.506579,-4.022377,-3.234368,-1.566126,-0.992258,-0.754680,0.042321,...,0.538356,0.656881,0.787490,0.724046,0.555784,0.476333,0.773820,1.119621,-1.436250,1.0
2,-0.567088,-2.593450,-3.874230,-4.584095,-4.187449,-3.151462,-1.742940,-1.490659,-1.183580,-0.394229,...,0.886073,0.531452,0.311377,-0.021919,-0.713683,-0.532197,0.321097,0.904227,-0.421797,1.0
3,0.490473,-1.914407,-3.616364,-4.318823,-4.268016,-3.881110,-2.993280,-1.671131,-1.333884,-0.965629,...,0.350816,0.499111,0.600345,0.842069,0.952074,0.990133,1.086798,1.403011,-0.383564,1.0
4,0.800232,-0.874252,-2.384761,-3.973292,-4.338224,-3.802422,-2.534510,-1.783423,-1.594450,-0.753199,...,1.148884,0.958434,1.059025,1.371682,1.277392,0.960304,0.971020,1.614392,1.421456,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4993,0.608558,-0.335651,-0.990948,-1.784153,-2.626145,-2.957065,-2.931897,-2.664816,-2.090137,-1.461841,...,1.757705,2.291923,2.704595,2.451519,2.017396,1.704358,1.688542,1.629593,1.342651,0.0
4994,-2.060402,-2.860116,-3.405074,-3.748719,-3.513561,-3.006545,-2.234850,-1.593270,-1.075279,-0.976047,...,1.388947,2.079675,2.433375,2.159484,1.819747,1.534767,1.696818,1.483832,1.047612,0.0
4995,-1.122969,-2.252925,-2.867628,-3.358605,-3.167849,-2.638360,-1.664162,-0.935655,-0.866953,-0.645363,...,-0.472419,-1.310147,-2.029521,-3.221294,-4.176790,-4.009720,-2.874136,-2.008369,-1.808334,0.0
4996,-0.547705,-1.889545,-2.839779,-3.457912,-3.929149,-3.966026,-3.492560,-2.695270,-1.849691,-1.374321,...,1.258419,1.907530,2.280888,1.895242,1.437702,1.193433,1.261335,1.150449,0.804932,0.0


In [None]:
from sklearn.model_selection import train_test_split
# Ordered 90/10 split of the ECG frame (shuffle=False keeps row order).
# NOTE(review): the features passed here are the whole `dataframe`, which
# still contains column 140 -- the same column used as the labels -- so the
# label is part of the features. The next cell re-splits with it removed.
# NOTE(review): random_state presumably has no effect while shuffle=False.
train_data, test_data, train_labels, test_labels = train_test_split(
    dataframe, dataframe[140], test_size=0.1, random_state=21,shuffle = False
)

In [None]:
# The last element of each row contains the label
labels = raw_data[:, -1]

# The other data points are the electrocardiogram data
data = raw_data[:, 0:-1]

# Ordered split (shuffle=False): test_size=0.9 leaves 10% of the rows for
# training and puts the remaining 90% in the test set.
# NOTE(review): random_state presumably has no effect while shuffle=False.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.9, random_state=21,shuffle = False
)

In [None]:
train_data = pd.DataFrame(train_data)

In [None]:
X_train = dataframe[dataframe[140]!=0]

In [None]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,131,132,133,134,135,136,137,138,139,140
0,-0.112522,-2.827204,-3.773897,-4.349751,-4.376041,-3.474986,-2.181408,-1.818286,-1.250522,-0.477492,...,0.792168,0.933541,0.796958,0.578621,0.257740,0.228077,0.123431,0.925286,0.193137,1.0
1,-1.100878,-3.996840,-4.285843,-4.506579,-4.022377,-3.234368,-1.566126,-0.992258,-0.754680,0.042321,...,0.538356,0.656881,0.787490,0.724046,0.555784,0.476333,0.773820,1.119621,-1.436250,1.0
2,-0.567088,-2.593450,-3.874230,-4.584095,-4.187449,-3.151462,-1.742940,-1.490659,-1.183580,-0.394229,...,0.886073,0.531452,0.311377,-0.021919,-0.713683,-0.532197,0.321097,0.904227,-0.421797,1.0
3,0.490473,-1.914407,-3.616364,-4.318823,-4.268016,-3.881110,-2.993280,-1.671131,-1.333884,-0.965629,...,0.350816,0.499111,0.600345,0.842069,0.952074,0.990133,1.086798,1.403011,-0.383564,1.0
4,0.800232,-0.874252,-2.384761,-3.973292,-4.338224,-3.802422,-2.534510,-1.783423,-1.594450,-0.753199,...,1.148884,0.958434,1.059025,1.371682,1.277392,0.960304,0.971020,1.614392,1.421456,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3122,-3.107567,-4.521302,-3.976050,-3.930939,-3.210632,-2.047815,-1.442905,-1.271809,-0.809131,-0.464601,...,1.278968,1.389388,1.464593,1.400539,1.125004,1.029697,0.998533,1.075872,-0.533409,1.0
3123,-1.693958,-3.318677,-3.784877,-4.073614,-3.864253,-3.347139,-2.322313,-1.650408,-1.454390,-0.877257,...,1.234095,1.209287,1.166618,0.903829,0.637425,0.944447,0.827138,-0.182124,-1.924306,1.0
3124,-2.089921,-2.982796,-3.850578,-3.861497,-3.039973,-1.381897,-0.621009,0.114544,1.033027,1.309473,...,0.369837,0.623082,1.039331,1.016398,0.299646,-0.971958,-1.783307,-2.003871,-3.500333,1.0
3125,-2.123972,-3.247339,-3.737318,-4.037332,-3.763836,-3.070836,-1.763453,-1.400594,-1.026897,-0.340909,...,0.486942,0.442708,0.167527,0.121955,-0.224867,-0.298590,-0.194366,-1.001879,-2.918817,1.0
