In [1]:
import pandas as pd
import numpy as np
import torch
random_seed = 123
np.random.seed(random_seed)

df_1 = pd.read_csv('data/normal_vec.csv', header=None)
df_2 = pd.read_csv('data/abnormal_vec.csv', header=None)
df_3 = pd.read_csv('data/error_vec.csv', header=None)

In [2]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
def normal_make_instance(df):
    scaler = StandardScaler()
    noise = np.random.normal(0, 7, (15,6))
    list_scaled = scaler.fit_transform(df + noise)
    abc = np.concatenate((np.array([[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0]]),list_scaled,np.array([[0,0,0,0,0,0],[0,0,0,0,0,0]])))
    b = torch.from_numpy(abc)
    instance = [b, 0]
    return instance

def abnormal_make_instance(df):
    scaler = StandardScaler()
    noise = np.random.normal(0, 7, (19,6))
    list_scaled = scaler.fit_transform(df + noise)
    abc = np.concatenate((list_scaled, np.array([[0,0,0,0,0,0]])))
    b = torch.from_numpy(abc) 
    instance = [b, 1]
    return instance

def error_make_instance(df):
    scaler = StandardScaler()
    noise = np.random.normal(0, 7, (20,6))
    list_scaled = scaler.fit_transform(df + noise)
    b = torch.from_numpy(list_scaled)
    instance = [b, 2]
    return instance

In [3]:
def make_ds(df_nor, df_ab, df_er, num):
    dataset = []
    for i in range(num):
        dataset.append(normal_make_instance(df_nor))
        dataset.append(abnormal_make_instance(df_ab))
        dataset.append(error_make_instance(df_er))
        
    return dataset

In [4]:
total_dataset = make_ds(df_1, df_2, df_3, 10000)

In [5]:
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import Subset

sss = StratifiedShuffleSplit(n_splits = 1, test_size = 0.2, random_state = random_seed)
indices = list(range(len(total_dataset)))
y_ds = [y for _, y in total_dataset]

for train_index, test_index in sss.split(indices, y_ds):
    print('train:', train_index, 'test:', test_index)
    print(len(train_index), len(test_index))

sub_train = Subset(total_dataset, train_index)
test_ds = Subset(total_dataset, test_index)

sss2 = StratifiedShuffleSplit(n_splits = 1, test_size = 0.25, random_state = random_seed)
indices2 = list(range(len(sub_train)))
y_ds2 = [y for _, y in sub_train]

for train_index2, val_index in sss2.split(indices2, y_ds2):
    print('train:', train_index2, 'val:', val_index)
    print(len(train_index2), len(val_index))

train_ds = Subset(sub_train, train_index2)
val_ds = Subset(sub_train, val_index)

train: [ 9465  4990 23348 ...  8551  7658 23236] test: [28621 18384 29068 ... 29031 11744 16112]
24000 6000
train: [20242 12026  6805 ... 21690 23550 22921] val: [ 6628  2421  8359 ... 20429 12307 18328]
18000 6000


In [6]:
image, label = train_ds[0]
image.size()

torch.Size([20, 6])

In [7]:
from torch.utils.data import DataLoader
batch_size = 64
train_loader = DataLoader(train_ds, batch_size= batch_size, shuffle=True, num_workers=8, drop_last=True)
val_loader = DataLoader(val_ds, batch_size= batch_size, shuffle=True, num_workers=8, drop_last=True)
test_loader = DataLoader(test_ds, batch_size= batch_size, shuffle=False, num_workers=8, drop_last=True)

In [8]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim=3, num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # setup LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # setup output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def forward(self, input, hidden=None):
        # lstm step => then ONLY take the sequence's final timetep to pass into the linear/dense layer
        # Note: lstm_out contains outputs for every step of the sequence we are looping over (for BPTT)
        # but we just need the output of the last step of the sequence, aka lstm_out[-1]
        lstm_out, hidden = self.lstm(input, hidden)
        logits = self.linear(lstm_out[-1])              # equivalent to return_sequences=False from Keras
        #softmax = F.softmax(logits)

        return logits, hidden

In [9]:
import torch.optim as optim

model = LSTM(input_dim = 6, hidden_dim = 3, output_dim = 3, num_layers = 1).cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-6)
#lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=20, min_lr=1e-6, verbose=True)
criterion = nn.CrossEntropyLoss()
num_epochs = 100

In [10]:
import torch

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def Accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [11]:
for epoch in range(num_epochs):
    hidden_state = None
    model.train()
    
    train_losses = AverageMeter()
    train_accs = AverageMeter()
    for idx, (images, target) in enumerate(train_loader):
        #print(images.shape)
        images = images.permute(1,0,2).cuda()
        target = target.cuda()
        images = images.type(torch.cuda.FloatTensor)
        y_pred, hidden_state = model(images, hidden_state)
        #print("y_pred",y_pred)

        stateful = False
        if not stateful:
            hidden_state = None
        else:
            h_0, c_0 = hidden_state
            h_0.detach_(), c_0.detach_()
            hidden_state = (h_0, c_0)

        train_loss = criterion(y_pred, target)
        #print(train_loss)
        
        train_losses.update(train_loss.item(), images.size(0)) 
        train_acc = Accuracy(y_pred, target)
        train_accs.update(train_acc[0].item(), images.size(0))
        
        model.zero_grad()
        train_loss.backward()  # backward pass
        optimizer.step()  # parameter update
    #lr_scheduler.step(train_loss)

    with torch.no_grad():
        model.eval()
        hidden_state = None
        
        val_losses = AverageMeter()
        val_accs = AverageMeter()
        
        for idx, (images, target) in enumerate(val_loader):
            images = images.permute(1,0,2).cuda()
            target = target.cuda()
            images = images.type(torch.cuda.FloatTensor)

            y_pred, hidden_state = model(images, hidden_state)
            
            val_loss = criterion(y_pred, target)
            val_losses.update(val_loss.item(), images.size(0)) 

            val_acc = Accuracy(y_pred, target)
            val_accs.update(val_acc[0].item(), images.size(0))
            
            stateful = True
            if not stateful:
                hidden_state = None
            else:
                h_0, c_0 = hidden_state
                h_0.detach_(), c_0.detach_()
                hidden_state = (h_0, c_0)
    
    print('Epoch : {} , train_loss : {}, train_acc : {}, val_loss : {}, val_acc : {} '.format(epoch+1, train_losses.avg, train_accs.avg, val_losses.avg, val_accs.avg))

Epoch : 1 , train_loss : 1.096594584369999, train_acc : 20.857428825622776, val_loss : 1.0877273262187999, val_acc : 22.295026881720432 
Epoch : 2 , train_loss : 1.0775833231698575, train_acc : 25.589412811387902, val_loss : 1.0668492547927364, val_acc : 29.637096774193548 
Epoch : 3 , train_loss : 1.0542462622992084, train_acc : 39.12922597864769, val_loss : 1.0405162149860012, val_acc : 49.546370967741936 
Epoch : 4 , train_loss : 1.0239962657575505, train_acc : 60.55938612099644, val_loss : 1.0062836466297027, val_acc : 69.47244623655914 
Epoch : 5 , train_loss : 0.9849368204425663, train_acc : 76.21218861209964, val_loss : 0.962416765510395, val_acc : 80.72916666666667 
Epoch : 6 , train_loss : 0.9368616944954489, train_acc : 83.67437722419929, val_loss : 0.9104658628022799, val_acc : 85.41666666666667 
Epoch : 7 , train_loss : 0.8808738071723341, train_acc : 86.98843416370107, val_loss : 0.8511446637492026, val_acc : 87.44959677419355 
Epoch : 8 , train_loss : 0.8177580010424305, 

Epoch : 61 , train_loss : 0.09758567466695538, train_acc : 98.07050711743773, val_loss : 0.11960009197073598, val_acc : 97.19422043010752 
Epoch : 62 , train_loss : 0.09570211300699312, train_acc : 98.08718861209964, val_loss : 0.11921687972962215, val_acc : 97.22782258064517 
Epoch : 63 , train_loss : 0.09419816952477146, train_acc : 98.09830960854093, val_loss : 0.11530107335858447, val_acc : 97.32862903225806 
Epoch : 64 , train_loss : 0.09261049669620405, train_acc : 98.12055160142349, val_loss : 0.11487543114250706, val_acc : 97.34543010752688 
Epoch : 65 , train_loss : 0.09111614311876245, train_acc : 98.17059608540926, val_loss : 0.11283397730640186, val_acc : 97.41263440860214 
Epoch : 66 , train_loss : 0.08940543435858662, train_acc : 98.19283807829181, val_loss : 0.11294782153701269, val_acc : 97.34543010752688 
Epoch : 67 , train_loss : 0.08821831406276422, train_acc : 98.20395907473309, val_loss : 0.11183230126256584, val_acc : 97.34543010752688 
Epoch : 68 , train_loss : 0