In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import loader2
import os
import torch
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier
import pickle
import numpy as np
import datetime

In [2]:
# Running time calculate: start time
start_time = datetime.datetime.now()
# Detection

# Diagnosis 
class waveformDiagnosisDL:
    
    def __init__(self, path):
        
        # Directory of normal and abnormal data
        normal_path = path + 'Normal/'
        fault_1_path = path + 'Fault_A/'
        fault_2_path = path + 'Fault_AE/'
        fault_3_path = path + 'Fault_B/'
        fault_4_path = path + 'Fault_C/'
        fault_5_path = path + 'Fault_D/'
        
        
        normal_files = [f for f in os.listdir(normal_path) if f.endswith('.csv')]
        fault_1_files = [f for f in os.listdir(fault_1_path) if f.endswith('.csv')]
        fault_2_files = [f for f in os.listdir(fault_2_path) if f.endswith('.csv')]
        fault_3_files = [f for f in os.listdir(fault_3_path) if f.endswith('.csv')]
        fault_4_files = [f for f in os.listdir(fault_4_path) if f.endswith('.csv')]
        fault_5_files = [f for f in os.listdir(fault_5_path) if f.endswith('.csv')]
        
        np_normal = []
        np_fault_1 = []
        np_fault_2 = []
        np_fault_3 = []
        np_fault_4 = []
        np_fault_5 = []        
        
        for f in normal_files:
            temp_file = np.loadtxt(normal_path + f, delimiter=',', dtype=np.float32)
            np_normal.append(temp_file)
        
        for f in fault_1_files:
            temp_file = np.loadtxt(fault_1_path + f, delimiter=',', dtype=np.float32)
            np_fault_1.append(temp_file)
            
        for f in fault_2_files:
            temp_file = np.loadtxt(fault_2_path + f, delimiter=',', dtype=np.float32)
            np_fault_2.append(temp_file)
            
        for f in fault_3_files:
            temp_file = np.loadtxt(fault_3_path + f, delimiter=',', dtype=np.float32)
            np_fault_3.append(temp_file)
            
        for f in fault_4_files:
            temp_file = np.loadtxt(fault_4_path + f, delimiter=',', dtype=np.float32)
            np_fault_4.append(temp_file)
            
        for f in fault_5_files:
            temp_file = np.loadtxt(fault_5_path + f, delimiter=',', dtype=np.float32)
            np_fault_5.append(temp_file)

            
        # x: input data, y: target
        self.y_data = np.concatenate([np.zeros(len(np_normal),dtype=np.long), \
                                      np.zeros(len(np_fault_1),dtype=np.long)+1, \
                                      np.zeros(len(np_fault_2),dtype=np.long)+2, \
                                      np.zeros(len(np_fault_3),dtype=np.long)+3, \
                                      np.zeros(len(np_fault_4),dtype=np.long)+4, \
                                      np.zeros(len(np_fault_5),dtype=np.long)+5, ])
        self.x_data = np.concatenate([np_normal, np_fault_1, np_fault_2, np_fault_3, np_fault_4, 
                                      np_fault_5])
        self.len = self.x_data.shape[0]
        self.x_data = torch.from_numpy(self.x_data)
        self.y_data = torch.from_numpy(self.y_data)
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    def __len__(self):
        return self.len

In [3]:
data_path_DL = '/Users/Kayvon/Google Drive/reference_code/w20_newdataset/fault_diagnosis/deep_learning/'
all_data_DL = waveformDiagnosisDL(data_path_DL)

In [11]:
#%%----------------------create the LSTM Net ------------------------------------
class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        
        self.lstm = nn.LSTM(
                input_size=INPUT_SIZE,
                hidden_size=64,
                num_layers=10,
                batch_first=True,
                )
        
        self.out = nn.Linear(64, 6)
    
    def forward(self, x):
        lstm_out, (h_n, h_c) = self.lstm(x, None)
        out = self.out(lstm_out[:, -1, :])
        return out
    
data_output = {'lstm':{'F1':[], 'precision':[], 'recall':[], 'accuracy':[], 'auc':[], 'fpr':[], 'tpr':[], 'test_loss':[], 'train_loss':[]}}

In [13]:
EPOCH = 250
BATCH_SIZE = 512
TIME_STEP = 20
INPUT_SIZE = 408
LR = 0.0001
KFOLD = 1
isGPU = torch.cuda.is_available()

In [14]:
for num_of_training in range(KFOLD):
    print('------------------fold {}------------------------'.format(num_of_training + 1))
    lstm = LSTM()
    
    if isGPU:
        lstm = nn.DataParallel(lstm, device_ids=[0])
        lstm.cuda()

    lstm_optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
    # lstm_optimizer = torch.optim.SGD(lstm.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss()
    
    # print the structure of the network
    print(lstm)
    
    # data partition: 15% testing, 85% training
    training_data, test_data = torch.utils.data.random_split(all_data_DL, [int(all_data_DL.len * 0.85), all_data_DL.len - int(all_data_DL.len * 0.85)])
    training_Loader = DataLoader(dataset=training_data, batch_size=BATCH_SIZE, shuffle=True)
    test_Loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE)
    
    # training and testing
    lstm_test_loss_draw = []
    
    lstm_loss_draw = []
    
    for epoch in range(EPOCH):
        print('-----------------------epoch {}----------------------'.format(epoch + 1))
        
        # training-----------------------------------------
        lstm_train_loss = 0.
        
        lstm_train_acc = 0.
        
        lstm.train()
        
        for step, (batch_x, batch_y) in enumerate(training_Loader):
            batch_x = batch_x.view(-1, TIME_STEP, INPUT_SIZE)
            
            if isGPU:
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()

            output_lstm = lstm(batch_x)
          
            loss_lstm = loss_func(output_lstm, batch_y)
            
            lstm_train_loss += loss_lstm.item()
            
            
            
            if isGPU:
                lstm_pred = torch.max(output_lstm, 1)[1].cuda()
            else:
                lstm_pred = torch.max(output_lstm, 1)[1]
            
            lstm_train_correct = (lstm_pred == batch_y).sum()
            
            lstm_train_acc += lstm_train_correct.item()
            
            lstm_optimizer.zero_grad()
            
            loss_lstm.backward()
            
            lstm_optimizer.step()
            
        print('LSTM:\n Train Loss: {:.6f}, Accuracy: {:.6f}\n'.format(lstm_train_loss / 
              (len(training_data)), lstm_train_acc / (len(training_data))))
        
        lstm_loss_draw.append(lstm_train_loss/(len(training_data)))
        
        
        # evaluation--------------------------------------------------
        lstm.eval()
        
        lstm_eval_loss = 0.
        
        lstm_eval_acc = 0.
        
        lstm_final_prediction = np.array([])
        lstm_final_test = np.array([])
        lstm_f1_score = []
        lstm_recall = []
        lstm_precision = []
        lstm_accuracy = []
      
        for step, (batch_x, batch_y) in enumerate(test_Loader):
            batch_x = batch_x.view(-1, TIME_STEP, INPUT_SIZE)
            
            if isGPU:
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
    
            output_lstm = lstm(batch_x)
            
            loss_lstm = loss_func(output_lstm, batch_y)
            
            lstm_eval_loss += loss_lstm.item()
            
            lstm_pred = torch.max(output_lstm, 1)[1]
            
            lstm_eval_correct = (lstm_pred == batch_y).sum()
            
            
            if isGPU:
                lstm_pred = torch.max(output_lstm, 1)[1].cuda()
            else:
                lstm_pred = torch.max(output_lstm, 1)[1]
            
            lstm_eval_acc += lstm_eval_correct.item()
                      
            # F1 metrics
            lstm_final_prediction = np.concatenate((lstm_final_prediction, lstm_pred.cpu().numpy()), axis=0)
            lstm_final_test = np.concatenate((lstm_final_test, batch_y), axis=0)
        
        lstm_f1_score.append(sklearn.metrics.f1_score(lstm_final_test, lstm_final_prediction, average='weighted').item())
        lstm_recall.append(sklearn.metrics.recall_score(lstm_final_test, lstm_final_prediction, average='macro').item())
        lstm_precision.append(sklearn.metrics.precision_score(lstm_final_test, lstm_final_prediction, average='weighted').item())
        lstm_accuracy.append(lstm_eval_acc / (len(test_data)))

        
        print('LSTM:\n Test Loss: {:.6f}, Accuracy: {:.6f}'.format(lstm_eval_loss / (len(test_data)), lstm_eval_acc / (len(test_data))))
        
        lstm_test_loss_draw.append(lstm_eval_loss/(len(test_data)))
        
        print('LSTM:\n F1: {}, recall: {}, precision: {}'.format(lstm_f1_score[-1], lstm_recall[-1], lstm_precision[-1]))
        
    
    lstm_test_y = label_binarize(lstm_final_test, classes=[0, 1, 2, 3, 4, 5])
    lstm_pred_y = label_binarize(lstm_final_prediction, classes=[0, 1, 2, 3, 4, 5])
    
    lstm_fpr, lstm_tpr, _ = roc_curve(lstm_test_y.ravel(), lstm_pred_y.ravel())
    lstm_roc_auc = auc(lstm_fpr, lstm_tpr)
    
    
    data_output['lstm']['F1'].append(lstm_f1_score[-1])
    data_output['lstm']['precision'].append(lstm_precision[-1])
    data_output['lstm']['recall'].append(lstm_recall[-1])
    data_output['lstm']['accuracy'].append(lstm_accuracy[-1])
    data_output['lstm']['auc'].append(lstm_roc_auc.item())
    data_output['lstm']['fpr'].append(list(lstm_fpr))
    data_output['lstm']['tpr'].append(list(lstm_tpr))
    data_output['lstm']['test_loss'].append(lstm_test_loss_draw)
    data_output['lstm']['train_loss'].append(lstm_loss_draw)  

------------------fold 1------------------------
LSTM(
  (lstm): LSTM(408, 64, num_layers=10, batch_first=True)
  (out): Linear(in_features=64, out_features=6, bias=True)
)
-----------------------epoch 1----------------------
LSTM:
 Train Loss: 0.003711, Accuracy: 0.148718

LSTM:
 Test Loss: 0.010431, Accuracy: 0.202312
LSTM:
 F1: 0.06808581591818587, recall: 0.16666666666666666, precision: 0.04093020147682849
-----------------------epoch 2----------------------


  _warn_prf(average, modifier, msg_start, len(result))


LSTM:
 Train Loss: 0.003709, Accuracy: 0.148718

LSTM:
 Test Loss: 0.010427, Accuracy: 0.202312
LSTM:
 F1: 0.06808581591818587, recall: 0.16666666666666666, precision: 0.04093020147682849
-----------------------epoch 3----------------------
LSTM:
 Train Loss: 0.003708, Accuracy: 0.148718

LSTM:
 Test Loss: 0.010423, Accuracy: 0.202312
LSTM:
 F1: 0.06808581591818587, recall: 0.16666666666666666, precision: 0.04093020147682849
-----------------------epoch 4----------------------
LSTM:
 Train Loss: 0.003706, Accuracy: 0.148718

LSTM:
 Test Loss: 0.010418, Accuracy: 0.202312
LSTM:
 F1: 0.06808581591818587, recall: 0.16666666666666666, precision: 0.04093020147682849
-----------------------epoch 5----------------------
LSTM:
 Train Loss: 0.003704, Accuracy: 0.148718

LSTM:
 Test Loss: 0.010414, Accuracy: 0.202312
LSTM:
 F1: 0.06808581591818587, recall: 0.16666666666666666, precision: 0.04093020147682849
-----------------------epoch 6----------------------
LSTM:
 Train Loss: 0.003703, Accurac