## Prepare Library

In [3]:
import os
import torch
import torchmetrics
from torchmetrics.classification.accuracy import Accuracy
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.autograd import Variable
from tqdm.auto import tqdm
from scipy import integrate
from model import LSTM
from random import sample
import torch.nn as nn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score



## Prepare Device (GPU if available, otherwise CPU)

In [None]:
# Report the installed PyTorch version, the CUDA version exposed by torch.version,
# and whether a CUDA device is usable.
for _env_info in (torch.__version__, torch.version.cuda, torch.cuda.is_available()):
    print(_env_info)

# Use the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


## Dataset


In [163]:

# Columns read from every case's CSV file.
Index = ['Channel1', 'Channel2', 'Channel3', 'Channel4', 'Channel5', 'Channel6']

# Root folder that holds one sub-directory per case (<case>/<case>.csv).
# The original absolute path stays the default; set the TENG_DATA_PATH environment
# variable to run the notebook against a dataset stored elsewhere.
Data_path = os.environ.get(
    'TENG_DATA_PATH',
    'D:/Workspace/TENG-Signal-Classification/dataset/preprocessed',
)

# Split configuration. A fixed seed makes the train/valid/test split identical on
# every "Restart Kernel -> Run All".
SPLIT_SEED = 42
N_TEST = 40
N_VALID = 40

# os.listdir returns entries in arbitrary, platform-dependent order, so sort first to
# give the seeded shuffle a stable starting point. Only directories are kept: every
# case is later read from <Data_path>/<case>/<case>.csv, so stray files in the root
# folder can never be valid cases.
cases = sorted(
    entry for entry in os.listdir(Data_path)
    if os.path.isdir(os.path.join(Data_path, entry))
)

# A private Random instance keeps the split reproducible without touching the
# global `random` state.
random.Random(SPLIT_SEED).shuffle(cases)

## Split cases: first N_TEST -> test, next N_VALID -> valid, remainder -> train
test_set = cases[:N_TEST]

valid_set = cases[N_TEST:N_TEST + N_VALID]

train_set = cases[N_TEST + N_VALID:]

print('lenght of train set:', len(train_set))
print('lenght of valid set:', len(valid_set))
print('valid set:', valid_set)
print('lenght of test set:', len(test_set))
print('test set:', test_set)

lenght of train set: 220
lenght of valid set: 40
valid set: ['C6_28', 'C5_18', 'C3_44', 'C2_47', 'C5_8', 'C3_13', 'C5_42', 'C3_32', 'C3_26', 'C3_46', 'C3_41', 'C1_24', 'C2_37', 'C2_25', 'C4_47', 'C2_46', 'C1_1', 'C6_44', 'C3_12', 'C1_16', 'C3_18', 'C5_6', 'C6_29', 'C1_11', 'C5_37', 'C1_4', 'C3_36', 'C1_2', 'C4_26', 'C3_16', 'C4_50', 'C3_40', 'C2_35', 'C6_13', 'C5_24', 'C3_2', 'C2_26', 'C1_9', 'C2_19', 'C1_21']
lenght of test set: 40
test set: ['C1_46', 'C3_7', 'C5_44', 'C5_28', 'C6_22', 'C1_14', 'C4_23', 'C4_25', 'C2_29', 'C3_23', 'C5_9', 'C3_38', 'C5_25', 'C2_49', 'C4_31', 'C4_39', 'C2_50', 'C3_3', 'C1_25', 'C2_32', 'C1_27', 'C2_1', 'C6_3', 'C2_24', 'C4_3', 'C5_45', 'C5_32', 'C1_23', 'C3_19', 'C3_17', 'C2_12', 'C3_43', 'C2_45', 'C4_7', 'C3_34', 'C1_43', 'C6_6', 'C5_49', 'C5_29', 'C3_42']


## Load Data

In [164]:
#   Convert label into int (the label is the case-name prefix before the first '_')
Encode_label = {
    'C1': 0,
    'C2': 1,
    'C3': 2,
    'C4': 3,
    'C5': 4,
    'C6': 5,
}


def _channel_features(series):
    """Summarise one channel as [sum, max, min, mean, std, entropy].

    Parameters
    ----------
    series : pandas.Series
        The samples of a single channel.

    Returns
    -------
    list
        Six scalars, in the order listed above. The entropy is the Shannon
        entropy (base 2) of the empirical distribution of distinct values.
    """
    values = series.to_numpy()
    # Relative frequency of every distinct value. `Series.value_counts` replaces the
    # deprecated top-level `pd.value_counts`; dividing by the full column length
    # keeps the original normalisation.
    prob = (series.value_counts() / len(series)).to_numpy()
    shannon_entropy = -np.sum(prob * np.log2(prob))
    return [
        np.sum(values),
        np.max(values),
        np.min(values),
        np.mean(values),
        np.std(values),
        shannon_entropy,
    ]


def _load_cases(case_names):
    """Read every case's CSV and build its feature table and integer label.

    Parameters
    ----------
    case_names : list of str
        Case identifiers such as 'C3_12'; the data is read from
        <Data_path>/<case>/<case>.csv using the columns listed in `Index`.

    Returns
    -------
    (list of pandas.DataFrame, list of int)
        One DataFrame per case (rows: the six statistics, columns: 'C1'..'C6'
        derived from the names in `Index`) and the matching encoded labels.

    Raises
    ------
    KeyError
        If a case-name prefix is not a key of `Encode_label`.
    """
    features, labels = [], []
    for case in case_names:
        label = case.split('_')[0]
        #   Read csv
        csv_path = os.path.join(Data_path, case, case + '.csv')
        data = pd.read_csv(csv_path, usecols=Index)
        # 'Channel1' -> 'C1', ...: same keys and column order as before.
        per_channel = {
            column.replace('Channel', 'C'): _channel_features(data[column])
            for column in Index
        }
        features.append(pd.DataFrame.from_dict(per_channel, orient='index').T)
        labels.append(Encode_label[label])
    return features, labels


print('| loading train set data..... |')
x_train_data, y_train_data = _load_cases(train_set)
print('| done |')

print('| loading valid set data..... |')
x_valid_data, y_valid_data = _load_cases(valid_set)
print('| done |')

print('| loading test set data..... |')
x_test_data, y_test_data = _load_cases(test_set)
print('| done |')

ss = StandardScaler()
mm = MinMaxScaler()


def _standardize_cases(frames):
    """Standardise each case's feature table with `ss` and flatten it to 1-D.

    NOTE(review): the scaler is re-fit on every single case, so each channel
    column is z-scored across that case's own six statistics rather than with
    statistics learned from the training set. This per-case behaviour is kept
    unchanged here; confirm it is intended before switching to a scaler fitted
    once on the training data.
    """
    return [np.asarray(ss.fit_transform(frame)).flatten() for frame in frames]


print('| train set data to tensor..... |')
## To flat feature vectors (one 1-D numpy array per case)
X_train = _standardize_cases(x_train_data)
Y_train = list(y_train_data)
print('| done |')

print('| valid data to tensor..... |')
X_valid = _standardize_cases(x_valid_data)
Y_valid = list(y_valid_data)
print('| done |')

print('| test data to tensor..... |')
X_test = _standardize_cases(x_test_data)
Y_test = list(y_test_data)
print('| done |')

| loading train set data..... |
| done |
| loading valid set data..... |
| done |
| loading test set data..... |
| done |
| train set data to tensor..... |
| done |
| valid data to tensor..... |
| done |
| test data to tensor..... |
| done |


## Training (BP: MLP trained with backpropagation)

In [276]:

# Hyper-parameters of the multi-layer perceptron: a single hidden layer of 128 ReLU
# units, trained with SGD using Nesterov momentum. `random_state` is fixed.
mlp_params = dict(
    hidden_layer_sizes=(128,),
    activation='relu',
    solver='sgd',
    nesterovs_momentum=True,
    momentum=0.85,
    alpha=0.0001,
    max_iter=3000,
    random_state=128,
)
mlp = MLPClassifier(**mlp_params)

# Fit on the flattened training feature vectors and their integer class labels.
train_labels = np.asarray(Y_train)
mlp.fit(X_train, train_labels)


In [277]:
# Predict the class of every test case, then report the fraction classified correctly.
Y_pred = mlp.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print(accuracy)

0.8
