In [8]:
import numpy as np
from numpy import random
import pandas as pd
from sklearn import neighbors
import matplotlib.pyplot as plt
%matplotlib inline

# Get the data

In [3]:
normal_data = np.loadtxt('Data_case1/ptbdb_normal.csv', delimiter=',')
abnormal_data = np.loadtxt('Data_case1/ptbdb_abnormal.csv', delimiter=',')

## Binary dataset

In [4]:
#Data from the binary dataset
Nn = len(normal_data)
Nn_train = int(Nn/2)
Nn_test = int(0.3*Nn)
normal_train = normal_data[:Nn_train,:-1]
normal_dev = normal_data[Nn_train:Nn_train + Nn_test,:-1]
normal_test = normal_data[Nn_train + Nn_test:,:-1]
Na = len(abnormal_data)
Na_train = int(Na/2)
Na_test = int(0.3*Na)
abnormal_train = abnormal_data[:Na_train,:-1]
abnormal_dev = abnormal_data[Na_train:Na_train + Na_test,:-1]
abnormal_test = abnormal_data[Na_train + Na_test:,:-1]
X_train2 = np.vstack([normal_train, abnormal_train])
y_train2 = np.vstack([[0] * len(normal_train) + [1] * len(abnormal_train)])[0]
X_dev2 = np.vstack([normal_dev, abnormal_dev])
y_dev2 = np.vstack([[0] * len(normal_dev) + [1] * len(abnormal_dev)])[0]
X_traindev2 = np.vstack([X_train2, X_dev2])
y_traindev2 = np.vstack([[0] * len(normal_train) + [1] * len(abnormal_train) + [0] * len(normal_dev) + [1] * len(abnormal_dev)][0])
X_test2 = np.vstack([normal_test, abnormal_test])
y_test2 = np.vstack([[0] * len(normal_test) + [1] * len(abnormal_test)])[0]

In [74]:
def create_new_features(array):
    array[array == 0.0] = np.NaN
    percentile95 = np.nanpercentile(array, 0.95, axis=1)
    percentile05 = np.nanpercentile(array, 0.05, axis=1)
    percentile75 = np.nanpercentile(array, 0.75, axis=1)
    percentile25 = np.nanpercentile(array, 0.25, axis=1)
    median = np.nanmedian(array, axis=1)
    mean = np.nanmean(array, axis=1)
    std = np.nanstd(array, axis=1)
    length = np.array([max([i for i in range(len(array[n,:])) if array[n,i] > 0]) for n in range(len(array[:,0]))])
    new_features = np.vstack([mean, median, std, percentile25, percentile75, percentile05, percentile95, length])
    new_features = np.transpose(new_features)
    return new_features

In [75]:
# if we want to save the datasets with labels
#new_train2 = np.hstack([create_new_features(X_train2), y_train2.reshape(len(y_train2),1)])
#new_dev2 = np.hstack([create_new_features(X_dev2), y_dev2.reshape(len(y_dev2),1)])
#new_test2 = np.hstack([create_new_features(X_test2), y_test2.reshape(len(y_test2),1)])
#new_traindev2 = np.hstack([create_new_features(X_traindev2), y_traindev2.reshape(len(y_traindev2),1)])

newX_train2 = create_new_features(X_train2)
newX_dev2 = create_new_features(X_dev2)
newX_test2 = create_new_features(X_test2)
newX_traindev2 = create_new_features(X_traindev2)


array([0.04732334, 0.06153061, 0.06957325, ..., 0.02538076, 0.10665962,
       0.00282885])

## KNN

In [67]:
knn = neighbors.KNeighborsClassifier(1, weights='uniform')
knn.fit(newX_traindev2, y_traindev2)
Z = knn.predict(newX_test2)
misclassification = 0
misclassified = [0,0]
total = [0,0]
for i, z in enumerate(Z):
    if z != y_test2[i]:
        misclassification += 1
        misclassified[y_test2[i]] += 1
    total[y_test2[i]] += 1
misclassification = misclassification / len(Z)
accuracy = 1 - misclassification
accuracies = [1 - misclassified[i]/total[i] for i in range(2)]
print('total accuracy: ', accuracy)
print('class accuracies: ', accuracies)

total accuracy:  0.8485576923076923
class accuracies:  [0.7555555555555555, 0.884395813510942]


  


## 5-class dataset

In [68]:
# 5 class data
train_data5 = np.loadtxt('Data_case1/mitbih_train.csv', delimiter = ',')
random.shuffle(train_data5)
test_data5 = np.loadtxt('Data_case1/mitbih_train.csv', delimiter = ',')
N = np.int(len(train_data5)/3)
X_dev5 = train_data5[:N,:-1]
y_dev5 = train_data5[:N,-1]
X_train5 = train_data5[N:,:-1]
y_train5 = train_data5[N:,-1]
X_traindev5 = train_data5[:,:-1]
y_traindev5 = train_data5[:,-1]
X_test5 = test_data5[:,:-1]
y_test5 = test_data5[:,-1]

In [69]:
newX_train5 = create_new_features(X_train5)
newX_dev5 = create_new_features(X_dev5)
newX_test5 = create_new_features(X_test5)
newX_traindev5 = create_new_features(X_traindev5)


## KNN

In [70]:
knn = neighbors.KNeighborsClassifier(7, weights='uniform')
knn.fit(newX_traindev5, y_traindev5)
Z = knn.predict(newX_test5)
misclassification = 0
misclassified = [0,0,0,0,0]
total = [0,0,0,0,0]
for i, z in enumerate(Z):
    if z != y_test5[i]:
        misclassification += 1
        misclassified[int(y_test5[i])] += 1
    total[int(y_test5[i])] += 1
misclassification = misclassification / len(Z)
accuracy = 1 - misclassification
accuracies = [1 - misclassified[i]/total[i] for i in range(5)]
print('total accuracy: ', accuracy)
print('class accuracies: ', accuracies)

total accuracy:  0.9144413733238915
class accuracies:  [0.9784603496571043, 0.413855150697256, 0.5428472702142364, 0.3993759750390016, 0.7518270875447053]
