In [None]:
import pywt
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the raw recording and the per-second event annotations
# (both paths are relative to the notebook's working directory).
data, labels = map(pd.read_csv, ('../data/training/a_data.csv',
                                 '../data/training/a_labels.csv'))

In [None]:
# Bin the samples by whole second of recording time: casting each timestamp
# to int yields the bin id, and every bin becomes one group.
data['TimestampToSec'] = data.loc[:, 'Timestamp'].astype(int)
grouped = data.groupby(by='TimestampToSec')

In [None]:
def Energy(coeffs, k):
    """Return the L2 norm of one coefficient band divided by its length.

    Parameters
    ----------
    coeffs : sequence of array-likes
        Coefficient bands; the band is picked with ``coeffs[-k]``.
    k : int
        Offset from the end of ``coeffs``. Because ``-0 == 0``, ``k=0``
        selects the *first* band, while ``k=1`` selects the last one.

    Returns
    -------
    float
        ``sqrt(sum(band ** 2)) / len(band)``.
    """
    band = np.asarray(coeffs[-k])
    squared_sum = np.sum(np.square(band))
    return np.sqrt(squared_sum) / len(band)

In [None]:
import pywt
import pywt.data
# Signal-extension mode handed to every pywt.dwt call in signal_decomp.
mode = pywt.Modes.smooth


def signal_decomp(data, levels=5, wavelet='db4'):
    """Decompose a signal S into successive wavelet approximations and details.

    S = An + Dn + Dn-1 + ... + D1

    The single-level transform ``pywt.dwt`` is applied ``levels`` times; each
    pass is fed the approximation coefficients produced by the previous pass.
    Nothing is plotted.

    Parameters
    ----------
    data : array-like
        The 1-D signal to decompose.
    levels : int, optional
        Number of decomposition passes (default 5).
    wavelet : str, optional
        Wavelet name passed to ``pywt.Wavelet`` (default ``'db4'``).

    Returns
    -------
    ca : list
        Approximation coefficients of every pass, first pass first.
    cd : list
        Detail coefficients of every pass, first pass first.
    """
    w = pywt.Wavelet(wavelet)
    approx = data
    ca = []
    cd = []
    for _ in range(levels):
        approx, detail = pywt.dwt(approx, w, mode)
        ca.append(approx)
        cd.append(detail)
    return ca, cd

In [None]:
import scipy.signal

# CONSTRUCT FEATURES

N_CHANNELS = 8   # columns Ch0 .. Ch7 are read below
N_LEVELS = 5     # Energy() is evaluated for k = 0 .. 4 on the detail bands

# for every label, look up the corresponding one-second bin of data
features = []
for second in labels['Timestamp']:
    segment = grouped.get_group(second)
    # original note: 1-7 EEG, 8th channel is ECG data
    # NOTE(review): the loop reads columns 'Ch0' .. 'Ch7' - confirm the
    # channel numbering against the CSV header.
    power_all_channels = []
    for ch in range(N_CHANNELS):
        single_sec_ch = segment['Ch{}'.format(ch)]

        # median filter the data
        pre_processed = scipy.signal.medfilt(single_sec_ch, kernel_size=3)

        _, cd = signal_decomp(pre_processed)
        # one Energy value per decomposition level of this channel
        power = [Energy(cd, level) for level in range(N_LEVELS)]

        power_all_channels.append(power)
    # The feature vector is the per-channel, per-level Energy values laid out
    # channel after channel (no averaging across channels takes place).
    power_vec = np.asarray(power_all_channels).flatten()
    features.append(power_vec)
 

In [None]:
# Sanity check: dimensions of the assembled feature matrix.
print(np.shape(features))
# Sorted distinct event labels found in the annotations.
target_names = np.unique(labels['Event'].values)

In [None]:
from sklearn import ensemble
from sklearn import metrics
from sklearn.model_selection import cross_val_predict

## Random Forest
# A fixed seed makes the forest - and therefore the reported score and every
# downstream plot - reproducible across kernel restarts.
RANDOM_STATE = 42
clf = ensemble.RandomForestClassifier(n_estimators=10, criterion='entropy',
                                      class_weight='balanced', n_jobs=-1,
                                      random_state=RANDOM_STATE)

# One prediction per sample, obtained via 10-fold cross-validation.
predicted = cross_val_predict(clf, features, labels['Event'], cv=10)

acc = metrics.accuracy_score(labels['Event'], predicted)
print("This is the Score: {}".format(acc))

In [None]:
# visualize the predicted class sequence as a grayscale strip image
def vis_clfs(targets, predicted):
    """Draw the predicted classes as horizontal gray-coded bands.

    Timepoints are laid out left to right, 500 per band; each band is 10
    pixels tall. Pixels past the last timepoint keep the background value 255.

    Parameters
    ----------
    targets : array-like
        Ground-truth labels. Kept for interface compatibility; the drawing
        itself only uses ``predicted``.
    predicted : array-like of str
        Predicted class label per timepoint; every value must be one of the
        ``stage_q_*`` labels listed in ``stages`` below.

    Raises
    ------
    KeyError
        If ``predicted`` contains a label that has no gray value assigned.
    """
    import matplotlib.patches as mpatches

    # (class label, legend text, gray value): single source for both the
    # pixel values and the legend entries, so the two cannot drift apart.
    stages = [('stage_q_N34', 'N34', 0),
              ('stage_q_N23', 'N23', 50),
              ('stage_q_N12', 'N12', 100),
              ('stage_q_REM1', 'REM', 150),
              ('stage_q_Wake0', 'Wake', 200)]
    color = {name: gray for name, _, gray in stages}

    cols = 500   # timepoints per band
    band = 10    # pixel height of one band

    # Positional access regardless of whether a Series or an array came in.
    predicted = np.asarray(predicted)
    n_points = len(predicted)
    n_bands = -(-n_points // cols)   # ceiling division

    # One gray value per timepoint, padded with background up to a full band,
    # then every band row is repeated `band` times to give it height.
    strip = np.full(n_bands * cols, 255, dtype=np.int16)
    strip[:n_points] = [color[label] for label in predicted]
    image = np.repeat(strip.reshape(-1, cols), band, axis=0)

    plt.figure(frameon=False, figsize=(16,16))
    plt.title('Classification Results', fontsize=18)
    plt.axis('off')
    im = plt.imshow(image,cmap=plt.cm.bone, vmin = 0, vmax = 255)
    # look up the colour imshow's colormap assigns to each gray value and
    # create a patch (proxy artist) for it
    patches = [mpatches.Patch(color=im.cmap(im.norm(gray)), label=text)
               for _, text, gray in stages]
    # put those patches as legend handles into the legend
    plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., fontsize=18 )
    plt.show()
    

In [None]:
# Show the raw cross-validated predictions next to the annotated events.
vis_clfs(targets=labels['Event'], predicted=predicted)

In [None]:
def smooth(a, n=3) :
    """Majority-vote smoothing of one-hot predictions over a trailing window.

    Parameters
    ----------
    a : array-like, shape (n_classes, n_timepoints)
        One column per timepoint, one row per class (e.g. a transposed
        one-hot encoding of the predictions).
    n : int, optional
        Window length in timepoints (default 3).

    Returns
    -------
    classes : ndarray of int, shape (n_timepoints - n,)
        ``classes[i]`` is the row index with the largest sum over columns
        ``i .. i + n - 1``, i.e. over the ``n`` timepoints immediately
        preceding timepoint ``i + n``. The output therefore lines up with
        the input sequence shifted by ``n`` (``targets[n:]``). Ties go to
        the lowest row index.

    Raises
    ------
    ValueError
        If ``a`` is not 2-D, or ``n`` is negative or exceeds the number of
        timepoints.
    """
    a = np.asarray(a)
    if a.ndim != 2:
        raise ValueError("a must be 2-D (n_classes, n_timepoints)")
    n_classes, n_timepoints = a.shape
    n_out = n_timepoints - n
    if n < 0 or n_out < 0:
        raise ValueError("n must lie between 0 and the number of timepoints")

    # Prefix sums with a leading zero column: the vote count of the window
    # starting at column i is prefix[:, i + n] - prefix[:, i].
    prefix = np.zeros((n_classes, n_timepoints + 1))
    prefix[:, 1:] = np.cumsum(a, axis=1)
    votes = prefix[:, n:n + n_out] - prefix[:, :n_out]

    # assign the class that occurs most often within the window
    classes = np.argmax(votes, axis=0)
    return classes

In [None]:
# convert predictions into a one hot vector
from sklearn import preprocessing
# Fit the binarizer on the predicted labels, show the class order it learned,
# then encode the predictions with it.
lb = preprocessing.LabelBinarizer().fit(predicted)
print(lb.classes_)
onehot_pred = lb.transform(predicted)
#onehot_pred.T

In [None]:
filter_length = 15

mov_pred = smooth(onehot_pred.T, n = filter_length)
# smooth() returns filter_length fewer entries than it was given, so the same
# number of leading labels is dropped to keep targets and predictions equally
# long. Series.as_matrix() no longer exists in current pandas; to_numpy() is
# its replacement.
t = labels['Event'].to_numpy()[filter_length:]
print(mov_pred.shape)
# map the class indices back to their label strings
p = lb.classes_[mov_pred]
vis_clfs(t, p)

acc = metrics.accuracy_score(t, p)
print("This is the Score: {}".format(acc))

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : array-like, shape (n_classes, n_classes)
        Confusion matrix; rows are true labels, columns predicted labels.
    classes : sequence
        Tick labels, in the same order as the rows/columns of ``cm``.
    normalize : bool, optional
        If True every row is divided by its sum before printing and
        plotting. Rows whose sum is zero stay all-zero.
    title : str, optional
        Title of the plot.
    cmap : matplotlib colormap, optional
        Colormap handed to ``imshow``.
    """
    cm = np.asarray(cm)

    # Normalise first, so that the image, the colour bar and the cell texts
    # all show the same numbers.
    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # integer counts are written as-is, everything else with two decimals
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # white text on dark cells, black text on light cells
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # run last so the axis labels are taken into account by the layout
    plt.tight_layout()

class_names, counts = np.unique(labels['Event'], return_counts=True)

# Compute confusion matrix. The row/column order is pinned to class_names so
# that the tick labels drawn by plot_confusion_matrix always match the matrix.
cnf_matrix = confusion_matrix(labels['Event'], predicted, labels=class_names)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
#plt.figure()
#plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
#                      title='Normalized confusion matrix')

plt.show()