In [None]:
import pywt
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the two CSV tables of the training set: `data` and `labels`.
# Later cells read a 'Timestamp' column from both tables, 'Ch<N>' columns from
# `data`, and an 'Event' column from `labels`.
data = pd.read_csv('../data/training/a_data.csv')
labels = pd.read_csv('../data/training/a_labels.csv')

In [None]:
# Print the repr() of the first timestamp of each table so the exact stored
# values can be compared (the two columns are matched against each other later).
print(repr(labels['Timestamp'][0]))
print(repr(data['Timestamp'][0]))

In [None]:
# Add an integer version of each row's timestamp and group the rows by it, so
# that grouped.get_group(t) returns every row whose integer timestamp equals t.
# NOTE(review): the column name suggests 'Timestamp' is in (fractional) seconds,
# making each group one second of samples - confirm against the data.
data['TimestampToSec'] = data['Timestamp'].astype(int)
grouped = data.groupby('TimestampToSec')

In [None]:

# Pull out the rows of one integer-timestamp group as a worked example.
# NOTE(review): the key 1489016350 is hard-coded; get_group raises KeyError if
# the loaded data has no group with that key.
single_sec_data = grouped.get_group(1489016350)

In [None]:
# Select column 'Ch0' of the example group and plot it.
single_sec_ch = single_sec_data['Ch0']

plt.plot(single_sec_ch)
plt.show()

In [None]:
import pywt
import pywt.data

def Energy(coeffs, k):
    """Return the L2 norm of ``coeffs[-k]`` divided by its number of elements.

    coeffs -- sequence of coefficient sequences
    k      -- offset counted from the END of ``coeffs``; ``coeffs[-k]`` is the
              entry that is measured (note that ``k == 0`` selects ``coeffs[0]``)
    """
    band = coeffs[-k]
    sum_of_squares = np.sum(np.array(band) ** 2)
    return np.sqrt(sum_of_squares) / len(band)

# Signal-extension mode passed to the pywt.dwt calls in the decomposition
# functions defined below.
mode = pywt.Modes.smooth

def plot_signal_decomp(data, w, title, levels=5):
    """Decompose a 1-D signal with a multi-level DWT and plot every level.

    The signal S is split repeatedly with ``pywt.dwt`` (using the module-level
    ``mode``) so that S = An + Dn + Dn-1 + ... + D1. The original signal is
    drawn in the top row; below it, each row shows the reconstruction of one
    approximation (left, red) and one detail band (right, green).

    Parameters
    ----------
    data : array-like
        1-D signal. It is converted to a NumPy array first, so a pandas Series
        whose index does not start at 0 is still plotted against the sample
        positions 0 .. len(data) - 1.
    w : str
        Wavelet name accepted by ``pywt.Wavelet`` (e.g. ``'db4'``).
    title : str
        Title of the top panel.
    levels : int, optional
        Number of decomposition levels (default 5).

    Returns
    -------
    None
        The figure is created but not shown; the caller calls ``plt.show()``.
        The per-level detail energies [D1, ..., Dn] are printed.
    """
    w = pywt.Wavelet(w)
    # Drop any pandas index: the x-limits below are positional (0 .. len - 1),
    # so plotting against a Series index taken from a larger frame would push
    # the curve outside the visible range.
    data = np.asarray(data)
    a = data
    ca = []
    cd = []
    for _ in range(levels):
        (a, d) = pywt.dwt(a, w, mode)
        ca.append(a)
        cd.append(d)

    # Energy of the detail coefficients, i.e. how present the corresponding
    # frequency band is in the signal.
    # cd is ordered finest-first ([D1, ..., Dn]) while Energy(coeffs, k) reads
    # coeffs[-k]; reversing the list makes k == level, so the printed list is
    # ordered D1, D2, ..., Dn.
    details_coarse_first = cd[::-1]
    power = [Energy(details_coarse_first, level)
             for level in range(1, levels + 1)]

    print(power)

    rec_a = []
    rec_d = []

    # Reconstruct each band on its own: every other coefficient slot is None.
    for i, coeff in enumerate(ca):
        coeff_list = [coeff, None] + [None] * i
        rec_a.append(pywt.waverec(coeff_list, w))

    for i, coeff in enumerate(cd):
        coeff_list = [None, coeff] + [None] * i
        rec_d.append(pywt.waverec(coeff_list, w))

    fig = plt.figure()
    ax_main = fig.add_subplot(len(rec_a) + 1, 1, 1)
    ax_main.set_title(title)
    ax_main.plot(data)
    ax_main.set_xlim(0, len(data) - 1)

    # Two-column grid below the main panel: approximations in the left column
    # (odd cells 3, 5, ...), details in the right column (even cells 4, 6, ...).
    for i, y in enumerate(rec_a):
        ax = fig.add_subplot(len(rec_a) + 1, 2, 3 + i * 2)
        ax.plot(y, 'r')
        ax.set_xlim(0, len(y) - 1)
        ax.set_ylabel("A%d" % (i + 1))

    for i, y in enumerate(rec_d):
        ax = fig.add_subplot(len(rec_d) + 1, 2, 4 + i * 2)
        ax.plot(y, 'g')
        ax.set_xlim(0, len(y) - 1)
        ax.set_ylabel("D%d" % (i + 1))


In [None]:
# Decompose the example channel with the 'db4' wavelet and display the figure.
plot_signal_decomp(single_sec_ch, 'db4', "Single Sec single Channel EEG data")
plt.show()

In [None]:
def signal_decomp(data, wavelet='db4', levels=5):
    """Return the DWT detail coefficients of a 1-D signal, finest level first.

    The signal is split repeatedly with ``pywt.dwt`` (using the module-level
    ``mode``); after each split the approximation is fed into the next level
    and the detail coefficients are collected. Nothing is plotted.

    Parameters
    ----------
    data : array-like
        1-D signal to decompose.
    wavelet : str, optional
        Wavelet name accepted by ``pywt.Wavelet`` (default ``'db4'``).
    levels : int, optional
        Number of decomposition levels (default 5).

    Returns
    -------
    list
        ``[D1, D2, ..., D<levels>]`` - one detail-coefficient array per level,
        in the order the levels were computed.
    """
    w = pywt.Wavelet(wavelet)
    approx = data
    details = []
    for _ in range(levels):
        approx, detail = pywt.dwt(approx, w, mode)
        details.append(detail)
    return details

# CONSTRUCT FEATURES
# For every label, look up the samples that share its timestamp and describe
# them by the normalised wavelet-detail energies, averaged over the channels.
# Loop names are chosen so that they do not shadow the builtin `slice` or
# overwrite `single_sec_ch`, which earlier cells use.
N_FEATURE_CHANNELS = 7  # Ch0..Ch6; omit the last (8th) channel, which is ECG data

features = []
for label_time in labels['Timestamp']:
    window = grouped.get_group(label_time)
    power_all_channels = []
    for ch in range(N_FEATURE_CHANNELS):
        channel_signal = window['Ch{}'.format(ch)]
        # signal_decomp returns the details finest-first ([D1, ..., Dn]) while
        # Energy(coeffs, k) reads coeffs[-k]; reversing the list makes
        # k == level, so the feature columns are ordered D1, D2, ..., Dn.
        details_coarse_first = signal_decomp(channel_signal)[::-1]
        power = np.array([Energy(details_coarse_first, level)
                          for level in range(1, len(details_coarse_first) + 1)])
        # normalize the signal energies so they sum to 1 per channel
        power = power / np.sum(power)
        power_all_channels.append(power)
    mean_power = np.mean(power_all_channels, axis=0)
    # currently the mean power of the frequency bands over all channels are the only features
    features.append(mean_power)


In [None]:
print(np.asarray(features).shape)
# One name per feature column. (Previously this was a single comma-joined
# string inside a one-element list.)
feature_names = ['mean_d1', 'mean_d2', 'mean_d3', 'mean_d4', 'mean_d5']
# Sorted unique values of the 'Event' column.
target_names = np.unique(labels['Event'])

In [None]:
from sklearn import tree
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
#from sklearn import svm
#clf = svm.SVC(kernel='linear', C=1)
# random_state is fixed so that re-running the notebook reproduces the same
# tree and therefore the same score.
clf = tree.DecisionTreeClassifier(criterion='entropy', class_weight='balanced',
                                  random_state=0)

# Fit once on all samples (useful for inspecting the fitted tree, e.g. its
# feature importances). The cross-validation below refits per fold.
clf = clf.fit(features, labels['Event'])
#print(clf.feature_importances_)

# Out-of-fold predictions from 10-fold cross-validation: every sample is
# predicted by a model that was trained without it.
predicted = cross_val_predict(clf, features, labels['Event'], cv=10)

acc = metrics.accuracy_score(labels['Event'], predicted)
print("This is the Score of a sigle tree: {}".format(acc))


In [None]:
# Class distribution in the input data: one bar per distinct 'Event' value.
# `class_names` is reused by the confusion-matrix cell further down.
class_names, counts = np.unique(labels['Event'], return_counts=True)
bar_positions = range(len(class_names))
plt.bar(bar_positions, counts)
plt.xticks(bar_positions, class_names, rotation=70)
plt.xlabel('Event')
plt.ylabel('Number of labels')
plt.title('Class distribution of the labels')

plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import itertools


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are true classes, columns are predicted classes.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, optional
        If True, every row is divided by its sum before printing and plotting.
        Rows that sum to zero are left as zeros instead of becoming NaN.
    title : str, optional
        Figure title.
    cmap : matplotlib colormap, optional
        Colormap of the image.

    Returns
    -------
    None
        The caller is responsible for ``plt.figure()`` / ``plt.show()``.
    """
    cm = np.asarray(cm)

    if normalize:
        # Normalise BEFORE drawing so that the image, the colour bar, the text
        # colour threshold and the cell annotations all refer to the same values.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are written as-is, fractional values with two decimals.
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout last so that the axis labels are taken into account.
    plt.tight_layout()

# Compute the confusion matrix of the cross-validated predictions (`predicted`)
# against the true events.
cnf_matrix = confusion_matrix(labels['Event'], predicted)
# Print floating-point array values with two decimals from here on.
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix.
# `class_names` comes from the class-distribution cell above.
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix (currently disabled)
#plt.figure()
#plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
#                      title='Normalized confusion matrix')

plt.show()