In [None]:
# For auto-reloading external modules
%load_ext autoreload
%autoreload 2
import tensorflow as tf
import numpy as np
import pandas as pd
import conv_net as cnn
from utils import data_utils as du
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import config

### Data Preprocessing

In [None]:
# 1. Convert the flattened raw data into structured raw data.
# NOTE(review): presumably reads the files under the directory below and writes
# the structured files elsewhere -- confirm against utils.data_utils.split_datafiles.
du.split_datafiles('dataset/raw_flattened/')

In [None]:
# 2. Convert raw data into datapoints and split them into train/test datasets.
# NOTE(review): the meaning of the positional argument 3 is not visible from this
# notebook -- see utils.data_utils.maybe_preprocess_dataset. force=True presumably
# re-runs the preprocessing even when its output already exists -- confirm.
du.maybe_preprocess_dataset(3, force=True)

### Loading the dataset

In [None]:
# Load both preprocessed splits. Later cells index each split as split[0] and
# split[1] (presumably features and labels -- confirm in utils.data_utils).
train = du.load_dataset('train')
test = du.load_dataset('test')

# Single-argument print(...) is valid on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3. The emitted text is unchanged,
# including the double space the original comma-separated print produced.
print('train dataset has shape  %s' % (train[0].shape,))
print('test dataset has shape  %s' % (test[0].shape,))

### Train and Tweak

In [None]:
# Shuffle the training examples once, applying the same permutation to
# train[0] and train[1] so the two arrays stay aligned row for row.
# A dedicated, seeded RandomState makes the shuffle -- and therefore the fold
# split and every accuracy reported below -- reproducible across kernel
# restarts, without touching NumPy's global random state.
SHUFFLE_SEED = 0
train_size = train[0].shape[0]
idx = np.random.RandomState(SHUFFLE_SEED).permutation(train_size)
dataset = train[0][idx], train[1][idx]

In [None]:
# Split the shuffled dataset into `val_folds` folds and use only the first
# train/validation split that KFold yields.
val_folds = 7
kf = KFold(n_splits=val_folds)
# The built-in next() works on Python 2 and 3; the generator's .next() method
# exists on Python 2 only.
train_idx, val_idx = next(kf.split(dataset[0]))

# NOTE: `train` is rebound here from the full training split to this fold's
# training subset; cells below that read `train` see the subset.
train, val = du.get_batch(dataset, indices=train_idx), du.get_batch(dataset, indices=val_idx)

# Count validation examples per class, taking the class of each example to be
# the argmax of its row in val[1]. Ten classes are assumed, as elsewhere in
# this notebook.
cnt = np.zeros(10)
for label_idx in np.argmax(val[1], axis=1):
    cnt[label_idx] += 1
print(cnt)

clf = cnn.ConvNetClassifier()
clf.fit(train, val=val, out_dir='log/train2/', verbose=True)

In [None]:
# Accuracy of the fitted classifier on this fold's training and validation sets.
train_acc = clf.accuracy(train)
val_acc = clf.accuracy(val)

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print('accuracy on train dataset = %.3f' % train_acc)
print('accuracy on val dataset = %.3f' % val_acc)

In [None]:
# Confusion matrix on the training fold, followed by two per-class ratios.
# print('') emits a blank line on both Python 2 and 3 (a bare print() would
# print "()" on Python 2).
conf_mat = clf.confusion_matrix(train)
print('Confusion Matrix')
print(conf_mat)
print('')

# Diagonal entry divided by its row total. Classes whose row is empty are
# skipped to avoid dividing by zero, so fewer than 10 values may be printed
# and the values carry no class index.
print('Accuracies')
for idx in range(10):
    s = np.sum(conf_mat[idx])
    if s == 0:
        continue
    print(conf_mat[idx][idx] * 1.0 / s)
print('')

# Diagonal entry divided by its column total, with the same skip rule.
print('Precisions')
for idx in range(10):
    s = np.sum(conf_mat[:, idx])
    if s == 0:
        continue
    print(conf_mat[idx][idx] * 1.0 / s)

## Plotting

In [None]:
# Plotting functions
def plot_emg_channels(datapoint, vers1, vers2, title):
    """Overlay two recordings channel by channel on a 4x2 grid of subplots.

    Args:
        datapoint: Accepted for interface compatibility; it is currently not
            drawn.
        vers1: 2-D array indexed as ``vers1[:, c]`` for channels ``c`` in
            0..7; drawn in red. Needs ``config.sampling_rate`` rows, because
            it is plotted against ``range(config.sampling_rate)``.
        vers2: Same layout as ``vers1``; drawn in green.
        title: Figure-level title passed to ``plt.suptitle``.
    """
    samples = range(config.sampling_rate)
    fig, sub = plt.subplots(nrows=4, ncols=2)
    for c in range(8):
        # Floor division keeps the row index an int on Python 3 as well;
        # plain `/` would produce a float there and fail as an index.
        ax = sub[c // 2][c % 2]
        ax.plot(samples, vers1[:, c], color='red', label='vers1')
        ax.plot(samples, vers2[:, c], color='green', label='vers2')
        ax.grid()
    # Every panel holds the same red/green pair, so a single figure-level
    # legend built from the last panel's handles describes all of them.
    handles, labels = ax.get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper right')
    plt.suptitle(title)
    plt.show()
    
def get_data(path):
    """Read a CSV file and return every column except the first as an array.

    Args:
        path: Location of the CSV file; its first row is used as the header
            (pandas default).

    Returns:
        A NumPy array holding the values of columns 1..end, one row per CSV
        data row.
    """
    # `.values` is available on every pandas version, whereas
    # DataFrame.as_matrix() was deprecated and then removed in pandas 1.0.
    return pd.read_csv(path).iloc[:, 1:].values
    
def plot_loss_function(name):
    """Plot the loss curve stored for the run called ``name``.

    Reads ``plotting/data_csv/<name>/loss.csv`` through ``get_data`` and plots
    the second returned column against the first.
    """
    csv_path = 'plotting/data_csv/' + name + '/loss.csv'
    curve = get_data(csv_path)
    steps = curve[:, 0]
    losses = curve[:, 1]

    plt.plot(steps, losses, color='#581845')
    plt.title('Loss Function')
    plt.xlabel('steps')
    plt.ylabel('loss')
    plt.grid(True)
    plt.show()
    
def plot_accuracies(name):
    """Plot training and validation accuracy curves for the run ``name``.

    Reads ``train_acc.csv`` and ``val_acc.csv`` from
    ``plotting/data_csv/<name>/`` through ``get_data`` and draws both curves
    on the same axes, second returned column against the first.
    """
    run_dir = 'plotting/data_csv/' + name
    train_curve = get_data(run_dir + '/train_acc.csv')
    val_curve = get_data(run_dir + '/val_acc.csv')

    curves = (
        (train_curve, '#009C17', 'training'),
        (val_curve, '#01B8B5', 'validation'),
    )
    for curve, colour, caption in curves:
        plt.plot(curve[:, 0], curve[:, 1], color=colour, label=caption)

    plt.title('Training Vs. Validation')
    plt.legend(bbox_to_anchor=(0., 0.8, 1., .102))
    plt.xlabel('steps')
    plt.ylabel('accuracy')
    plt.grid(True)
    plt.show()

In [None]:
# Run the plotting helpers for the run named 'acts789'; they read loss.csv,
# train_acc.csv and val_acc.csv from plotting/data_csv/acts789/.
plot_loss_function('acts789')
plot_accuracies('acts789')

## Test the model

In [None]:
# Evaluate the fitted classifier on the held-out test split.
test_acc = clf.accuracy(test)

# train_acc is the value computed in the earlier train/validation accuracy
# cell. Single-argument print(...) behaves identically on Python 2 and 3.
print('accuracy on train dataset = %.3f' % train_acc)
print('accuracy on test dataset = %.3f' % test_acc)
print('confusion matrix')
print(clf.confusion_matrix(test))

In [None]:
def get_records(activity_class, count=-1):
    """Return training datapoints whose label row has a 1 at ``activity_class``.

    The 'train' split is loaded afresh through ``du.load_dataset`` on every
    call, so the result does not depend on the notebook's ``train`` variable.

    Args:
        activity_class: Column index into the label rows (``loaded[1]``).
        count: Maximum number of datapoints to return. Zero or a negative
            value (the default) returns every match.

    Returns:
        A float64 array of shape ``(matches,) + datapoint_shape`` holding the
        matching datapoints in dataset order. With no matches the array has
        zero rows; its trailing dimensions come from the loaded data rather
        than a hard-coded ``(128, 8)``.
    """
    loaded = du.load_dataset('train')
    features, labels = loaded[0], loaded[1]

    # One boolean-mask selection replaces the per-row np.concatenate, which
    # re-copied the growing result on every match (quadratic in matches).
    selected = features[labels[:, activity_class] == 1]
    if count > 0:
        selected = selected[:count]
    # The original accumulated into a float64 array; keep that dtype.
    return np.asarray(selected, dtype=np.float64)

In [None]:
# Gather every training record for activity classes 2 and 3 (no count limit).
res1, res2 = get_records(2), get_records(3)

In [None]:
# The original read an undefined name `res` here, which raises NameError on a
# fresh kernel. Only res1 and res2 exist, so the mean is taken over res1.
# NOTE(review): confirm res1 is the intended source; plot_emg_channels
# currently does not draw its first argument, so the figure is unaffected.
avg = np.mean(res1, axis=0)

# Pick one random record from each class to overlay.
idx1 = np.random.choice(res1.shape[0], 1)
idx2 = np.random.choice(res2.shape[0], 1)

plot_emg_channels(avg, res1[idx1[0]], res2[idx2[0]], '')