In [1]:
import numpy as np
import scipy as sp
from scipy import fftpack

import pandas as pd

import os
import glob

import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline

In [2]:
# Folder holding the raw recordings. The trailing separator is kept on purpose:
# later cells build file names as `path + title`.
path = os.path.join(os.getcwd(), 'data', '')
extension = 'csv'

# List the matching files without changing the working directory, so that
# re-running this cell resolves the same folder instead of `data/data`.
# Sorting gives a stable order, which matters because later cells index into
# the loaded datasets by position.
titles = sorted(
    os.path.basename(found)
    for found in glob.glob(os.path.join(path, '*.{}'.format(extension)))
)
print(titles)

['BCI Anonymous Jun 09 12h55.raw.csv', 'BCI subj03 Jun 09 12h47.raw.csv', 'BCI subj04 Jun 09 16h01.raw.csv']


In [28]:
# Load every listed CSV into a DataFrame and collect the frames in `datasets`.
datasets = []
for title in titles:
    csv_file = path + title
    frame = pd.read_csv(csv_file)
    # A single parsed column means the comma separator did not split the rows;
    # read the file again using ';' as the separator.
    if frame.shape[1] == 1:
        frame = pd.read_csv(csv_file, sep=";")
    # The action label is the part of the stimulus name before the first
    # space or dot.
    frame['action'] = [
        stimulus.replace(' ', '.').split('.')[0] for stimulus in frame['Stimulus']
    ]
    # Keep everything except the first column of the file.
    kept_columns = frame.columns[1:]
    datasets.append(frame[kept_columns])

In [37]:
# Use the first loaded recording as the working frame and list its columns.
data = datasets[0]
print(data.columns.tolist())

['Stimulus', 'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4', 'action']


## Frequency distribution

In [None]:
sb.set_context("paper")
# One figure per signal column (everything between the first column and the
# trailing 'action' column): distribution of the values, split by action.
for c in data.columns[1:-1]:
    plt.figure()
    # x and y are passed by keyword: recent seaborn releases accept only
    # `data` positionally, so the positional form raises a TypeError there.
    # The orientation is unchanged (values on x, action on y).
    sb.violinplot(x=data[c], y=data.action)
    plt.title(c)

## Curve similarity by eye

In [121]:
def plot_confidence(data):
    """Plot the mean curve of ``data`` with a one-standard-deviation band.

    ``data`` is reduced along axis 0, so each row is treated as one curve.
    The mean is drawn as a line and the region between ``mean - std`` and
    ``mean + std`` is shaded in translucent red. Drawing happens on the
    current matplotlib figure; nothing is returned.
    """
    center = np.mean(data, axis=0)
    spread = np.std(data, axis=0)
    positions = list(range(len(center)))

    plt.plot(center)
    plt.fill_between(positions, center - spread, center + spread,
                     alpha=.3, facecolor='red')

    
def pre_analysis(data, action, repetitions, wave, stimulus_len = None):
    """Overlay the repetitions of one action for one signal column.

    Parameters
    ----------
    data : DataFrame with an ``action`` column and a column named ``wave``.
    action : value of ``data.action`` whose rows are selected.
    repetitions : number of consecutive, equally long chunks the selected
        samples are split into.
    wave : name of the column to analyse.
    stimulus_len : length of each chunk in samples. When None it is the
        number of selected samples divided by ``repetitions``, truncated to
        an integer (trailing samples that do not fill a chunk are ignored).

    Opens a new figure titled with ``action`` and draws the mean of the
    chunks with its standard-deviation band. Returns nothing.
    """
    samples = data.loc[data.action == action, wave]

    if stimulus_len is None:
        stimulus_len = int(samples.shape[0] / repetitions)

    # One row per repetition, filled with consecutive chunks of the signal.
    datamatrix = np.zeros([repetitions, stimulus_len])
    for rep in range(repetitions):
        first = rep * stimulus_len
        datamatrix[rep, :] = samples.iloc[first:first + stimulus_len]

    plt.figure()
    plt.title(action)
    plot_confidence(datamatrix)

In [None]:
wave = "AF3"
# Number of chunks each action's samples are split into by pre_analysis.
N_REPETITIONS = 10
# `unique()` keeps the order of first appearance, so the figures come out in
# the same order on every run (iterating a set of strings does not
# guarantee that).
for action in data.action.unique():
    pre_analysis(data, action, N_REPETITIONS, wave)

## Signal Subsetting

In [137]:
## Split the signals into small subsets with an overlap
def subset_data(data, ss_dim = None, ss_num = 10, overlap = .5, cut_smaller = True):
    """Cut a 1-D signal into fixed-size, overlapping windows.

    Windows are taken from the end of the signal backwards, so the first
    returned window holds the last samples of ``data`` and later windows move
    towards its start.

    Parameters
    ----------
    data : sequence supporting ``len(data)`` and ``data[a:b]`` slicing.
    ss_dim : window length in samples. When None it is derived from
        ``ss_num`` and ``overlap`` so that about ``ss_num`` windows span the
        signal (integer truncation can make the count differ slightly).
    ss_num : target number of windows; only used when ``ss_dim`` is None.
    overlap : fraction shared by consecutive windows. Values above 1 are
        read as a percentage and divided by 100.
    cut_smaller : when True, drop the truncated windows produced at the start
        of the signal.

    Returns
    -------
    numpy.ndarray of shape ``(n_windows, ss_dim)`` when all windows have the
    same length; a 1-D object array of windows when ``cut_smaller`` is False
    and some windows are shorter; an empty array when no window is left.

    Raises
    ------
    ValueError if the (given or derived) window length is smaller than 1.
    """
    if overlap > 1: # in case the value passed is a percentage
        overlap = float(overlap)/100

    if ss_dim is None: # either choose the dimension of the subsets or the number
        # n windows of length d advancing by d*(1-overlap) span
        # d * (1 + (n-1)*(1-overlap)) samples; solve for d.
        ss_dim = int(len(data) / (1 + (ss_num - 1) * (1 - overlap)))

    if ss_dim < 1:
        raise ValueError("subset size must be at least 1 sample, got {}".format(ss_dim))

    # Always advance by at least one sample; a step of 0 (tiny windows or
    # full overlap) would otherwise loop forever.
    step = max(1, int(ss_dim * (1 - overlap)))

    subsets = []
    end = len(data)  # exclusive bound, so the last sample is included
    while end > 0:
        start = max(end - ss_dim, 0)
        subsets.append(data[start:end])
        end -= step

    if cut_smaller:
        # Truncated windows can only be at the tail of the list; stop when
        # the list is empty instead of indexing into it.
        while subsets and len(subsets[-1]) < ss_dim:
            subsets.pop()

    if len({len(window) for window in subsets}) > 1:
        # Ragged result: build the object array explicitly, since recent
        # NumPy refuses to create one implicitly from unequal sequences.
        ragged = np.empty(len(subsets), dtype=object)
        for k, window in enumerate(subsets):
            ragged[k] = window
        return ragged

    return np.array(subsets)

def prepare_data(dataframe, ss_dim = 20, ss_num = 168, overlap = .5):
    """Build one windowed tensor per action.

    For every distinct value of ``dataframe.action`` the matching rows are
    selected and each signal column (all columns except the first and the
    last) is windowed with ``subset_data(..., cut_smaller=True)``.

    Returns a dict mapping each action to an array with axes
    ``(window, sample within window, column)``. Iterating over the first axis
    yields one 2-D matrix per window, if a list of matrices is preferred.
    """
    dataset = {}
    for action in set(dataframe.action.values):
        rows = dataframe[dataframe.action == action]
        per_column = [
            np.array(subset_data(np.array(rows[column]), ss_dim, ss_num, overlap, cut_smaller = True))
            for column in rows.columns[1:-1]
        ]
        # Stacked as (column, window, sample); reorder to (window, sample, column).
        stacked = np.array(per_column)
        dataset[action] = np.transpose(stacked, [1, 2, 0])
    return dataset

In [178]:
# Window the selected recording with prepare_data's default settings;
# the result maps each action to its tensor of windows.
dataset = prepare_data(data)

In [180]:
# Shape of the tensor for the "run" action; prepare_data orders the axes as
# (window, sample within window, column).
dataset["run"].shape

(283L, 20L, 14L)

## Clustering

## Fourier


In [None]:
# AF3 values of the rows whose Stimulus is "jump.png", copied so that `dt`
# is independent of `data`.
dt = data.loc[data.Stimulus == "jump.png", "AF3"].copy()

In [None]:
# Magnitude spectrum of the selected samples. `fftpack.fft` and `np.log10`
# are used because `sp.fft` is a module (not a function) in current SciPy and
# the `sp.log10` NumPy alias has been removed from the scipy namespace.
FFT = np.abs(fftpack.fft(np.asarray(dt)))
# 0.01 is the sample spacing given to fftfreq.
# NOTE(review): presumably seconds (i.e. 100 Hz) — confirm against the
# recording's actual sampling rate.
freqs = fftpack.fftfreq(dt.size, 0.01)
# 20*log10(|X|) shows the magnitude in decibels; bins with zero magnitude
# map to -inf.
plt.plot(freqs, 20*np.log10(FFT), '.')
plt.xlabel('Frequency')
plt.ylabel('Magnitude (dB)')