In [28]:
import numpy as np
import scipy.io as sio
import pandas
import os

def transform_features(folder, commands_file):
    """
    Return a matrix filled with data samples from the
    files of the given folder, and return a vector with 
    the corresponding labels.
    """
    X = []
    Y = []
    commands = pandas.read_csv(commands_file)
    
    for filename in os.listdir(folder):
        data = sio.loadmat(os.path.join(folder,filename))['data']
        file_info = filename.split('_')
        print(file_info)
        data = np.swapaxes(data,0,2)
        for i in range(len(data)):
            X.append(data[i].flatten())
        Y.extend(commands[file_info[2]])
        
    return (np.matrix(X), np.array(Y))
        
train_x, train_y = transform_features('data/train','data/versions.csv')
test_x, test_y = transform_features('data/test', 'data/versions.csv')

['01-12-2016', '14.29', 'v2', 'ppn1', 'meting2', 'filtered', 'cut.mat']
['17-11-2016', '18.24', 'v1', 'ppn1', 'meting2', 'filtered', 'cut.mat']
['01-12-2016', '18.02', 'v2', 'ppn1', 'meting4', 'filtered', 'cut.mat']
['17-11-2016', '18.42', 'v1', 'ppn1', 'meting3', 'filtered', 'cut.mat']
['01-12-2016', '14.50', 'v2', 'ppn1', 'meting3', 'filtered', 'cut.mat']
['01-12-2016', '14.05', 'v2', 'ppn1', 'meting1', 'filtered', 'cut.mat']


In [33]:
from sklearn import linear_model

def learn_from_data(X, Y, alg='log'):

    if alg == 'log':
        model = linear_model.LogisticRegression(C=1.0)
        model.fit(X,Y)
    elif alg == 'nn':
        model = Classifier(
            layers=[
                Layer("Rectifier", units=100),
                Layer("Sigmoid")],
            learning_rate=0.002,
            n_iter=1000)
        model.fit(X, Y)

    return model

train_x, train_y = transform_features('data/train','data/versions.csv')
test_x, test_y = transform_features('data/test', 'data/versions.csv')

['01-12-2016', '14.29', 'v2', 'ppn1', 'meting2', 'filtered', 'cut.mat']
['17-11-2016', '18.24', 'v1', 'ppn1', 'meting2', 'filtered', 'cut.mat']
['01-12-2016', '18.02', 'v2', 'ppn1', 'meting4', 'filtered', 'cut.mat']
['17-11-2016', '18.42', 'v1', 'ppn1', 'meting3', 'filtered', 'cut.mat']
['01-12-2016', '14.50', 'v2', 'ppn1', 'meting3', 'filtered', 'cut.mat']
['01-12-2016', '14.05', 'v2', 'ppn1', 'meting1', 'filtered', 'cut.mat']


In [36]:
clf = learn_from_data(train_x, train_y, 'log')

predictions = clf.predict(test_x)

acc = 0

for i in range(len(predictions)):
    if predictions[i] == test_y[i]:
        acc += 1

print(acc / len(test_y))
print(predictions)

0.65
['rechts' 'rechts' 'links' 'links' 'links' 'rechts' 'rechts' 'links'
 'rechts' 'rechts' 'links' 'rechts' 'rechts' 'links' 'links' 'rechts'
 'links' 'rechts' 'rechts' 'links' 'links' 'rechts' 'links' 'links' 'links'
 'rechts' 'rechts' 'rechts' 'links' 'links' 'links' 'links' 'links' 'links'
 'links' 'links' 'links' 'rechts' 'links' 'links']


In [35]:
def filter_commands(X, Y, commands):
    indices = [i 
               for i, value in enumerate(Y) 
               if value in commands
              ]
    
    X = np.take(X,indices)
    Y = np.take(Y,indices)
    print(Y.shape)
    return (np.transpose(X),Y)

train_x, train_y = filter_commands(train_x, train_y, ['links','rechts'])
print(train_x.shape)
test_x, test_y = filter_commands(test_x, test_y, ['links','rechts'])

(200,)
(200, 1)
(40,)
