In [1]:
from collections import Counter
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [10]:
def prepare_data(file_name: str, block_size: int) -> np.ndarray:
    data = np.fromfile(file_name, dtype='float32')
    data = data.reshape(-1, 17)
    data = data[:, :8]
    for i in range(data.shape[1]):
        data[:, i] -= np.mean(data[:, i])
    num_seq = data.shape[0] - (data.shape[0] % block_size)
    data = data[:num_seq]
    data = data.reshape(-1, block_size, 8)
    print (data.shape)
    return data

In [12]:
both = prepare_data('./data_samples/blinking/both.bin', 50)
left = prepare_data('./data_samples/blinking/left.bin', 50)
right = prepare_data('./data_samples/blinking/right.bin', 50)

(245, 50, 8)
(126, 50, 8)
(146, 50, 8)


In [14]:
num_both = both.shape[0]
num_left = left.shape[0]
num_right = right.shape[0]

num_all = num_both + num_left + num_right

In [17]:
labels = np.zeros(shape=(num_all, 1))

labels[:num_both] = 0
labels[num_both:num_both+num_left] = 1
labels[num_both+num_left:] = 2

Counter(labels.flatten())

Counter({0.0: 245, 2.0: 146, 1.0: 126})

In [18]:
all_data = np.concatenate((both, left, right))
all_data.shape

(517, 50, 8)

In [20]:
mean_all = all_data.mean(axis=1)
mean_all.shape, labels.shape

((517, 8), (517, 1))

In [21]:
x_tr, x_te, y_tr, y_te = train_test_split(mean_all, labels)

In [22]:
lr = LogisticRegression()
lr.fit(x_tr, y_tr)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [25]:
predict_lr = lr.predict(x_te)
print(classification_report(y_te, predict_lr))

             precision    recall  f1-score   support

        0.0       0.42      1.00      0.59        55
        1.0       0.00      0.00      0.00        30
        2.0       0.00      0.00      0.00        45

avg / total       0.18      0.42      0.25       130



  'precision', 'predicted', average, warn_for)


In [28]:
rf = RandomForestClassifier()
rf.fit(x_tr, y_tr)

  from IPython.kernel.zmq import kernelapp as app


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [29]:
predict_rf = rf.predict(x_te)
print(classification_report(y_te, predict_rf))

             precision    recall  f1-score   support

        0.0       0.93      0.98      0.96        55
        1.0       0.80      0.93      0.86        30
        2.0       1.00      0.82      0.90        45

avg / total       0.92      0.92      0.92       130



In [30]:
channels = list(range(1, 17))
dict(zip(channels, rf.feature_importances_))

{1: 0.04805428874885687,
 2: 0.09059870097498769,
 3: 0.10962954450506324,
 4: 0.20295496232836058,
 5: 0.1929543506508916,
 6: 0.16225555636035024,
 7: 0.09464550536632106,
 8: 0.09890709106516869}