In [1]:
import h5py
import pandas as pd
import numpy as np
import torch

from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from tqdm import tqdm_notebook as tqdm
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [2]:
df_train = h5py.File("./data/X_train.h5", "r")
x_train = df_train["features"][:183143]
x_test  = df_train["features"][183143:]

In [3]:
y_train = pd.read_csv("./data/y_train.csv").values[:183143, 1].squeeze()
y_test = pd.read_csv("./data/y_train.csv").values[183143:, 1].squeeze()

In [4]:
def get_waves(x_train):
    freq  = 125*np.fft.fftfreq(x_train.shape[1])
    eeg   = np.abs(np.fft.fft(x_train))
    delta = np.sum(eeg[:, (freq > 0.5) & (freq <= 4)], axis = 1)
    theta = np.sum(eeg[:, (freq > 4) & (freq <= 7)], axis = 1)
    alpha = np.sum(eeg[:, (freq > 7) & (freq <= 13)], axis = 1)
    beta  = np.sum(eeg[:, (freq > 13) & (freq <= 30)], axis = 1)
    gamma = np.sum(eeg[:, (freq > 30)], axis = 1)
    return np.array([delta, theta, alpha, beta, gamma])

In [5]:
features = []
features.append(x_train[:, :11])
features = np.array(features)[0]

In [6]:
waves = get_waves(x_train[:, 11:])

In [7]:
waves = np.transpose(waves)

In [12]:
features = preprocessing.scale(np.concatenate([features, waves], axis = 1))

In [13]:
forest = RandomForestClassifier()

In [14]:
forest.fit(features, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [15]:
features = []
features.append(x_test[:, :11])
features = np.array(features)[0]
waves    = get_waves(x_test[:, 11:])
waves    = np.transpose(waves)
features = np.concatenate([features, waves], axis = 1)
features = preprocessing.scale(features)

In [16]:
y_pred  = forest.predict(features)

In [17]:
np.sum(y_pred == y_test)/len(y_test)

0.47689544024155633