# Objectives:

1. Implement data preprocessing to summarize the features in the time series. The features hypothesized to be useful are as follows:
    * Band Spectrum Energy (eeg.get_power) (for EEG)
    * Fractal Dimension of Time Series (pyEEG)
    * Skewness and Kurtosis of the distributions (scipy.stats)
    * Regular stats (mean, median, max, min of time series)
    
2. Models to train:
    * Linear Chain Conditional Random Field with structured SVM solver with a 1-slack QP with L1 slack penalty (pystruct)
    * A simple SVM like Andreas (sklearn SVC)
    * A Hidden Markov model?
    
3. Cross Validate:
    * Build a proper function/system for leave-one-subject-out CV
    * Test models and ensembles on it


In [65]:
import importlib
import utils
importlib.reload(utils)
import utils
from utils import *

In [6]:
xtrain_eeg1, xtrain_eeg2, xtrain_emg, ytrain, xtest_eeg1, xtest_eeg2, xtest_emg = load_data()

Loading xtrain...
Shapes: (64800, 512) (64800, 512) (64800, 512)
Loading ytrain...
Shape: (64800, 1)
Loading xtest...
Shapes: (43200, 512) (43200, 512) (43200, 512)


# Model Implementation: Conditional 

In [51]:
idx = 21600*2
xtrain_eeg1_ = xtrain_eeg1[0:idx]
xtrain_eeg2_ = xtrain_eeg2[0:idx]
xtrain_emg_ = xtrain_emg[0:idx]
eeg1_ = process_EEG(xtrain_eeg1_)
eeg2_ = process_EEG(xtrain_eeg2_)
emg_ = process_EMG(xtrain_emg_)
xtrain_ = np.concatenate((eeg1_, eeg2_, emg_), axis=1)
# ytrain_classes = ytrain[0:idx]['y']
ytrain_classes = ytrain.values[0:idx]
ytrain_ = ytrain.values[0:idx]

xtest_eeg1_ = xtrain_eeg1[idx:-1]
xtest_eeg2_ = xtrain_eeg2[idx:-1]
xtest_emg_ = xtrain_emg[idx:-1]
eeg1_ = process_EEG(xtest_eeg1_)
eeg2_ = process_EEG(xtest_eeg2_)
emg_ = process_EEG(xtest_emg_)
xtest_ = np.concatenate((eeg1_, eeg2_, emg_), axis=1)
ytest_ = ytrain.values[idx:-1]

# xtest_eeg1_ = xtrain_eeg1[idx:-1]
# xtest_eeg2_ = xtrain_eeg2[idx:-1]
# xtest_final = 

In [58]:
# CRF

# xtrain_ = np.reshape(xtrain_, (xtrain_.shape[0], 1, xtrain_[0].shape[0])) # Reshape so that it works with CRF
xtrain_crf = np.reshape(xtrain_, (2, -1, xtrain_.shape[1])) # Reshape so that it works with CRF
ytrain_crf = np.reshape(ytrain_, (2, -1)) -1 # Reshape so that it works with CRF
# X_test_crf = X_test.reshape(2, -1, 49)
print(xtrain_crf.shape, ytrain_crf.shape)

print("Starting CRF...")
classes = np.array([1, 2, 3])
lmao = np.reshape(ytrain_classes, (-1,))
weights_crf = compute_class_weight("balanced", list(classes), list(lmao))
weights_crf[0] = weights_crf[0]+2.5
weights_crf[1] = weights_crf[1]+1.5
    
model = ChainCRF(class_weight=weights_crf)
ssvm = OneSlackSSVM(model=model, C=0.5, max_iter=2000)
    
ssvm.fit(xtrain_crf, ytrain_crf)   
   

# Test on the third guy
xtest_crf = np.reshape(xtest_, (1, -1, xtest_.shape[1]))
ytest_crf = np.reshape(ytest_, (1, -1)) -1
print(xtest_crf.shape, ytest_crf.shape)
y_pred_crf = ssvm.predict(xtest_crf)
y_pred_crf = np.asarray(y_pred_crf).reshape(-1) + 1  

print("BMAC:", sklearn.metrics.balanced_accuracy_score(ytest_, y_pred_crf))


# y_pred_filtered_crf = medfilt(y_pred_crf, FILTER_WINDOW)


# print(f"CRF unique predictions {np.unique(y_pred_filtered_crf)}")


(2, 21600, 36) (2, 21600)
Starting CRF...
[1, 2, 3] [2 2 2 ... 1 1 1] [2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2,

(1, 21599, 36) (1, 21599)
BMAC: 0.940688071290236


In [208]:
# Try to make a submission:


y_pred_final = ssvm.predict(xtest_crf)
make_submission("firsttrylol.csv", y_pred_crf)

ValueError: Length of values does not match length of index

In [195]:
xtrain_crf = np.reshape(xtrain_, (3, -1, 24)) # Reshape so that it works with CRF
ytrain_crf = np.reshape(ytrain_, (3, -1, 1)) # Reshape so that it works with CRF
print(xtrain_crf.shape, ytrain_crf.shape)



# xtrain_crf[0].shape
print(len(y_pred_crf))


(3, 21600, 24) (3, 21600, 1)
64800


# CRF Grid Search

In [None]:
Cs = [0.01, 0.1, 0.2, 0.4, 0.5, 0.6, 0.8, 1.0]
weight_shifts = [0, 0.5, 1.0, 1.5]
for c, w in itertools.product(Cs, weight_shifts):
    res = losocv_CRF(xtrain_eeg1, xtrain_eeg2, xtrain_emg, ytrain, C=c, weight_shift=w, fs=128)
    print("C:", c, ", w:", w, ", BMAC (Mean, Std):", (np.mean(res), np.std(res)))        
    print("********************************")










  0%|          | 0/3 [00:00<?, ?it/s][A[A[A[A[A[A[A[A







 33%|███▎      | 1/3 [00:36<01:12, 36.13s/it][A[A[A[A[A[A[A[A

BMAC: 0.8444563081107587


In [8]:
epochs = 21600
num_sub = 3
# Indices of the subjects
sub_indices = [np.arange(0, epochs), np.arange(epochs, epochs*2),np.arange(epochs*2, epochs*3)]


In [9]:
i = 1

In [21]:
train_index = np.concatenate([sub_indices[(i+1)%num_sub], sub_indices[(i+2)%num_sub]])
eeg1_train = xtrain_eeg1.values[train_index]
eeg2_train = xtrain_eeg2.values[train_index]
emg_train = xtrain_emg.values[train_index]
y_train = ytrain.values[train_index]


In [12]:
train_index

array([43200, 43201, 43202, ..., 21597, 21598, 21599])

In [18]:
xtrain_eeg1.values[train_index].shape

(43200, 512)

In [27]:
type(xtrain_eeg1.values)

numpy.ndarray

In [33]:
np.all(xtrain_eeg1.iloc[1:3, :].values == xtrain_eeg1.values[1:3, :])

True

In [66]:
yo = xtrain_eeg1.values[1:3]

In [68]:
yo - np.reshape(np.mean(yo, axis=1), (-1, 1))

array([[ 9.95605469e-05,  1.27560547e-04,  3.02560547e-04, ...,
         9.12560547e-04,  8.02560547e-04,  4.42560547e-04],
       [ 2.02523437e-04, -1.67476563e-04, -7.97476563e-04, ...,
        -5.77476563e-04, -5.07476563e-04, -4.97476563e-04]])

In [69]:
yo

array([[ 6.7e-05,  9.5e-05,  2.7e-04, ...,  8.8e-04,  7.7e-04,  4.1e-04],
       [ 1.6e-04, -2.1e-04, -8.4e-04, ..., -6.2e-04, -5.5e-04, -5.4e-04]])

In [70]:
np.mean(yo, axis=1)

array([-3.25605469e-05, -4.25234375e-05])

In [72]:
1.27560547e-04 + -3.25605469e-05

9.500000009999998e-05