In [None]:
import torch
import numpy as np
import scipy.io



mat = scipy.io.loadmat('eyeliddata.mat', simplify_cells = True)
torch.cuda.is_available()

# **Data Reorganization** <br>
Going from provided matlab file to train-test-val splits

For a single trial, 

- Marker kinematics are provided at 400 Hz, shape (300, 3) --> 0.75 s
- EMG information is provided at 6103.5 Hz, shape (4564,) --> 0.7478 s

Marker and EMG data are time-synced. Blinking starts at around 0.25 seconds, which corresponds to point 100 for the kinematics data and point 1525 in the EMG data

Since only electrodes u1, u2, u3, u4, and t2 were used in common across all participants, our input to the model (without spectral preprocessing) will be of shape (4564, N, 5). Since only markerset x1, x2, A, B, C, D, E were used in common across all participants, our labels will be of shape (N, 7, 300, 3). In other words, $x \in R^{T_{\mathrm{in}}\times N \times C_{\mathrm{in}}}$, where $T_{\mathrm{in}}$ is the number of timesteps (4564, in this case), N is the batch size, and $C_{\mathrm{in}}$ is the number of channels (5, in this case), while $y \in R^{T_{\mathrm{out}}\times 3\times N \times C_{\mathrm{out}}}$, where where $T_{\mathrm{out}}$ is the number of timesteps (300 x 3, in this case), N is the batch size, and $C_{\mathrm{out}}$ is the number of channels (7, in this case).

To reorganize the data for train-test-val splits, the Nth data/label pair should contain the time series for emg electrodes u1, u2, u3, u4, t2; the timeseries for markerset x1, x2, A, B, C, D, E; a boolean hyperparameter indicating right vs. left eye; and an identifying string combining the subject, blink type, and trial number (ie: 'sub1_spon#23')

In [83]:
eye_bool = np.asarray([1, 1, 0, 1, 1, 0, 0, 0])
electrode_list = ['u1', 'u2', 'u3', 'u4', 't2']
marker_list = ['x1', 'x2', 'u1', 'u2', 'u3', 'u4', 'u5']




data = np.zeros((4564, 1, 5))
for blink_key in mat['ForSamantha']['emg_with_notchfilter']:
    for sub_key in mat['ForSamantha']['emg_with_notchfilter'][blink_key]:
        trial = []
        for electrode_key in electrode_list:
            trial.append(mat['ForSamantha']['emg_with_notchfilter'][blink_key][sub_key][electrode_key])
        trial = np.dstack(trial) #reshape to (4564, T, 5) where T is number of trials
        data = np.hstack((data, trial)) #stack all trials along 1st axis --> (4564, N, 5)
    
identifier = []
label = np.zeros((300, 3, 1, 7))
for blink_key in mat['ForSamantha']['kinem']:
    for sub_key in mat['ForSamantha']['kinem'][blink_key]:
        trial = []
        for marker_key in marker_list:
            trial.append(mat['ForSamantha']['kinem'][blink_key][sub_key][marker_key])
        trial = np.stack(trial, axis = -1) #reshape to (300, 3, T, 7) where T is number of trials
        label = np.dstack((label, trial)) #stack all trials along 2nd axis --> (300, 3, N, 7)
        
        for num in range(trial.shape[1]):
            id_string = sub_key + '_' + blink_key + '#' + str(num + 1) 
            id_eye = eye_bool[int(sub_key.split('b')[1]) - 1]
            identifier.append((id_string, id_eye))


X = data[:,1:,:] 
y = label[:,1:,:,:]
identifier = np.asarray(identifier)

print(y.shape)
  
    




ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 300 and the array at index 1 has size 200

In [108]:
#print(identifier[-1])

#for key in mat["ForSamantha"]['kinem']:
   # print(key)

print(mat["ForSamantha"]['kinem']['forced']['sub5']['u2'].shape)

(130, 3, 7)
