In [2]:
import os
import sys
# Put the resolved parent of the current working directory on the import path
# (os.path.pardir is ".."), presumably so the project-level `utils` package
# imported below can be found - confirm against the repository layout.
sys.path.insert(1, os.path.realpath(os.path.pardir))


from utils.augmentations import get_default_transform
from utils import creating_dataset


# Dataset loading & inspection
This notebook shows how to load the datasets used in this challenge and provides some basic statistics about them.

Note that the data includes EMG signals from 8 electrodes in the EMG armband; the predicted variables are the angles of 20 joints in the hand. Inputs are sampled at 200 Hz, but the outputs are intended to be at a subsampled rate of 25 Hz (more on that in other notebooks).

Data was acquired from "healthy" and "amputant" subjects (i.e. with limb loss) using the EMG armband on either the left or right arm.
Your model's predictions will be evaluated on data from one of the two amputant subjects (fedya), but you can use any and all provided data for training your model. For simplicity, the data has been divided into `training` and `test` subsets (not all subjects have `test` data) to evaluate your model's performance during training. The final submission data is held separately and only used when preparing the `submission.csv` file (see `04_submit_predictions.ipynb`).

### Load data
Start by defining a variable to keep track of where the data is saved on your computer and a set of parameters for selecting which data to load. 

In [3]:
# Location of the challenge data on disk. Set the EMG_DATA_PATH environment
# variable to point somewhere else without editing the notebook; when it is
# unset, the original default location is used.
DATA_PATH = os.environ.get(
    "EMG_DATA_PATH",
    r"/msc/home/vsharm64/projects/BCI_Kaggle/dataset_v2_blocks/dataset_v2_blocks",
)

You can decide which data to load (e.g. from both `health` and `amputant` subjects). 
The `test_dataset_list` specifies which subset of the data should be used as test set, and it shouldn't change. You can, however, play around with training your model on different subsets of the available training data. 

You can also define a set of `transform` functions to apply to the data before feeding it to the model, or load the default ones.

In [4]:
# Selection criteria for the recordings to load.
data_paths = {
    "datasets": [DATA_PATH],
    "hand_type": ["left", "right"],  # options: 'left', 'right'
    "human_type": ["health", "amputant"],  # options: 'amputant', 'health'
    "test_dataset_list": ["fedya_tropin_standart_elbow_left"],  # don't change this !
}

# Config object that keeps track of the data variables.
data_config = creating_dataset.DataConfig(**data_paths)

# Default transforms; p_transform is the probability of applying the transform.
p_transform = 0.1
transform = get_default_transform(p_transform)

# Build the train and test datasets with the transforms attached.
train_dataset, test_dataset = creating_dataset.get_datasets(
    data_config,
    transform=transform,
)


Getting val datasets
Number of moves: 72 | Dataset: fedya_tropin_standart_elbow_left
Reorder this dataset fedya_tropin_standart_elbow_left True
Getting train datasets
Number of moves: 72 | Dataset: fedya_tropin_standart_elbow_left
Reorder this dataset fedya_tropin_standart_elbow_left True
Number of moves: 70 | Dataset: valery_first_standart_elbow_left
Reorder this dataset valery_first_standart_elbow_left True
Number of moves: 135 | Dataset: alex_kovalev_standart_elbow_left
Reorder this dataset alex_kovalev_standart_elbow_left True
Number of moves: 72 | Dataset: anna_makarova_standart_elbow_left
Reorder this dataset anna_makarova_standart_elbow_left True
Number of moves: 62 | Dataset: artem_snailbox_standart_elbow_left
Reorder this dataset artem_snailbox_standart_elbow_left True
Number of moves: 144 | Dataset: matthew_antonov_standart_elbow_left
Reorder this dataset matthew_antonov_standart_elbow_left True
Number of moves: 144 | Dataset: misha_korobok_standart_elbow_left
Reorder this da

### Inspect the data

`train_dataset` and `test_dataset` are instances of the `torch.utils.data.ConcatDataset` class. 


The following code shows the number of batches in each set as well as the size of the inputs and outputs. 
Note that inputs are of shape `n_channels x batch_size` while the outputs are of shape `n_angles x downsampled_batch_size`, where `downsampled_batch_size = batch_size / 200 * 25` to account for downsampling of predictions. 

In [5]:
# Report how many items each split contains.
print(f"Train dataset size: {len(train_dataset)}, Test dataset size: {len(test_dataset)}")

# Index 0 yields one sample, unpacked as an (inputs, targets) pair.
X, Y = train_dataset[0]
print(f"X shape: {X.shape}, Y shape: {Y.shape}")

Train dataset size: 99990, Test dataset size: 792
X shape: (8, 256), Y shape: (20, 32)


You can also generate a video of the hand movements.

In [6]:
import numpy as np

from utils.hand_visualize import Hand, save_animation
from utils.quats_and_angles import get_quats

# First 10 training samples; each one is an (inputs, targets) pair.
batches = [train_dataset[idx] for idx in range(10)]

# Join the targets of those samples along axis 1.
Y = np.concatenate([targets for _, targets in batches], axis=1)

# NOTE(review): the shape checks further down show get_quats returning
# (20, 16, 4) for this (20, 320) input, the same as for a single (20, 32)
# sample, so the leading output axis does not follow the number of time
# steps - confirm the axis order get_quats expects.
quats = get_quats(Y)

hand_gt = Hand(quats)
ani = hand_gt.visualize_all_frames()
save_animation(ani, 'test_vis.gif', fps=25)  # this will save a .gif file

In [6]:
# Same steps as the preceding cell, without rendering the animation.
import numpy as np

from utils.hand_visualize import Hand, save_animation
from utils.quats_and_angles import get_quats

# Collect 10 training samples, then stack their targets along axis 1.
batches = [train_dataset[idx] for idx in range(10)]
Y = np.concatenate([targets for _, targets in batches], axis=1)

# Convert the stacked joint angles to quaternions.
quats = get_quats(Y)


In [7]:
# Targets of the 10 collected samples joined along axis 1.
Y.shape

(20, 320)

In [19]:
# Target array of a single training sample (element 1 of the sample pair).
train_dataset[0][1].shape

(20, 32)

In [14]:
# Same check through the collected list: targets of the first sample.
batches[0][1].shape

(20, 32)

In [16]:
# Number of elements per sample; the loading cell unpacks a sample as (X, Y).
len(batches[0])

2

In [17]:
# Shape of the quaternion array computed from the concatenated targets.
quats.shape

(20, 16, 4)

In [31]:
# Quaternions at index 0 of the leading axis (one 4-vector per row).
quats[0]

array([[ 0.        ,  0.        ,  0.        ,  1.        ],
       [-0.04312416,  0.03306272,  0.22938382, -0.971818  ],
       [-0.04322608,  0.03294022,  0.23216326, -0.97115739],
       [-0.04325948,  0.03289993,  0.23307568, -0.97093869],
       [ 0.0222884 , -0.05011607,  0.31909113, -0.94613554],
       [ 0.02184452, -0.05030681,  0.31084577, -0.94887668],
       [ 0.02140371, -0.05049175,  0.30266448, -0.95151809],
       [ 0.18211062, -0.146104  ,  0.33740099, -0.91194842],
       [ 0.17561351, -0.15393693,  0.29721149, -0.92581243],
       [ 0.16801359, -0.16228337,  0.25196479, -0.93912155],
       [ 0.08328987, -0.08916145,  0.36260135, -0.92392277],
       [ 0.08249929, -0.08989446,  0.35441937, -0.9270921 ],
       [ 0.0812002 , -0.0910712 ,  0.34105804, -0.93209011],
       [ 0.69173684, -0.26901042, -0.00736746,  0.67013376],
       [ 0.61882164, -0.29865893, -0.00514572,  0.72652333],
       [-0.66704538,  0.32558318, -0.04838486, -0.6683599 ]])

In [27]:
# Quaternions computed from the targets of one training sample only.
quats_oi = get_quats(train_dataset[0][1])
quats_oi.shape

(20, 16, 4)

In [30]:
# Shape of the single-sample targets that were passed to get_quats.
train_dataset[0][1].shape

(20, 32)

In [28]:
# Display the single-sample quaternion array (NumPy elides the middle of large arrays).
quats_oi

array([[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          1.00000000e+00],
        [-2.63740941e-02, -5.11758018e-02,  2.94953388e-01,
         -9.53775625e-01],
        [-2.97472497e-02, -4.92953454e-02,  2.30498181e-01,
         -9.71367932e-01],
        ...,
        [ 6.83211302e-01, -2.81374852e-01, -4.55036534e-03,
          6.73817337e-01],
        [-6.10578392e-01,  3.08096092e-01,  5.60282909e-03,
         -7.29547417e-01],
        [-6.59792184e-01,  3.32420103e-01, -4.65783970e-02,
         -6.72310644e-01]],

       [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          1.00000000e+00],
        [-4.24015107e-02,  2.87576981e-02,  2.37692171e-01,
         -9.69988422e-01],
        [-4.25741375e-02,  2.85043422e-02,  2.43477586e-01,
         -9.68552327e-01],
        ...,
        [-6.40666822e-01,  3.69219244e-01,  9.15039119e-03,
         -6.73156329e-01],
        [-5.89825092e-01,  3.46907202e-01,  5.88261482e-02,
         -7.26843338e-01],
        [-6.607

In [23]:
# NOTE(review): this gives (20, 16, 4) for the concatenated targets, the same
# shape as for a single (20, 32) sample, so the leading axis does not grow
# with the number of time steps - confirm the axis order get_quats expects.
get_quats(Y).shape

(20, 16, 4)

In [24]:
# Single-sample target shape, for comparison with the get_quats output shape.
train_dataset[0][1].shape

(20, 32)

In [None]:
# Leftover scratch cell (never executed): a bare name only echoes the object's repr.
get_quats

In [25]:
# Re-check: the concatenated targets span 10 samples along axis 1.
Y.shape

(20, 320)