# EMGEPN100
# An example for datasets recorded with multiple hardware devices (and therefore different sampling rates).

In [None]:
import libemg
from libemg.datasets import get_dataset_list
import numpy as np 

# The dataset in 'get_dataset_list'

In [None]:
# Confirm that EMGEPN100 appears in the listing returned for cross_user=True.
cross_user_datasets = get_dataset_list(cross_user=True)
'EMGEPN100' in cross_user_datasets

True

In [None]:
# Confirm that EMGEPN100 also appears in the listing returned for cross_user=False.
per_user_datasets = get_dataset_list(cross_user=False)
'EMGEPN100' in per_user_datasets

True

# Prepare data

##### Here we process the .mat files into .h5 format (done once and saved), and then prepare the data, with data being segmented based on the 'pointGestureBegins' index provided by the dataset (EMGEPN100).

In [None]:
# Instantiate the dataset class directly; the same class can also be obtained
# with get_dataset_list(cross_user=True)['EMGEPN100']().
emg_epn100 = libemg.datasets.EMGEPN100()

# prepare_data returns a mapping; this example works with its 'All' entry.
prepared = emg_epn100.prepare_data(
    split=True,
    segment=True,
    relabel_seg=None,
    channel_last=True,
    subjects=None,
)
dataset = prepared['All']


Please cite: https://doi.org/10.3390/s22249613


=== Processing split: training ===
Starting user_001 -> subject=0
Finished user subject=0 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_001.h5
Starting user_002 -> subject=1
Finished user subject=1 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_002.h5
Starting user_003 -> subject=2
Finished user subject=2 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_003.h5
Starting user_004 -> subject=3
Finished user subject=3 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_004.h5
Starting user_005 -> subject=4
Finished user subject=4 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_005.h5
Starting user_006 -> subject=5
Finished user subject=5 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_006.h5
Starting user_007 -> subject=6
Finished user subject=6 | reps extracted=360 | output=DATASET_85PROCESSED\training\user_007.h5
Starting user_008 -> subject=7
Fi

# Windowing

##### Simple windowing that ignores the sampling-rate differences. All windows will have the same shape, but this is not recommended for datasets with multiple sampling rates, because the same number of samples spans a different duration on each device.

In [None]:
# Keep only subjects 0-9, then window by raw sample count
# (window size 20, increment 20), regardless of sampling rate.
selected_subjects = list(range(10))
dataset = dataset.isolate_data("subjects", selected_subjects, fast=True)
windows, meta = dataset.parse_windows(20, 20)
windows.shape

(304244, 8, 20)

##### Here we set multi_rate=True so that the given window size and window increment are time-based (in ms). The actual window size in samples is calculated from the sampling frequency stored under sampling_rate_key.
##### The output is a list with one entry of windows per rep, and it is non-rectangular because of the different sampling rates combined with a fixed time-based window size. The metadata will contain, for each of its keys, a list of the same length as windows.

In [None]:
# Time-based windowing: with multi_rate=True both values are given in milliseconds.
window_size_ms = 250
window_increment_ms = 20
windows, meta = dataset.parse_windows(
    window_size_ms,
    window_increment_ms,
    multi_rate=True,
    sampling_rate_key='sampling_rates',
)

In [None]:
# The windows list and each metadata list should have the same number of entries.
tuple(len(entries) for entries in (windows, meta['sampling_rates'], meta['classes']))

(3600, 3600, 3600)

##### Here we see that there are two different time-axis lengths: 125 and 50 samples, both corresponding to 250 ms, for the 500 Hz and 200 Hz sensors respectively.

In [None]:
# Gather the size of the last axis of every entry in windows, then deduplicate.
last_axis_lengths = [entry.shape[-1] for entry in windows]
np.unique(last_axis_lengths)

array([ 50, 125])

##### We can also easily isolate sensors by sampling rate, so that the normal window parser can be used to obtain a rectangular batch of windows with a fixed time-based window size.

In [None]:
# gForce recordings only: a window of 40 samples at 500 Hz is an 80 ms window.
gforce_id = emg_epn100.get_device_ID('gForce')
dataset_g = dataset.isolate_data("devices", [gforce_id], fast=True)
windows, meta = dataset_g.parse_windows(40, 20)
windows.shape

(288025, 8, 40)

In [None]:
# Myo recordings only: a window of 40 samples at 200 Hz is a 200 ms window.
myo_id = emg_epn100.get_device_ID('myo')
dataset_m = dataset.isolate_data("devices", [myo_id], fast=True)
windows, meta = dataset_m.parse_windows(40, 20)
windows.shape

(12619, 8, 40)