In [1]:
# Imports und basic setup
# Try to use the Lab Widgets or if not possible the NB widgets and if nothing works just static plots
try:
    %matplotlib widget
except:
    try:
        %matplotlib notebook
    except:
        %matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np



## Exercise 1.1: Nerve Conduction Velocity

To test for Guillain-Barré syndrome (GBS) the nerve conduction velocity of multiple patients was measured.
The test records the time a electrical signal takes to travel along the forearm of a patient from the elbow to the wrist.
To reduce the potential of measurement errors, each patient was measured 3 times.
The results in $ms$ are provided as the `measurements` DataFrame below.
The same DataFrame also provides the length of the forearm in $cm$.

a) Create a function that takes the data of a all patient as input and calculates the mean and the std of the patient's nerve conduction velocity in $m/s$.

Tip: Try to make use of `numpy`'s vectorisation to perform the calculation without using an explicit loop.

b) Which of these patients might has GBS? Assume a healthy velocity of around 45-70 $m/s$.

In [2]:
measurements = {
    'p1': {'forearm': 45., 'times': np.array([9.7, 9.8, 10.1]) },
    'p2': {'forearm': 47., 'times': np.array([14.2, 12.4, 13.6]) },
    'p3': {'forearm': 50., 'times': np.array([8. , 8.3, 8.2]) },
    'p4': {'forearm': 48., 'times': np.array([10.2,  8. , 10.6]) },
}

# Convert to DataFrame for nicer visualisation
measurements = pd.DataFrame(measurements).T
measurements

Unnamed: 0,forearm,times
p1,45.0,"[9.7, 9.8, 10.1]"
p2,47.0,"[14.2, 12.4, 13.6]"
p3,50.0,"[8.0, 8.3, 8.2]"
p4,48.0,"[10.2, 8.0, 10.6]"


In [3]:
# Extract the values as numpy arrays for faster computations
times = np.vstack(measurements.times.values)
forearms = np.array(measurements.forearm.tolist())
print(times)
print(forearms)

[[ 9.7  9.8 10.1]
 [14.2 12.4 13.6]
 [ 8.   8.3  8.2]
 [10.2  8.  10.6]]
[45. 47. 50. 48.]


In [22]:
def nerve_conduction_velocity(times, forearm_length):
    # TODO: YOUR CODE HERE
    num_data = len(forearm_length)
    
    vel_mean_list = []
    vel_std_list = []
    for i in range(0, num_data):
        vel = 1/(times[i]/forearms[i])*10
        vel_mean_list.append(np.mean(vel))
        vel_std_list.append(np.std(vel))
    vel_mean = np.array(vel_mean_list)
    vel_std = np.array(vel_std_list)
    
    return vel_mean, vel_std

In [23]:
# We are passing the data of all patients into the function at once
vel_mean, vel_std = nerve_conduction_velocity(times, forearms)

# Add the new columns back to the DataFrame for nicer visualisation
measurements['vel_mean'] = vel_mean
measurements['vel_std'] = vel_std

measurements

Unnamed: 0,forearm,times,vel_mean,vel_std
p1,45.0,"[9.7, 9.8, 10.1]",45.621525,0.778889
p2,47.0,"[14.2, 12.4, 13.6]",35.18688,2.011131
p3,50.0,"[8.0, 8.3, 8.2]",61.238858,0.940846
p4,48.0,"[10.2, 8.0, 10.6]",50.780614,6.559277


In [26]:
# Create a new column that indicates if a patient has normal or abnormal nerve conduction velocity
# TODO: YOUR CODE HERE
num = len(measurements)

condition_list = []
for i in range(0, num):
    mean = vel_mean[i]
    if mean<45 or mean>70:
        condition_list.append('abnormal')
    else:
        condition_list.append('normal')
condition = np.array(condition_list)
measurements['condition'] = condition

measurements

Unnamed: 0,forearm,times,vel_mean,vel_std,condition
p1,45.0,"[9.7, 9.8, 10.1]",45.621525,0.778889,normal
p2,47.0,"[14.2, 12.4, 13.6]",35.18688,2.011131,abnormal
p3,50.0,"[8.0, 8.3, 8.2]",61.238858,0.940846,normal
p4,48.0,"[10.2, 8.0, 10.6]",50.780614,6.559277,normal


## Exercise 2.1: Import Data and Visualisation

The two most common data formats you will encounter in biosignal analysis are `csv` and `hdf5`.
The former is usually used for smaller amounts of data that fit into a table format.
The big advantage of `csv` files is that you can view them with any text editor.
However, `csv` files tend to be large compared to other file formats, because the stored data is not compressed.

This is where HDF5 shines. It uses a compressed format and can store metadata together with the raw measurement values.
However, you need special readers to open HDF5 files.

Python can handle both filetypes easily.
CSV is supported by Pandas (and Numpy) directly and the package `h5py` can be used to load HDF5 files.
Have a look at the supplementary notebook `HDF5_reminder.ipynb` to learn about the latter.

a) In the folder `./data` 3 biosignals are stored in different file formats.
Load each of them by filling in the missing lines of code below.

b) Adapt the index of each DataFrame so that it represents seconds and not samples. The sampling information is provided in the skeleton code.

c) Plot each Biosignal with an appropriate x axis. Tip: You can use the plot functionality of pandas to easily plot the DataFrame.

d) Can you guess which type of biosignal each one is? Give each plot a title with the name of the biosignal.


In [27]:
from pathlib import Path  # We use this so that the path works on Win and Mac

In [80]:
# Dataset 1
ds1_path = Path('./data/signal1.csv')

# Load dataset 1 into the variable ds1 as pandas DataFrame
# TODO: YOUR CODE HERE
ds1 = pd.read_csv(ds1_path)

# Adapt the index to seconds. Signal 1: sampling_rate = 204.8 Hz
s1_sampling_rate = 204.8 # Hz
# TODO: YOUR CODE HERE
n_samples = np.array(ds1.loc[:, 'n_samples'])
time = n_samples/s1_sampling_rate
print('time: ', time)
# ds1.reindex(list(time))
# ds1

ds1.index.name = 'time [s]'
ds1.head()


time:  [488.28125    488.28613281 488.29101562 ... 507.99804688 508.00292969
 508.0078125 ]


Unnamed: 0_level_0,n_samples,gyr_x,gyr_y,gyr_z,acc_x,acc_y,acc_z
time [s],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,100000,-50.537109,362.854004,-25.817871,0.25,-0.332031,2.442871
1,100001,-55.847168,352.783203,-36.010742,0.249512,-0.270508,2.516602
2,100002,-60.302734,341.552734,-44.921875,0.306641,-0.209961,2.681152
3,100003,-58.410645,330.01709,-52.246094,0.321777,-0.140625,2.85791
4,100004,-55.114746,322.08252,-58.71582,0.296387,-0.07959,2.981934


In [61]:
# Dataset 2
ds2_path = Path('./data/signal2.txt')

# Load dataset 1 into the variable ds2 as pandas DataFrame
# TODO: YOUR CODE HERE
ds2 = pd.read_csv(ds2_path)

# Adapt the index to seconds. Signal 2: sampling_time = 0.48ms
s2_sampling_time = 0.48 # ms
# TODO: YOUR CODE HERE

ds2
# ds2.index.name = 'time [s]'
# ds2.head()

Unnamed: 0,-1478.8
0,-1478.8
1,-1478.8
2,-1478.8
3,-1478.8
4,-1478.8
...,...
108266,1514.6
108267,1514.6
108268,1514.6
108269,1514.6


In [79]:
import h5py
# Dataset 3
ds3_path = Path('./data/signal3.h5')

# Load dataset 3 and store the "tibialis" recording in the variable ds3 as DataFrame
# TODO: YOUR CODE HERE
file = h5py.File(ds3_path)
ds3 = file['trial1/muscles/tibialis']

# Adapt the index to seconds. Signal 3: Extract the sampling rate from the h5 file.
# TODO: YOUR CODE HERE
sampling_rate = ds3.attrs['sampling rate']
values = ds3[()]
values
    
# ds3.index.name = 'time [s]'
# ds3.head()

array([ 0.17791748,  0.17295837,  0.19485474, ..., -0.027771  ,
       -0.0151825 , -0.00526428], dtype=float32)

In [None]:
# Plot Signal 1
# TODO: YOUR CODE HERE




In [None]:
# Plot Signal 2
# TODO: YOUR CODE HERE



In [None]:
# Plot Signal 3
# TODO: YOUR CODE HERE

