# Warning
This notebook is under development

# Experiments on cutting data with lag or lead

This notebook is intended to help researchers work with respiratory EMG data and ventilator data that may not be exactly synchronized, but that have a consistent lag time between the two signals.

## Import libraries

In [None]:
%matplotlib widget
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import scipy
import helper_functions as hf
from config import Config

In [None]:
# get new changes in our library i.e. the tmsisdk

from tmsisdk_lite import Poly5Reader


# Set a collection place for experiments

In [None]:
# Collection place for experiments. Do not rerun this cell: doing so resets the list to empty.
big_data_list = list()

Below, change the path to the root directory where you keep your EMG and ventilator ("Draeger") files.

# Reruns should be done from this cell as the start

In [None]:
# Instantiate the project configuration and look up the entry named 'root_emg_directory'.
# NOTE(review): Config and get_directory come from config.py, which is not shown here;
# presumably the returned value is the folder searched for .Poly5 recordings below.
config = Config()
root_emg_directory = config.get_directory('root_emg_directory')

In [None]:
# Recursively collect every .Poly5 recording below the root directory.
emg_pattern = os.path.join(root_emg_directory, '**/*.Poly5')
# glob returns entries in arbitrary, file-system dependent order. The EMG and ventilator
# lists are later paired by position, so sort the listing to make that pairing deterministic.
emg_and_vent_files = sorted(glob.glob(emg_pattern, recursive=True))

# Ventilator recordings are recognised by 'Draeger' appearing anywhere in the path;
# everything else is treated as an EMG recording.
# NOTE(review): pairing by list position assumes each EMG file has exactly one Draeger
# file that sorts into the same position -- confirm against the directory layout.
vent_files = [path for path in emg_and_vent_files if 'Draeger' in path]
emg_files = [path for path in emg_and_vent_files if 'Draeger' not in path]

Now you can pick a file from the list, which have been numbered.

In [None]:
# One dropdown entry per EMG file, labelled with the file's index as a string.
list_of_numbers_strung = [str(index) for index in range(len(emg_files))]

btn = widgets.Dropdown(
    description='Picked File:',
    options=list_of_numbers_strung,
    value='0',
    disabled=False,
)
display(btn)

In [None]:
# Turn the dropdown selection (a string index) back into an integer.
number_chosen = int(btn.value)
# The same index is used for both lists, so the EMG and ventilator files are paired purely
# by their position in emg_files / vent_files.
# NOTE(review): this raises IndexError if vent_files is shorter than emg_files.
emg_file_chosen = emg_files[number_chosen]
vent_file_chosen = vent_files[number_chosen]
print("The files you chose are:\n", emg_file_chosen, '\n', vent_file_chosen)

In [None]:
# Read the chosen EMG recording and keep its sample array and sampling rate.
data_emg = Poly5Reader(emg_file_chosen)
data_emg_samples, emg_fs = data_emg.samples, data_emg.sample_rate

# Same for the matching ventilator (Draeger) recording.
data_vent = Poly5Reader(vent_file_chosen)
data_vent_samples, vent_fs = data_vent.samples, data_vent.sample_rate

In [None]:
%matplotlib inline
# set up plot, show
x_emg = data_emg_samples
x_vent = data_vent_samples
fig, axis = plt.subplots(nrows=3, ncols=2, figsize=(6, 6))
axis[0, 0].grid(True)
axis[0, 0].plot(x_emg[0])
axis[0, 0].set(title='leads in EMG')
axis[1, 0].plot(x_emg[1])
axis[2, 0].plot(x_emg[2])
axis[0, 1].set(title='leads in Draeger')
axis[0, 1].grid(True)
axis[0, 1].plot(x_vent[0])
axis[1, 1].plot(x_vent[1])
axis[2, 1].plot(x_vent[2])

That is the whole unfiltered EMG, but you probably want to examine only a part of it. You will also want to examine a signal filtered down to only the EMG components. Therefore we can later isolate the EMG components with an ICA, in addition to the filters we will experiment with.

## Automatic matching, when you know the lag/lead

Now you can input the time difference between the two samples.

In [None]:
# Direction of the offset: does the EMG lead or lag the ventilator signal?
dropdown = widgets.Dropdown(
    description="The EMG",
    options=["leads", "lags"],
    value='lags',
    disabled=False,
)

# Free-text box for the size of the offset in milliseconds.
time_difference = widgets.Combobox(
    description='time difference',
    placeholder='type in number of milliseconds ',
    options=['', ],
)

# Stack both controls vertically; as the last expression this is what the cell displays.
offset_controls = [dropdown, time_difference]
widgets.VBox(offset_controls)

Warning: you must put in a number value for the time difference even if it is zero. 

In [None]:
# Build pseudo-timestamps (in seconds) as sample index divided by a nominal sampling rate.
# NOTE(review): the rates below are hard-coded, while emg_fs / vent_fs were read from the
# recordings earlier -- confirm the files really use 2048 Hz and 100 Hz.
vent_frequency = 100
emg_frequency = 2048

# One timestamp per sample of row 2 of each recording.
dfourth_list_ids = np.arange(len(data_vent_samples[2])) / vent_frequency
efourth_list_ids = np.arange(len(data_emg_samples[2])) / emg_frequency

In [None]:
# Append each pseudo-timestamp as an extra bottom row beneath its signal rows.
tsemg = np.concatenate(
    (np.atleast_2d(data_emg_samples), np.atleast_2d(efourth_list_ids)), axis=0)
tsdraeger = np.concatenate(
    (np.atleast_2d(data_vent_samples), np.atleast_2d(dfourth_list_ids)), axis=0)

In [None]:
# (rows, samples) of the timestamped EMG array
np.shape(tsemg)

In [None]:
# (rows, samples) of the timestamped ventilator array
np.shape(tsdraeger)

In [None]:
# number of samples per EMG row
tsemg.shape[1]

In [None]:
# number of samples per ventilator row
tsdraeger.shape[1]

In [None]:
normalizer = len(tsemg[2])
resampled_vent_0 = scipy.signal.resample(data_vent_samples[0], normalizer)
resampled_vent_1 = scipy.signal.resample(data_vent_samples[1], normalizer)
resampled_vent_2 = scipy.signal.resample(data_vent_samples[2], normalizer)
ts_resampled = scipy.signal.resample(dfourth_list_ids, normalizer)

resampled_vent_top = np.vstack((resampled_vent_0, resampled_vent_1))
resampled_vent = np.vstack((resampled_vent_top, resampled_vent_2))
tsresampled_vent = np.vstack((ts_resampled, resampled_vent))

In [None]:
#ts_resampled[5:20]

In [None]:
# Convert the requested offset (milliseconds) into a whole number of EMG samples.
# An empty text box is treated as "no offset", and fractional milliseconds are accepted.
offset_ms = float(time_difference.value or 0)
if offset_ms < 0:
    raise ValueError(
        "time difference must not be negative; "
        "use the leads/lags dropdown to set the direction")
sample_difference = int((offset_ms / 1000) * emg_frequency)

# Padding blocks of 1, 2, 3 and 4 rows, each `sample_difference` columns wide.
# Real floating-point NaN is used instead of the string 'NaN': string padding forced every
# array it was stacked with to be converted to a string array.
padding = np.full(sample_difference, np.nan)
out = np.full((2, sample_difference), np.nan)
out_three = np.full((3, sample_difference), np.nan)
out_four = np.full((4, sample_difference), np.nan)

In [None]:
# must replace zeros in 4th layer of padding with something else. 
# or go with other algorithm

In [None]:
# first three rows of the timestamped EMG array
tsemg[:3]

In [None]:
# (rows, samples) of the resampled, timestamped ventilator array
np.shape(tsresampled_vent)

In [None]:
# EMG lags: pad the three EMG leads at the front, and the ventilator block
# (timestamp + three signals) at the back.
elag_shifted_data_emg = np.hstack((out_three, tsemg[0:3]))
elag_shifted_data_vent = np.hstack((tsresampled_vent, out_four))

# EMG leads (Draeger lags): pad the EMG block (three leads + timestamp) at the back and
# the three ventilator signals at the front.
drlag_shifted_data_emg = np.hstack((tsemg, out_four))
# tsresampled_vent keeps its timestamp in row 0, so the signals are rows 1-3.
# Slicing [0:3] here would take the timestamp and drop the third ventilator signal.
drlag_shifted_data_vent = np.hstack((out_three, tsresampled_vent[1:4]))

In [None]:
#elag_shifted_data_emg

In [None]:
#elag_shifted_data_vent

In [None]:
# Pick the pre-built pair that matches the dropdown and stack EMG rows above ventilator rows.
lead_lag = dropdown.value
shifted = (
    np.vstack((elag_shifted_data_emg, elag_shifted_data_vent))
    if lead_lag == 'lags'
    else np.vstack((drlag_shifted_data_emg, drlag_shifted_data_vent))
)

In [None]:
# now we could cut off the unmatched ends if we only want that (not advised here but later)
#shifted = shifted[:,sample_difference:-sample_difference]
# For the EMG-leading case the EMG timestamp goes to the bottom of the stack.
# np.roll(shifted, -1, axis=0) rotated every row up by one, which sent the first EMG lead
# (row 0) to the bottom instead of the timestamp and left the timestamp among the top
# three rows that are later treated as EMG leads. Rebuilding from the two padded halves
# keeps the EMG leads on top and gives the same result when the cell is re-run.
if lead_lag == 'leads':
    shifted = np.vstack((
        drlag_shifted_data_emg[:-1],   # EMG leads
        drlag_shifted_data_vent,       # ventilator rows
        drlag_shifted_data_emg[-1:],   # EMG timestamp row, now last
    ))

Now you have an array, called `shifted`, that has a timestamp row on the bottom and both the EMG and Draeger signals as they happened. The EMG signal will be downsampled. Now let's store what we made, with the metadata of the file names encapsulated in our output file name.

In [None]:
# Folder (relative to the working directory) that receives the stacked arrays.
output_path = 'output2'
isExist = os.path.exists(output_path)
if isExist:
    print("Output files will be sent to the folder named:", output_path)
else:
    # First run: create the folder before anything is written to it.
    os.makedirs(output_path)
    print("The new directory for the stacked arrays is created at folder:", output_path)

In [None]:
# Derive a flat output name from the EMG file path: drop backslashes, '../' prefixes and
# the '.Poly5' extension.
filename = emg_file_chosen
stripped_filename = filename.replace("\\", "")
stripped_filename = stripped_filename.replace('../', '')
stripped_filename = stripped_filename.replace('.Poly5', '')
# Also drop any remaining '/' or ':' so the name cannot point into a sub-folder that does
# not exist, or (for absolute paths / drive letters) make os.path.join discard output_path.
stripped_filename = stripped_filename.replace('/', '').replace(':', '')

# The context manager guarantees the file is flushed and closed; the previous bare
# `file.close` (without parentheses) never actually closed it.
with open(os.path.join(output_path, stripped_filename), "wb") as file:
    # save array to the file
    np.save(file, shifted)

You can also create a processed signal, and add a certain draeger signal (making a 3 row array including the timestamp)

In order to do this, we should discard the 'NaN' padding values and convert the array to a floating-point type.

In [None]:
# Trim the padded columns from both ends; with no offset there is nothing to trim
# (a slice ending at -0 would be empty, hence the separate branch).
if sample_difference:
    trimmed = shifted[:, sample_difference:-sample_difference]
else:
    trimmed = shifted[:, :]
# Either way, the result is converted to float64.
cut_shifted = trimmed.astype('float64')

Now we will do some minimal processing to create the emg values

In [None]:
# Minimal processing chain applied to the top three rows of the trimmed array.
# NOTE(review): the helpers live in helper_functions (not shown); presumably 5 and 450 are
# band edges in Hz, 150 a cut-off in Hz and 2048 the sampling rate -- confirm there.
emg_leads = cut_shifted[:3, :]
emg_bandpassed = hf.emg_bandpass_butter_sample(
    emg_leads, 5, 450, 2048, output='sos')
emg_processed = hf.compute_ICA_two_comp(emg_bandpassed)
emg_picked = hf.pick_more_peaks_array(emg_processed)
emg = hf.emg_highpass_butter(emg_picked, 150, 2048)

Now we can stack our processed emg back on top of the draeger and timestamp

In [None]:
# Processed EMG on top, followed by every row of cut_shifted from index 3 onwards.
remaining_rows = cut_shifted[3:]
processed_emg_and_vent = np.vstack([emg, remaining_rows])

In [None]:
# Display the stacked result for a visual check (numpy abbreviates large arrays).
processed_emg_and_vent

In [None]:
# Folder (relative to the working directory) that receives the processed stacked arrays.
output_path2 = 'output_emg_processed_upsampled'
isExist = os.path.exists(output_path2)
if not isExist:
    os.makedirs(output_path2)
    print("The new directory for the processed stacked arrays is created at folder:", output_path2)
else:
    print("Output files will be sent to the folder named:", output_path2)
filename = emg_file_chosen

# Derive a flat output name from the EMG file path: drop backslashes, '../' prefixes and
# the '.Poly5' extension.
stripped_filename = filename.replace("\\", "")
stripped_filename = stripped_filename.replace('../', '')
stripped_filename = stripped_filename.replace('.Poly5', '')
# Also drop any remaining '/' or ':' so the name cannot point into a sub-folder that does
# not exist, or (for absolute paths / drive letters) make os.path.join discard output_path2.
stripped_filename = stripped_filename.replace('/', '').replace(':', '')

# The context manager guarantees the file is flushed and closed; the previous bare
# `file.close` (without parentheses) never actually closed it.
with open(os.path.join(output_path2, stripped_filename), "wb") as file:
    # save array to the file
    np.save(file, processed_emg_and_vent)

If this is the time shift for all arrays, it can be done to every sample. To be discussed with scientists.

# But fundamentally, when we downsampled the EMG we got something that doesn't work well. We need to redo this to first get our EMG signal, then downsample