# Libraries

In [1]:
import pyedflib
import plotly.express as px
from pathlib import Path
import os
import pandas as pd
import numpy as np
import EDF_wrapper
import filters
import re

In [2]:
# Read the recordings found under the data directory through the project-local
# EDF_wrapper helper. The saved output shows a list of dicts with keys such as
# 'filepath', 'signals' and 'signal_headers'.
# NOTE(review): `load_files=True` presumably makes the helper load the signal
# data eagerly - confirm against EDF_wrapper.
directory = Path("data/edf/")
files = EDF_wrapper.read_files_from_dir(directory, load_files=True)
files

[{'filepath': 'data\\edf\\1-1-Bewegung.bdf',
  'signals': array([[-2.65347050e+00, -4.09816558e+00, -4.09922416e+00, ...,
           1.10268599e-05,  1.10268599e-05,  1.10268599e-05],
         [-1.54233793e+00, -1.44602933e+00, -1.44344905e+00, ...,
           1.10268599e-05,  1.10268599e-05,  1.10268599e-05],
         [-3.29887007e+00,  4.98973220e+00,  4.99647129e+00, ...,
           7.58485959e-06,  7.58485959e-06,  7.58485959e-06],
         [-4.99413858e+00, -5.02268751e-02, -4.44226541e-02, ...,
           6.99207944e-06,  6.99207944e-06,  6.99207944e-06],
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00]]),
  'signal_headers': [{'label': 'EMG 1',
    'dimension': 'mV',
    'sample_rate': 4000.0,
    'sample_frequency': 4000.0,
    'physical_max': 185.0,
    'physical_min': -185.0,
    'digital_max': 8388607,
    'digital_min': -8388608,
    'prefilter': '',
    'transducer': 'transkutan'},
   {'label': '

In [6]:
def create_swallows_df(file, fileList=False):
    """Build a table of swallow events from the ``S_`` annotations of a recording.

    Annotations whose description contains ``"S_"`` are consumed in consecutive
    pairs: the first annotation of a pair provides the start time and the
    sample label, the second one provides the stop time.

    Args:
        file: Mapping with a ``"filepath"`` entry and
            ``file["header"]["annotations"]``, an iterable of 3-item sequences
            whose first item is the time and whose last item is the
            description.
        fileList: When truthy no rows are collected, so an empty table is
            returned.

    Returns:
        pandas.DataFrame with the columns ``set``, ``filepath``, ``category``,
        ``sample``, ``start_time``, ``stop_time`` and ``duration``
        (``stop_time - start_time``).

    Raises:
        ValueError: If the number of ``S_`` annotations is odd, i.e. the last
            start annotation has no partner providing its stop time.
    """
    swallow_annotations = [
        ann for ann in file["header"]["annotations"] if "S_" in ann[-1]
    ]

    # The annotations are paired positionally, so an odd count means the last
    # start has no stop. Fail with the location instead of leaking a bare
    # StopIteration out of the pairing loop.
    if len(swallow_annotations) % 2:
        unpaired_time, _, unpaired_desc = swallow_annotations[-1]
        raise ValueError(
            f"Swallow annotation {unpaired_desc!r} at {unpaired_time} s "
            "has no matching stop annotation"
        )

    rows = {"set": [], "filepath": [], "category": [], "sample": [], "start_time": [], "stop_time": []}

    starts = swallow_annotations[0::2]
    stops = swallow_annotations[1::2]
    for (start_time, _, desc), (stop_time, _, _) in zip(starts, stops):
        if not fileList:
            rows["set"].append(1)
            rows["filepath"].append(Path(file["filepath"]).name)
            rows["category"].append(None)
            rows["sample"].append(desc)
            rows["start_time"].append(start_time)
            rows["stop_time"].append(stop_time)

    df = pd.DataFrame(rows)
    df["duration"] = df["stop_time"] - df["start_time"]

    return df

In [7]:
def create_general_df(file, fileList=False):
    """Build one row per ``<type>_<label>_start`` / ``_stop`` event of a recording.

    Only annotations whose description contains ``"_"`` are considered.
    ``C_<label>_start`` makes ``<label>`` the current category and any other
    ``C_`` annotation clears it. Every other ``..._start`` annotation is
    matched with the first annotation at or after it whose description is
    exactly ``<type>_<label>_stop``; the signals between the two times are
    cropped and stored with the event.

    Args:
        file: Mapping with ``"filepath"``, ``"signals"`` (one sequence per
            channel), ``"signal_headers"`` (one mapping per channel providing
            ``"sample_rate"``) and ``file["header"]["annotations"]`` (3-item
            sequences whose first item is the time and whose last item is the
            description).
        fileList: When truthy no rows are collected, so an empty table is
            returned.

    Returns:
        pandas.DataFrame with the columns ``set``, ``subject``, ``category``,
        ``sample``, ``start_time``, ``stop_time`` and ``signals``.

    Raises:
        ValueError: If an annotation that has to be split does not consist of
            exactly three ``_``-separated parts, or if a start annotation has
            no matching stop annotation.
    """
    def find_first_element(list_data, condition):
        """Return the first element satisfying ``condition`` or ``None``."""
        return next((element for element in list_data if condition(element)), None)

    def crop_signals_array(start_time, stop_time, file):
        """Cut every channel to ``[start_time, stop_time)`` using its own sample rate."""
        cropped_signals = []
        for channel, signal in enumerate(file["signals"]):
            sample_rate = file['signal_headers'][channel]['sample_rate']
            start_idx = round(start_time * sample_rate)
            stop_idx = round(stop_time * sample_rate)
            cropped_signals.append(np.array(signal[start_idx: stop_idx]))
        return np.array(cropped_signals)

    def require_three_parts(parts, desc, time):
        """Raise a located error when ``desc`` is not ``<type>_<label>_<event>``."""
        if len(parts) != 3:
            raise ValueError(
                f"Annotation {desc!r} at {time} s does not match "
                "<type>_<label>_start/stop"
            )

    general = [ann for ann in file["header"]["annotations"] if "_" in ann[-1]]

    rows = {"set": [], "subject": [], "category": [], "sample": [], "start_time": [], "stop_time": [], "signals": []}

    # No category is active until a C_<label>_start annotation has been seen.
    # Without this default an event preceding the first C_ annotation would
    # hit an unbound local variable.
    cat = None

    for i, row in enumerate(general):
        time, _, desc = row
        s = desc.split("_")

        if s[0] == "C":
            if s[-1] == "start":
                require_three_parts(s, desc, time)
                _, cat, _ = s
            else:
                cat = None

        else:
            require_three_parts(s, desc, time)
            m, sample, event = s
            if event == "start":
                start_time = time
                stop_annotation = find_first_element(
                    general[i:], lambda x: x[-1] == f"{m}_{sample}_stop"
                )
                if stop_annotation is None:
                    raise ValueError(
                        f"Annotation {desc!r} at {time} s has no matching "
                        f"'{m}_{sample}_stop' annotation"
                    )
                stop_time = stop_annotation[0]
                if not fileList:
                    rows["set"].append(1)
                    rows["subject"].append(Path(file["filepath"]).stem)
                    rows["category"].append(cat)
                    rows["sample"].append(sample)
                    rows["start_time"].append(start_time)
                    rows["stop_time"].append(stop_time)
                    rows["signals"].append(crop_signals_array(start_time, stop_time, file))

    return pd.DataFrame(rows)

In [8]:
# Pick one loaded recording (index 3) to explore in the following cells.
file = files[3]

In [9]:
# Warn when at least one annotation description does not start with the
# <letter>_<label>_start/stop naming pattern.
if any(
    re.match(r"\w_\w+_(start|stop)", annotation[-1]) is None
    for annotation in file["header"]["annotations"]
):
    print("There are annotations out of the pattern.")

There are annotations out of the pattern.


In [10]:
# Annotations whose description contains no underscore (free-text labels).
swallow = [
    annotation
    for annotation in file["header"]["annotations"]
    if "_" not in annotation[-1]
]
swallow

[[2.8425, -1.0, 'Schlucken normal'],
 [8.5357, -1.0, 'stop'],
 [11.0007, -1.0, 'Schlucken normal'],
 [18.3997, -1.0, 'stop'],
 [20.3732, -1.0, 'Schlucken hoch'],
 [26.7485, -1.0, 'stop'],
 [28.8607, -1.0, 'Schlucken hoch'],
 [33.0822, -1.0, 'stop'],
 [34.8947, -1.0, 'Schlucken tief'],
 [40.2047, -1.0, 'stop'],
 [41.9152, -1.0, 'Schlucken tief'],
 [45.999, -1.0, 'stop'],
 [48.576, -1.0, 'Schlucken Knie re'],
 [56.2337, -1.0, 'stop'],
 [58.7792, -1.0, 'Schlucken Knie li'],
 [64.0495, -1.0, 'stop'],
 [67.543, -1.0, 'Mendelson'],
 [79.005, -1.0, 'stop'],
 [81.05, -1.0, 'Mendelson'],
 [86.7565, -1.0, 'stop'],
 [92.9237, -1.0, 'Sprechen'],
 [97.8187, -1.0, 'stop'],
 [100.56, -1.0, 'Zunge'],
 [106.7885, -1.0, 'stop'],
 [108.7557, -1.0, 'Z..hne'],
 [114.7772, -1.0, 'stop'],
 [118.2972, -1.0, 'Blick auf Knie re'],
 [123.7447, -1.0, 'stop'],
 [125.433, -1.0, 'Blick auf Knie li'],
 [128.539, -1.0, 'stop'],
 [134.1335, -1.0, 'Kopf rechts'],
 [139.4545, -1.0, 'stop'],
 [141.8857, -1.0, 'Kopf links'

In [11]:
# Positions of the annotations whose description does not match the
# <letter>_<label>_start/stop naming pattern.
pattern_mismatches = [
    index
    for index, annotation in enumerate(file["header"]["annotations"])
    if re.match(r"\w_\w+_(start|stop)", annotation[-1]) is None
]

In [12]:
# Annotations whose description contains an underscore (structured labels).
general = [
    annotation
    for annotation in file["header"]["annotations"]
    if "_" in annotation[-1]
]
general

[[5.925, -1.0, 'C_category2_start'],
 [6.687, -1.0, 'P_elevation_start'],
 [6.687, -1.0, 'P_swallow_start'],
 [7.304, -1.0, 'P_elevation_stop'],
 [7.304, -1.0, 'P_lowering_start'],
 [7.539, -1.0, 'P_lowering_stop'],
 [7.539, -1.0, 'P_swallow_stop'],
 [15.463, -1.0, 'P_elevation_start'],
 [15.463, -1.0, 'P_swallow_start'],
 [16.079, -1.0, 'P_elevation_stop'],
 [16.079, -1.0, 'P_lowering_start'],
 [16.719, -1.0, 'P_lowering_stop'],
 [16.719, -1.0, 'P_swallow_stop'],
 [19.502, -1.0, 'C_category2_stop'],
 [83.653, -1.0, 'P_swallow_start'],
 [85.317, -1.0, 'P_swallow_stop'],
 [156.447, -1.0, 'P_movement_start'],
 [158.004, -1.0, 'P_movement_stop'],
 [162.616, -1.0, 'P_movement_start'],
 [163.881, -1.0, 'P_movement_stop']]

In [13]:
def ann_time_to_string(time_seconds):
    """Format a time given in seconds as ``h:m:s`` without zero padding.

    Each component is truncated with ``int``; fractions of a second are
    dropped.

    Args:
        time_seconds: Time in seconds.

    Returns:
        String of the form ``"<hours>:<minutes>:<seconds>"``.
    """
    hours = int(time_seconds / 60 / 60)
    # Seconds left once the whole hours have been removed.
    remainder = time_seconds - hours * 60 * 60
    minutes = int(remainder / 60)
    seconds = int(remainder - minutes * 60)
    return f"{hours}:{minutes}:{seconds}"

In [14]:
def check_T_annotations(ann_list):
    """Validate that test annotations are well formed and correctly paired.

    Every annotation description (third item of each entry) must consist of
    exactly three ``_``-separated parts ending in ``start`` or ``stop``.
    Each ``<type>_<label>_start`` needs a later ``<type>_<label>_stop`` and
    each ``<type>_<label>_stop`` needs an earlier ``<type>_<label>_start``.
    Every violation is printed, together with the annotation time for pairing
    problems.

    The event kind is taken from the exact suffix, so labels that merely
    contain the text "start" or "stop" (e.g. ``P_restart_stop``) are
    classified correctly.

    Args:
        ann_list: Sequence of annotations; item 0 of each entry is the time in
            seconds and item 2 is the description.

    Returns:
        ``True`` when no violation was found, ``False`` otherwise (including
        malformed descriptions).
    """
    check = True
    descriptions = [ann[2] for ann in ann_list]

    for i, desc in enumerate(descriptions):
        parts = desc.split("_")

        # A malformed description cannot be paired reliably: report and skip.
        if len(parts) != 3 or parts[2] not in ("start", "stop"):
            print("Wrong Probe Annotation Format " + desc + " T_<label>_start/stop required")
            check = False
            continue

        prefix = "_".join(parts[:2])
        time_string_needed = False

        if parts[2] == "start":
            # The matching stop has to appear after the start.
            if prefix + "_stop" not in descriptions[i + 1:]:
                time_string_needed = True
                message = "No Stop For Test "
        else:
            # The matching start has to appear before the stop.
            if prefix + "_start" not in descriptions[:i]:
                time_string_needed = True
                message = "No Start For Test "

        if time_string_needed:
            check = False
            time_string = ann_time_to_string(ann_list[i][0])
            print(message + desc + " " + time_string)

    return check

In [15]:
def check_C_annotations(C_ann_list):
    """Validate that category annotations form consecutive start/stop pairs.

    The list is walked front to back. A well-formed ``C_<label>_start`` must
    be followed directly by a well-formed ``..._stop`` carrying the same
    label. Every violation is printed, together with the annotation time for
    pairing problems.

    The walk always advances, so a stop without a preceding start (or any
    other unexpected entry) is reported instead of stalling the loop, and the
    last annotation of the list is examined as well.

    Args:
        C_ann_list: Sequence of annotations; item 0 of each entry is the time
            in seconds and item 2 is the description.

    Returns:
        ``True`` when no violation was found, ``False`` otherwise (including
        malformed descriptions).
    """
    def split_if_well_formed(desc):
        """Return the three parts of ``desc`` or ``None`` when it is malformed."""
        parts = desc.split("_")
        if len(parts) == 3 and parts[2] in ("start", "stop"):
            return parts
        return None

    check = True
    total = len(C_ann_list)
    i = 0

    while i < total:
        time, desc = C_ann_list[i][0], C_ann_list[i][2]
        parts = split_if_well_formed(desc)

        if parts is None:
            print("Wrong Category Annotation Format " + desc + " C_<label>_start/stop required")
            check = False
            i = i + 1
            continue

        if parts[2] == "stop":
            # A stop reached here was not consumed by a preceding start.
            print("Missing Start Annotations for " + desc + " at " + ann_time_to_string(time))
            check = False
            i = i + 1
            continue

        # `desc` is a well-formed start: the next entry has to be its stop.
        next_parts = None
        if i + 1 < total:
            next_parts = split_if_well_formed(C_ann_list[i + 1][2])

        if next_parts is None or next_parts[2] != "stop":
            print("Missing Stop Annotations for " + desc + " at " + ann_time_to_string(time))
            check = False
            # Only skip the start; the following entry is judged on its own.
            i = i + 1
            continue

        # check identical category label
        if parts[1] != next_parts[1]:
            time_string_1 = ann_time_to_string(time)
            time_string_2 = ann_time_to_string(C_ann_list[i + 1][0])

            print("Category Mismatch for " + desc + " at " + time_string_1 +
                  " and " + C_ann_list[i + 1][2] + " at " + time_string_2)
            check = False

        i = i + 2

    return check

In [16]:
# Swallow table for the selected recording; the saved output below shows only
# the column header, i.e. no rows.
swallows_df = create_swallows_df(file)
swallows_df

Unnamed: 0,set,filepath,category,sample,start_time,stop_time,duration


In [17]:
# Event table for the selected recording: one row per start/stop pair,
# including the signals cropped to the event.
general_df = create_general_df(file)
general_df

Unnamed: 0,set,subject,category,sample,start_time,stop_time,signals
0,1,1-8-Bewegung_edited,category2,elevation,6.687,7.304,"[[-10.326907952243564, -10.325298030692222, -1..."
1,1,1-8-Bewegung_edited,category2,swallow,6.687,7.539,"[[-10.326907952243564, -10.325298030692222, -1..."
2,1,1-8-Bewegung_edited,category2,lowering,7.304,7.539,"[[-10.381005727112635, -10.374102912789757, -1..."
3,1,1-8-Bewegung_edited,category2,elevation,15.463,16.079,"[[-10.418122137673029, -10.399023616255738, -1..."
4,1,1-8-Bewegung_edited,category2,swallow,15.463,16.719,"[[-10.418122137673029, -10.399023616255738, -1..."
5,1,1-8-Bewegung_edited,category2,lowering,16.079,16.719,"[[-10.403279984192848, -10.402750694915694, -1..."
6,1,1-8-Bewegung_edited,,swallow,83.653,85.317,"[[-10.476983516036482, -10.449945655461887, -1..."
7,1,1-8-Bewegung_edited,,movement,156.447,158.004,"[[-9.821458746281788, -9.806594539081726, -9.8..."
8,1,1-8-Bewegung_edited,,movement,162.616,163.881,"[[-9.7519674749355, -9.754084632044114, -9.750..."


In [18]:
# Sanity check: number of cropped samples of the first channel in row 8
# divided by that channel's sample rate. The saved result (1.265) equals
# stop_time - start_time of that row (163.881 - 162.616).
len(general_df.signals.iloc[8][0]) / file['signal_headers'][0]['sample_rate']

1.265

In [20]:
def create_signals_df(file):
    """Reshape all channels of a recording into one long-format table.

    For every channel a time axis is derived from the channel's
    ``sample_frequency`` (sample index divided by the frequency). The channels
    are stacked below each other in header order.

    The table is assembled from one vectorised frame per channel instead of
    appending sample by sample, which avoids a Python-level loop over every
    sample of the recording.

    Args:
        file: Mapping with ``"filepath"``, ``"signal_headers"`` (one mapping
            per channel providing ``"label"`` and ``"sample_frequency"``) and
            ``"signals"`` (one sequence of samples per channel).

    Returns:
        pandas.DataFrame with the columns ``filepath``, ``data_label``,
        ``time`` and ``signal`` and a fresh 0..N-1 index; empty (with the same
        columns) when the recording has no channels.
    """
    columns = ["filepath", "data_label", "time", "signal"]

    channel_frames = []
    for header, signal in zip(file['signal_headers'], file["signals"]):
        samples = np.asarray(signal)
        channel_frames.append(
            pd.DataFrame(
                {
                    # Scalars are broadcast to the length of the sample array.
                    "filepath": file["filepath"],
                    "data_label": header["label"],
                    "time": np.arange(len(samples)) / header["sample_frequency"],
                    "signal": samples,
                },
                columns=columns,
            )
        )

    # pd.concat refuses an empty list, so keep the empty-table result explicit.
    if not channel_frames:
        return pd.DataFrame({column: [] for column in columns})

    return pd.concat(channel_frames, ignore_index=True)

In [21]:
def compute_time(sampling_frequency, signal_array):
    """Return the time axis of a signal sampled at a constant rate.

    Args:
        sampling_frequency: Number of samples per time unit.
        signal_array: Sized sequence of samples; only its length is used.

    Returns:
        numpy.ndarray holding ``index / sampling_frequency`` for every sample
        index ``0 .. len(signal_array) - 1``.
    """
    sample_indices = np.arange(len(signal_array))
    return sample_indices / sampling_frequency

In [22]:
# First channel of the selected recording and its sample rate, used in the
# next cell to try out compute_time.
sig_arr = file['signals'][0]
fs = file['signal_headers'][0]['sample_rate']

In [23]:
# Time axis of the first channel (sample index divided by the sample rate).
time = compute_time(fs, sig_arr)

In [24]:
# Long-format table with one row per sample and channel of the selected
# recording (about 3.6 million rows in the saved output).
signals_df = create_signals_df(file)
signals_df

Unnamed: 0,filepath,data_label,time,signal
0,data\edf\1-8-Bewegung_edited.bdf,EMG 1,0.00000,-8.244640
1,data\edf\1-8-Bewegung_edited.bdf,EMG 1,0.00025,-10.401141
2,data\edf\1-8-Bewegung_edited.bdf,EMG 1,0.00050,-10.401670
3,data\edf\1-8-Bewegung_edited.bdf,EMG 1,0.00075,-10.392672
4,data\edf\1-8-Bewegung_edited.bdf,EMG 1,0.00100,-10.384711
...,...,...,...,...
3639995,data\edf\1-8-Bewegung_edited.bdf,sync,181.99875,0.000000
3639996,data\edf\1-8-Bewegung_edited.bdf,sync,181.99900,0.000000
3639997,data\edf\1-8-Bewegung_edited.bdf,sync,181.99925,0.000000
3639998,data\edf\1-8-Bewegung_edited.bdf,sync,181.99950,0.000000


In [None]:
# Keep only the rows of the two channels used for the feature extraction tests.
signals_df_crop = signals_df.loc[signals_df["data_label"].isin(['EMG 1', 'BI 1'])]
signals_df_crop

## TSFresh tests

In [None]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters

In [None]:
# Extract the comprehensive tsfresh feature set with one time series per
# channel label: rows are grouped by `data_label`, ordered by `time`, and the
# values are read from `signal`.
extraction_settings = ComprehensiveFCParameters()

X = extract_features(signals_df_crop, column_id='data_label', column_sort='time', column_value='signal',
                     default_fc_parameters=extraction_settings,
                     # NOTE(review): tsfresh's `impute` replaces NaN/inf feature values
                     # column-wise rather than removing features - confirm against the docs
                     impute_function=impute)

In [None]:
# Alternative layout: one entity per file (`filepath` as id) with the channel
# label passed as the kind of time series; default feature settings are used.
extract_features(signals_df_crop, column_id="filepath", column_sort="time", column_kind="data_label", column_value="signal")

## Annotation validation: raise an error when a mistake is found and report where in the recording it is located