# Libraries

In [1]:
import pyedflib
import plotly.express as px
from pathlib import Path
import os
import pandas as pd
import numpy as np
import EDF_wrapper
import filters
import re
from SwallowDetection.SwallowAnntations import get_swallow_annotations

In [74]:
directory = Path("data/edf/")
files = EDF_wrapper.read_files_from_dir(directory, load_files=True)
len(files)

6

In [75]:
def add_swallow_annotations(files: list, output_path:str="data/annotations/"):
    
    # Extract annotations from signal
    ann = []    
    for edf_file in np.asarray([file['filepath'] for file in files]):
        try:
            ann.append(get_swallow_annotations(edf_file))
        except:
            print(f"File {edf_file} failed to get swallow annotations.")
    
    for file, (times, annotations) in zip(files, ann):
        # Add extracted annotations to file's annotation list
        for time, annotation in zip(times, annotations):
            file['header']['annotations'].append([time, -1, annotation])
            
        file['header']['annotations'].sort(key=lambda x: x[0])
        # Save edited edf file
        EDF_wrapper.save_edf_file(file, output_path=output_path)

In [None]:
add_swallow_annotations(files)

In [60]:
saved_file = pyedflib.highlevel.read_edf_header(f"data/annotations/{Path(files[3]['filepath']).name}")

In [61]:
saved_file['annotations']

[[2.8425, -1, 'Schlucken normal'],
 [5.925, -1, 'C_category2_start'],
 [6.64, -1, 's_swallow_start'],
 [6.687, -1, 'P_elevation_start'],
 [6.687, -1, 'P_swallow_start'],
 [7.304, -1, 'P_elevation_stop'],
 [7.304, -1, 'P_lowering_start'],
 [7.539, -1, 'P_lowering_stop'],
 [7.539, -1, 'P_swallow_stop'],
 [7.74, -1, 's_swallow_stop'],
 [8.5357, -1, 'stop'],
 [10.53, -1, 's_swallow_start'],
 [11.0007, -1, 'Schlucken normal'],
 [11.8, -1, 's_swallow_stop'],
 [15.45, -1, 's_swallow_start'],
 [15.463, -1, 'P_elevation_start'],
 [15.463, -1, 'P_swallow_start'],
 [16.079, -1, 'P_elevation_stop'],
 [16.079, -1, 'P_lowering_start'],
 [16.719, -1, 'P_lowering_stop'],
 [16.719, -1, 'P_swallow_stop'],
 [16.73, -1, 's_swallow_stop'],
 [18.3997, -1, 'stop'],
 [19.502, -1, 'C_category2_stop'],
 [20.3732, -1, 'Schlucken hoch'],
 [23.76, -1, 's_swallow_start'],
 [24.77, -1, 's_swallow_stop'],
 [26.7485, -1, 'stop'],
 [28.8607, -1, 'Schlucken hoch'],
 [30.98, -1, 's_swallow_start'],
 [32.03, -1, 's_swallo

In [76]:
def compute_time(sampling_frequency, signal_array):
        # Calculate the time array based on the length of the signal array and the sampling frequency
        total_samples = len(signal_array)
        time_array = np.arange(total_samples) / sampling_frequency
        
        return time_array

In [89]:
# Assess necessity of this function given that the general_df function should be enough
def create_swallows_df(file, fileList=False):
    swallow = iter(filter(lambda x : re.match("s_", x[-1]), file["header"]["annotations"]))

    rows = {"set": [], "filepath": [], "category": [], "sample_name": [], "start_time": [], "stop_time": []}

    for ann in swallow:
        start_time, _, desc = ann
        stop_time, _, _ = next(swallow)
        
        if not fileList:
            rows["set"].append(1)
            rows["filepath"].append(Path(file["filepath"]).name)
            rows["category"].append(None)
            rows["sample_name"].append(desc)
            rows["start_time"].append(start_time)
            rows["stop_time"].append(stop_time)
        else:
            pass
            # Implement file list input if necessary

    df = pd.DataFrame(rows)
    
    df['duration'] = df["stop_time"] - df["start_time"]
    
    return df

In [90]:
def create_general_df(file, fileList=False):
    def find_first_element(list_data, condition):
        for element in list_data:
            if condition(element):
                return element
        return None
    
    def crop_signals_array(start_time, stop_time, file):
        cropped_signals = []
        for channel, signal in enumerate(file["signals"]):
            sr = file['signal_headers'][channel]['sample_rate']
            start_idx = round(start_time * sr)
            stop_idx = round(stop_time * sr) + 1
            time_array = compute_time(sr, signal)
            cropped_signals.append((time_array[start_idx: stop_idx], np.array(signal[start_idx: stop_idx])))
        return list(zip(file['signal_headers'], np.array(cropped_signals)))

    general = list(filter(lambda x : re.match("[pcts]_", x[-1]), file["header"]["annotations"]))

    rows = {"set": [], "subject": [], "category": [], "sample_name": [],
            "start_time": [], "stop_time": [],
             "header": [], "data_label": [], "time": [], "signal": []}

    for i, row in enumerate(general):
        time, _, desc = row
        s = desc.split("_")
        
        if s[0] == "c":
            if s[-1] == "start":
                _, cat, _ = s
            else:
                cat = '-'
            
        else:
            m, sample, event = s
            if event == "start":
                start_time = time
                stop_time, _, _ = find_first_element(general[i:], lambda x: x[-1] == f"{m}_{sample}_stop")
                if not fileList:
                    signals = crop_signals_array(start_time, stop_time, file)
                    for h, sigs in signals:
                        rows["set"].append(1)
                        rows["subject"].append(Path(file["filepath"]).stem)
                        rows["category"].append(cat)
                        rows["sample_name"].append(s[1])
                        rows["start_time"].append(start_time)
                        rows["stop_time"].append(stop_time)
                        rows["header"].append(h)
                        rows["data_label"].append(h['label'])
                        rows["time"].append(sigs[0])
                        rows["signal"].append(sigs[1])

    df = pd.DataFrame(rows)
    
    df = df.explode(['time', 'signal']).reset_index(drop=True)
    
    df["id"] = (df["subject"] + df["category"] + df["sample_name"]).astype("category")
    df["data_label"] = df["data_label"].astype("category")
    
    df["time"] = df["time"].astype(float)
    df["signal"] = df["signal"].astype(float)
    
    cat_columns = df.select_dtypes(['category']).columns

    df[cat_columns] = df[cat_columns].apply(lambda x: x.cat.codes)
    
    # df['duration'] = df["stop_time"] - df["start_time"]
     
    return df

In [79]:
file = files[3]

In [80]:
from annotations_validation import check_T_annotations, check_C_annotations

def check_annotations(file: dict) -> bool:
    match_pattern = r"[cst]_\w+_(start|stop)"
    annotations = file["header"]["annotations"]
    
    if any(map(lambda x : not re.match(match_pattern, x[-1]), annotations)):
        pattern_mismatches = [i for i, val in enumerate(map(lambda x : re.match(match_pattern, x[-1]), annotations)) if val is None]
        print(f"There are {len(pattern_mismatches)} annotations out of the pattern.")
        #for idx in pattern_mismatches:
        #    print(idx, file["header"]["annotations"][idx])
    
    if not annotations:
        print("There are no annotations in the file.")
        return False
    
    elif check_C_annotations(list(filter(lambda x : x[-1].startswith("c_"), annotations))):
        if check_T_annotations(list(filter(lambda x : re.match(r"[pts]_", x[-1]), annotations))):
            return True
        return False
    

In [81]:
saved_file

{'technician': 'Schultheiss',
 'recording_additional': 'Bewegung, Ref.',
 'patientname': 'Holger Nahrstaedt',
 'patient_additional': 'gesund',
 'patientcode': '1',
 'equipment': 'Device-Nr.: 0',
 'admincode': '',
 'gender': 'Male',
 'startdate': datetime.datetime(2010, 7, 20, 14, 45, 47),
 'birthdate': '15 jun 1980',
 'Duration': 182,
 'SignalHeaders': [{'label': 'EMG 1',
   'dimension': 'mV',
   'sample_rate': 4000.0,
   'sample_frequency': 4000.0,
   'physical_max': 185.0,
   'physical_min': -185.0,
   'digital_max': 8388607,
   'digital_min': -8388608,
   'prefilter': '',
   'transducer': 'transkutan'},
  {'label': 'EMG 3',
   'dimension': 'mV',
   'sample_rate': 4000.0,
   'sample_frequency': 4000.0,
   'physical_max': 185.0,
   'physical_min': -185.0,
   'digital_max': 8388607,
   'digital_min': -8388608,
   'prefilter': '',
   'transducer': 'transkutan'},
  {'label': 'BI 1',
   'dimension': 'Ohm',
   'sample_rate': 4000.0,
   'sample_frequency': 4000.0,
   'physical_max': 24.8109

In [84]:
file["header"]["annotations"] = list(map(lambda x: [x[0], x[1], x[2].lower()], file["header"]["annotations"]))

In [85]:
file["header"]["annotations"]

[[2.8425, -1.0, 'schlucken normal'],
 [5.925, -1.0, 'c_category2_start'],
 [6.687, -1.0, 'p_elevation_start'],
 [6.687, -1.0, 'p_swallow_start'],
 [7.304, -1.0, 'p_elevation_stop'],
 [7.304, -1.0, 'p_lowering_start'],
 [7.539, -1.0, 'p_lowering_stop'],
 [7.539, -1.0, 'p_swallow_stop'],
 [8.5357, -1.0, 'stop'],
 [11.0007, -1.0, 'schlucken normal'],
 [15.463, -1.0, 'p_elevation_start'],
 [15.463, -1.0, 'p_swallow_start'],
 [16.079, -1.0, 'p_elevation_stop'],
 [16.079, -1.0, 'p_lowering_start'],
 [16.719, -1.0, 'p_lowering_stop'],
 [16.719, -1.0, 'p_swallow_stop'],
 [18.3997, -1.0, 'stop'],
 [19.502, -1.0, 'c_category2_stop'],
 [20.3732, -1.0, 'schlucken hoch'],
 [26.7485, -1.0, 'stop'],
 [28.8607, -1.0, 'schlucken hoch'],
 [33.0822, -1.0, 'stop'],
 [34.8947, -1.0, 'schlucken tief'],
 [40.2047, -1.0, 'stop'],
 [41.9152, -1.0, 'schlucken tief'],
 [45.999, -1.0, 'stop'],
 [48.576, -1.0, 'schlucken knie re'],
 [56.2337, -1.0, 'stop'],
 [58.7792, -1.0, 'schlucken knie li'],
 [64.0495, -1.0, '

In [None]:
file["header"]["annotations"].append([15.20, -1, 't_swall_start'])

In [None]:
file["header"]["annotations"].pop()

In [86]:
check_annotations(file)

There are 62 annotations out of the pattern.


True

In [91]:
swallows_df = create_swallows_df(file)
swallows_df

Unnamed: 0,set,filepath,category,sample_name,start_time,stop_time,duration


In [92]:
general_df = create_general_df(file)
final = general_df.loc[general_df['data_label'] == 2, ['id', 'time', 'signal']]
final

Unnamed: 0,id,time,signal
0,2,6.68700,-10.326908
1,2,6.68725,-10.325298
2,2,6.68750,-10.335906
3,2,6.68775,-10.338045
4,2,6.68800,-10.349182
...,...,...,...
153836,0,163.88000,-9.764163
153837,0,163.88025,-9.761517
153838,0,163.88050,-9.768949
153839,0,163.88075,-9.768949


## TSFresh tests

In [None]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters, MinimalFCParameters

In [None]:
final.dtypes

In [None]:
# Define your extraction settings (optional)
extraction_settings = MinimalFCParameters()

# # Remove the 'sample_entropy' calculation from the extraction settings
# extraction_settings.pop('sample_entropy', None)

# Perform feature extraction without the 'sample_entropy' calculation
X = extract_features(final.head(15000), column_id='id', column_sort='time',
                     #column_kind='data_label', column_value='signal',
                     default_fc_parameters=extraction_settings,
                     impute_function=impute
                     )

In [None]:
X.to_excel('data/xlsx/bewegungs_edited-data-label-2_minimal.xlsx')