# Libraries

In [1]:
import pyedflib
import plotly.express as px
from pathlib import Path
import os
import pandas as pd
import numpy as np
import EDF_wrapper
import filters
import re
from SwallowDetection.SwallowAnntations import get_swallow_annotations

In [None]:
# Load the recordings found in the raw EDF directory.
# NOTE(review): `load_files=True` presumably makes the wrapper read the signal data as well — confirm in EDF_wrapper.
directory = Path("data/edf/")
files = EDF_wrapper.read_files_from_dir(directory, load_files=True)
# Show how many entries were returned.
len(files)

In [2]:
def add_swallow_annotations(files: list, output_path:str="data/annotations/"):
    """Add detected swallow annotations to each file and save the edited copy.

    For every entry of ``files`` the swallow detector is run on the entry's
    ``'filepath'``. Each detected ``(time, annotation)`` pair is appended to
    ``file['header']['annotations']`` as ``[time, -1, annotation]``, the list is
    re-sorted by time, and the file is written with ``EDF_wrapper.save_edf_file``.

    A file whose detection fails is reported on stdout and skipped: it is neither
    modified nor saved. Handling each file on its own guarantees that detected
    annotations are only ever attached to the file they were extracted from, even
    when an earlier file in the list failed.

    Args:
        files: File dictionaries providing at least ``'filepath'`` and
            ``['header']['annotations']`` (a list that is mutated in place).
        output_path: Directory handed to ``EDF_wrapper.save_edf_file``.
    """
    for file in files:
        edf_file = file['filepath']

        # Extract annotations from signal
        try:
            detected = get_swallow_annotations(edf_file)
        except Exception:
            print(f"File {edf_file} failed to get swallow annotations.")
            continue

        times, annotations = detected

        # Add extracted annotations to file's annotation list
        file_annotations = file['header']['annotations']
        file_annotations.extend(
            [time, -1, annotation] for time, annotation in zip(times, annotations)
        )
        file_annotations.sort(key=lambda entry: entry[0])

        # Save edited edf file
        EDF_wrapper.save_edf_file(file, output_path=output_path)

In [None]:
# Annotate every loaded recording; edited copies go to the function's default output_path ("data/annotations/").
add_swallow_annotations(files)

In [3]:
# Reload the recordings from the directory the annotated copies were saved to.
directory = Path("data/annotations/")
annotated_files = EDF_wrapper.read_files_from_dir(directory, load_files=True)
# NOTE(review): this displays every loaded entry including its signal arrays; a summary may be easier to read.
annotated_files

[{'filepath': 'data/annotations/1-1-Bewegung_edited.bdf',
  'signals': array([[-2.65346664e+00, -4.09815972e+00, -4.09921836e+00, ...,
           1.10000000e-05,  1.10000000e-05,  1.10000000e-05],
         [-1.54232685e+00, -1.44602274e+00, -1.44344180e+00, ...,
           1.10000000e-05,  1.10000000e-05,  1.10000000e-05],
         [-3.29886942e+00,  4.98973153e+00,  4.99647033e+00, ...,
           7.62355993e-06,  7.62355993e-06,  7.62355993e-06],
         [-4.99413000e+00, -5.02254299e-02, -4.44210468e-02, ...,
           8.35312535e-06,  8.35312535e-06,  8.35312535e-06],
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00]]),
  'signal_headers': [{'label': 'EMG 1',
    'dimension': 'mV',
    'sample_rate': 4000.0,
    'sample_frequency': 4000.0,
    'physical_max': 1.1e-05,
    'physical_min': -4.25516,
    'digital_max': 8388607,
    'digital_min': -8388608,
    'prefilter': '',
    'transducer': 'transkutan'

In [4]:
def compute_time(sampling_frequency, signal_array):
    """Return the time stamp of every sample in ``signal_array``.

    Sample ``k`` is assigned the time ``k / sampling_frequency``, so the result
    has one entry per element of ``signal_array`` and starts at zero.
    """
    sample_indices = np.arange(len(signal_array))
    return sample_indices / sampling_frequency
    
def find_first_element(list_data, condition):
    """Return the first item of ``list_data`` for which ``condition(item)`` is truthy.

    Items are tested in iteration order and the search stops at the first hit.
    ``None`` is returned when no item satisfies the condition.
    """
    match = None
    for candidate in list_data:
        if not condition(candidate):
            continue
        match = candidate
        break
    return match

In [5]:
def create_annotations_df(file, df_type='general', fileList=False):
    """Build a DataFrame with one row per annotated event and signal channel.

    Annotation labels are expected in the form ``<type>_<name>_<event>`` where
    ``<event>`` is ``start`` or ``stop``. Labels of type ``c`` open (``start``) or
    close (any other event) a category; every other ``start`` label is paired
    with the first following ``<type>_<name>_stop`` label, and the signals of all
    channels are cropped to that time window.

    Args:
        file: File dictionary with ``'filepath'``, ``'signals'`` (one sequence per
            channel), ``'signal_headers'`` (one dict per channel providing
            ``'sample_rate'`` and ``'label'``) and ``['header']['annotations']``
            (entries of the form ``[time, duration, label]``).
        df_type: ``'general'`` selects labels starting with ``p_``, ``c_`` or
            ``t_``; any other value selects labels starting with ``s_``.
        fileList: When true, events are still paired but no rows are collected.

    Returns:
        DataFrame with the columns ``set``, ``subject``, ``category``,
        ``sample_name``, ``start_time``, ``stop_time``, ``data_label``, ``time``
        and ``signal``; ``time`` and ``signal`` hold one array per row.

    Raises:
        ValueError: If a ``start`` label has no matching ``stop`` label after it,
            or if a selected label does not consist of exactly three
            ``_``-separated parts.
    """

    def crop_signals_array(start_time, stop_time, file):
        """Return ``(signal_header, (time, samples))`` per channel for the window."""
        cropped_signals = []
        for channel, signal in enumerate(file["signals"]):
            header = file['signal_headers'][channel]
            sr = header['sample_rate']
            start_idx = round(start_time * sr)
            stop_idx = round(stop_time * sr) + 1
            # Slicing a range applies the same clipping rules as slicing the
            # signal, so only the needed part of the time axis is materialised
            # instead of one time stamp per sample of the whole recording.
            window = range(len(signal))[start_idx:stop_idx]
            time_array = np.arange(window.start, window.stop) / sr
            # Keep the per-channel pairs in a plain list: channels with different
            # sample rates yield windows of different lengths, which cannot be
            # stacked into one rectangular array.
            cropped_signals.append((header, (time_array, np.array(signal[start_idx:stop_idx]))))

        return cropped_signals

    match_pattern = "[pct]_" if df_type == 'general' else "s_"

    general = [row for row in file["header"]["annotations"] if re.match(match_pattern, row[-1])]

    id_rows = {"set": [], "subject": [], "category": [], "sample_name": [],
               "start_time": [], "stop_time": []}

    signal_rows = {"id": [], "data_label": [], "time": [], "signal": []}

    cat = '-'
    event_id = -1
    for i, row in enumerate(general):
        time, _, desc = row
        t, sample, event = desc.split("_")

        if t == "c":
            # Category markers only switch the category assigned to later events.
            cat = sample if event == "start" else '-'
            continue

        if event != "start":
            continue

        start_time = time
        stop_label = f"{t}_{sample}_stop"
        stop_row = next((later for later in general[i:] if later[-1] == stop_label), None)
        if stop_row is None:
            raise ValueError(
                f"No '{stop_label}' annotation found after '{desc}' at time {time}."
            )
        stop_time, _, _ = stop_row

        if fileList:
            continue

        event_id += 1
        id_rows["set"].append(1)
        id_rows["subject"].append(Path(file["filepath"]).stem)
        id_rows["category"].append(cat)
        id_rows["sample_name"].append(sample)
        id_rows["start_time"].append(start_time)
        id_rows["stop_time"].append(stop_time)

        for header, (time_array, samples) in crop_signals_array(start_time, stop_time, file):
            signal_rows["id"].append(event_id)
            signal_rows["data_label"].append(header['label'])
            signal_rows["time"].append(time_array)
            signal_rows["signal"].append(samples)

    main_df = pd.DataFrame(id_rows)
    signals_df = pd.DataFrame(signal_rows)

    # The positional index of main_df is the event id stored in signals_df['id'].
    df = main_df.merge(signals_df, left_index=True, right_on='id')

    return df.drop(columns='id')

In [6]:
from annotations_validation import check_annotations

In [7]:
# Use the first annotated recording for the following cells.
ann_file = annotated_files[0]
ann_file

{'filepath': 'data/annotations/1-1-Bewegung_edited.bdf',
 'signals': array([[-2.65346664e+00, -4.09815972e+00, -4.09921836e+00, ...,
          1.10000000e-05,  1.10000000e-05,  1.10000000e-05],
        [-1.54232685e+00, -1.44602274e+00, -1.44344180e+00, ...,
          1.10000000e-05,  1.10000000e-05,  1.10000000e-05],
        [-3.29886942e+00,  4.98973153e+00,  4.99647033e+00, ...,
          7.62355993e-06,  7.62355993e-06,  7.62355993e-06],
        [-4.99413000e+00, -5.02254299e-02, -4.44210468e-02, ...,
          8.35312535e-06,  8.35312535e-06,  8.35312535e-06],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]]),
 'signal_headers': [{'label': 'EMG 1',
   'dimension': 'mV',
   'sample_rate': 4000.0,
   'sample_frequency': 4000.0,
   'physical_max': 1.1e-05,
   'physical_min': -4.25516,
   'digital_max': 8388607,
   'digital_min': -8388608,
   'prefilter': '',
   'transducer': 'transkutan'},
  {'label': 'EMG 3

In [8]:
# Normalise every annotation label to lower case; the first two fields of each entry are kept as they are.
ann_file["header"]["annotations"] = [
    [entry[0], entry[1], entry[2].lower()]
    for entry in ann_file["header"]["annotations"]
]
ann_file["header"]["annotations"]

[[1.6465, -1.0, 'schlucken normal'],
 [4.027, -1.0, 'c_category1_start'],
 [4.646, -1.0, 'p_elevation_start'],
 [4.646, -1.0, 'p_swallow_start'],
 [4.768, -1.0, 'p_elevation_stop'],
 [4.768, -1.0, 'p_lowering_start'],
 [4.996, -1.0, 'p_lowering_stop'],
 [4.996, -1.0, 'p_swallow_stop'],
 [6.9507, -1.0, 'stop'],
 [8.94, -1.0, 'schlucken normal'],
 [11.11, -1.0, 'p_elevation_start'],
 [11.223, -1.0, 'p_elevation_stop'],
 [11.223, -1.0, 'p_lowering_start'],
 [11.413, -1.0, 'p_lowering_stop'],
 [11.913, -1.0, 'c_category1_stop'],
 [12.9407, -1.0, 'stop'],
 [15.109, -1.0, 'schlucken hoch'],
 [18.18, -1.0, 's_swallow_start'],
 [19.51, -1.0, 's_swallow_stop'],
 [19.5507, -1.0, 'stop'],
 [21.9807, -1.0, 'schlucken hoch'],
 [25.2005, -1.0, 'stop'],
 [27.386, -1.0, 'schlucken tief'],
 [31.415, -1.0, 'stop'],
 [33.6, -1.0, 'schlucken tief'],
 [36.6657, -1.0, 'stop'],
 [51.646, -1.0, 'mendelson'],
 [53.711, -1.0, 'c_category2_start'],
 [54.376, -1.0, 'p_elevation_start'],
 [54.376, -1.0, 'p_swallow

In [9]:
# Run the annotation check imported from annotations_validation on the selected recording.
check_annotations(ann_file)

There are 36 annotations out of the pattern.


True

In [12]:
# 'general' keeps annotations whose label starts with p_, c_ or t_;
# any other df_type (here 'swallows') keeps those whose label starts with s_.
general_df = create_annotations_df(ann_file, 'general')
swallows_df = create_annotations_df(ann_file, 'swallows')

## TSFresh tests

In [None]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters, MinimalFCParameters, EfficientFCParameters

In [None]:
# NOTE(review): `final` is not defined in any cell shown here, so this fails on a fresh kernel — confirm where it is created.
final.dtypes

In [None]:
# Define your extraction settings (optional)
extraction_settings = MinimalFCParameters()

# # Remove the 'sample_entropy' calculation from the extraction settings
# extraction_settings.pop('sample_entropy', None)

# Perform feature extraction without the 'sample_entropy' calculation
X = extract_features(final.head(15000), column_id='id', column_sort='time',
                     #column_kind='data_label', column_value='signal',
                     default_fc_parameters=extraction_settings,
                     impute_function=impute
                     )

In [None]:
# Export the extracted feature matrix to an Excel workbook.
# NOTE(review): assumes the data/xlsx/ directory already exists — confirm.
X.to_excel('data/xlsx/bewegungs_edited-data-label-2_minimal.xlsx')

In [None]:
# Display the extracted feature matrix.
X