# Libraries

In [1]:
import pyedflib
import plotly.express as px
from pathlib import Path
import os
import pandas as pd
import numpy as np
import EDF_wrapper
import filters
import re
from SwallowDetection.SwallowAnntations import get_swallow_annotations

In [2]:
directory = Path("data/edf/")
files = EDF_wrapper.read_files_from_dir(directory, load_files=True)
len(files)

6

In [3]:
def add_swallow_annotations(files: list, output_path:str="data/annotations/"):
    
    # Extract annotations from signal
    ann = []    
    for edf_file in np.asarray([file['filepath'] for file in files]):
        try:
            ann.append(get_swallow_annotations(edf_file))
        except:
            print(f"File {edf_file} failed to get swallow annotations.")
    
    for file, (times, annotations) in zip(files, ann):
        # Add extracted annotations to file's annotation list
        for time, annotation in zip(times, annotations):
            file['header']['annotations'].append([time, -1, annotation])
            
        file['header']['annotations'].sort(key=lambda x: x[0])
        # Save edited edf file
        EDF_wrapper.save_edf_file(file, output_path=output_path)

In [None]:
add_swallow_annotations(files);

In [4]:
saved_file = pyedflib.highlevel.read_edf_header(f"data/annotations/{Path(files[-2]['filepath']).name}")

In [5]:
saved_file['annotations']

[[0.0, b'0', 'saliva'],
 [2.3989, b'0', 'Ambu BlueSensor'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [47.2, -1, 's_swallow_stop'],
 [47.2, -1, 's_swallow_stop'],
 [47.2, -1, 's_swallow_stop'],
 [47.2, -1, 's_swallow_stop'],
 

In [6]:
def compute_time(sampling_frequency, signal_array):
        # Calculate the time array based on the length of the signal array and the sampling frequency
        total_samples = len(signal_array)
        time_array = np.arange(total_samples) / sampling_frequency
        
        return time_array

In [7]:
def create_swallows_df(file, fileList=False):
    swallow = iter(filter(lambda x : "S_" in x[-1], file["header"]["annotations"]))

    rows = {"set": [], "filepath": [], "category": [], "sample_name": [], "start_time": [], "stop_time": []}

    for ann in swallow:
        start_time, _, desc = ann
        stop_time, _, _ = next(swallow)
        
        if not fileList:
            rows["set"].append(1)
            rows["filepath"].append(Path(file["filepath"]).name)
            rows["category"].append(None)
            rows["sample_name"].append(desc)
            rows["start_time"].append(start_time)
            rows["stop_time"].append(stop_time)

    df = pd.DataFrame(rows)
    
    df['duration'] = df["stop_time"] - df["start_time"]
    
    return df

In [8]:
def create_general_df(file, fileList=False):
    def find_first_element(list_data, condition):
        for element in list_data:
            if condition(element):
                return element
        return None
    
    def crop_signals_array(start_time, stop_time, file):
        cropped_signals = []
        for channel, signal in enumerate(file["signals"]):
            sr = file['signal_headers'][channel]['sample_rate']
            start_idx = round(start_time * sr)
            stop_idx = round(stop_time * sr) + 1
            time_array = compute_time(sr, signal)
            cropped_signals.append((time_array[start_idx: stop_idx], np.array(signal[start_idx: stop_idx])))
        return list(zip(file['signal_headers'], np.array(cropped_signals)))

    general = list(filter(lambda x : "_" in x[-1], file["header"]["annotations"]))

    rows = {"set": [], "subject": [], "category": [], "sample_name": [],
            "start_time": [], "stop_time": [],
             "header": [], "data_label": [], "time": [], "signal": []}

    for i, row in enumerate(general):
        time, _, desc = row
        s = desc.split("_")
        
        if s[0] == "C":
            if s[-1] == "start":
                _, cat, _ = s
            else:
                cat = '-'
            
        else:
            m, sample, event = s
            if event == "start":
                start_time = time
                stop_time, _, _ = find_first_element(general[i:], lambda x: x[-1] == f"{m}_{sample}_stop")
                if not fileList:
                    signals = crop_signals_array(start_time, stop_time, file)
                    for h, sigs in signals:
                        rows["set"].append(1)
                        rows["subject"].append(Path(file["filepath"]).stem)
                        rows["category"].append(cat)
                        rows["sample_name"].append(s[1])
                        rows["start_time"].append(start_time)
                        rows["stop_time"].append(stop_time)
                        rows["header"].append(h)
                        rows["data_label"].append(h['label'])
                        rows["time"].append(sigs[0])
                        rows["signal"].append(sigs[1])

    df = pd.DataFrame(rows)
    
    df = df.explode(['time', 'signal']).reset_index(drop=True)
    
    df["id"] = (df["subject"] + df["category"] + df["sample_name"]).astype("category")
    df["data_label"] = df["data_label"].astype("category")
    
    df["time"] = df["time"].astype(float)
    df["signal"] = df["signal"].astype(float)
    
    cat_columns = df.select_dtypes(['category']).columns

    df[cat_columns] = df[cat_columns].apply(lambda x: x.cat.codes)
    
    # df['duration'] = df["stop_time"] - df["start_time"]
     
    return df

In [9]:
file = files[4]

In [24]:
from annotations_validation import check_T_annotations, check_C_annotations

def check_annotations(file):
    match_pattern = r"[cst]_\w+_(start|stop)"
    annotations = file["header"]["annotations"]
    
    if any(map(lambda x : not re.match(match_pattern, x[-1]), annotations)):
        pattern_mismatches = [i for i, val in enumerate(map(lambda x : re.match(match_pattern, x[-1]), annotations)) if val is None]
        print(f"There are {len(pattern_mismatches)} annotations out of the pattern.")
        #for idx in pattern_mismatches:
        #    print(idx, file["header"]["annotations"][idx])
    
    elif check_C_annotations(list(filter(lambda x : x[-1].startswith("c_"), annotations))):
        if check_T_annotations(list(filter(lambda x : re.match(r"[t]_", x[-1]), annotations))):
            return True
    
    elif not annotations:
        print("There are no annotations in the file.")
        

In [26]:
file["header"]["annotations"] 

[[0.0, b'0', 'saliva'],
 [2.3989, b'0', 'Ambu BlueSensor'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [15.13, -1, 's_swallow_start'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [17.57, -1, 's_swallow_stop'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [30.79, -1, 's_swallow_start'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [33.13, -1, 's_swallow_stop'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [45.17, -1, 's_swallow_start'],
 [47.2, -1, 's_swallow_stop'],
 [47.2, -1, 's_swallow_stop'],
 [47.2, -1, 's_swallow_stop'],
 [47.2, -1, 's_swallow_stop'],
 

In [14]:
file["header"]["annotations"].append([15.20, -1, 't_swall_start'])

In [27]:
check_annotations(file)

There are 25 annotations out of the pattern.


In [None]:
pattern_mismatches = [i for i, val in enumerate(map(lambda x : re.match(r"\w_\w+_(start|stop)", x[-1]), file["header"]["annotations"])) if val is None]
pattern_mismatches

In [None]:
swallows_df = create_swallows_df(file)
swallows_df

In [None]:
general_df = create_general_df(file)
final = general_df.loc[general_df['data_label'] == 2, ['id', 'time', 'signal']]
final

## TSFresh tests

In [None]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters

In [None]:
final['time'] = (final['time'] * 10e+5).astype(int)
final['signal'] = (final['signal'] * 10e+5).astype(int)

In [None]:
final.dtypes

In [None]:
# Define your extraction settings (optional)
extraction_settings = ComprehensiveFCParameters()

# # Remove the 'sample_entropy' calculation from the extraction settings
# extraction_settings.pop('sample_entropy', None)

# Perform feature extraction without the 'sample_entropy' calculation
X = extract_features(final.head(15000), column_id='id', column_sort='time',
                     #column_kind='data_label', column_value='signal',
                     default_fc_parameters=extraction_settings,
                     impute_function=impute
                     )

In [None]:
X.to_excel('data/xlsx/bewegungs_edited-data-label-2.xlsx')