In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mne
import os, fnmatch

In [2]:
def file_keeper(psg_file_path, hyp_file_path):
    """Load one PSG recording plus its hypnogram and filter selected channels.

    Parameters
    ----------
    psg_file_path : str
        Path of the PSG EDF file; it is read with ``preload=True``.
    hyp_file_path : str
        Path of the hypnogram file, read as annotations.

    Returns
    -------
    tuple
        ``(raw, hypnogram)`` - the filtered recording and the annotations.
        The annotations are returned separately; they are not attached to
        the recording here.
    """
    raw = mne.io.read_raw_edf(
        psg_file_path,
        stim_channel="Event marker",
        infer_types=True,
        preload=True,
    )
    hypnogram = mne.read_annotations(hyp_file_path)

    # Channels are picked by position, so the filtering depends on the
    # channel order inside the file.
    # NOTE(review): presumably indices 0-2 are the EEG/EOG channels and index 4
    # is the EMG channel - confirm against the recordings actually used.
    band_limited_picks = [0, 1, 2]
    low_passed_picks = [4]

    raw.filter(0.5, 30, picks=band_limited_picks)  # keep 0.5-30 Hz
    raw.filter(None, 5, picks=low_passed_picks)    # no lower edge, upper edge 5 Hz

    return raw, hypnogram

In [3]:
def crop_set_annotations(data, annotations):
    """Trim the annotations, attach them to the recording and derive events.

    The annotations are cropped in place to the window that starts 30 minutes
    before the onset of the second annotation and ends 30 minutes after the
    onset of the second-to-last annotation. They are then set on ``data`` and
    converted to events in 30-second chunks.

    Parameters
    ----------
    data
        Recording object the annotations are attached to (modified in place).
    annotations
        Annotations object for that recording (cropped in place).

    Returns
    -------
    tuple
        ``(events, present_stage_ids, stage_ids)`` - the two values returned
        by ``mne.events_from_annotations`` followed by the complete
        label-to-code mapping that was requested.
    """
    margin_seconds = 30 * 60

    window_start = annotations[1]["onset"] - margin_seconds
    window_stop = annotations[-2]["onset"] + margin_seconds
    annotations.crop(window_start, window_stop)

    data.set_annotations(annotations, emit_warning=False)

    # Codes are assigned 1..6 in this order.
    stage_labels = (
        "Sleep stage W",
        "Sleep stage 1",
        "Sleep stage 2",
        "Sleep stage 3",
        "Sleep stage 4",
        "Sleep stage R",
    )
    stage_ids = {label: code for code, label in enumerate(stage_labels, start=1)}

    events, present_stage_ids = mne.events_from_annotations(
        data,
        event_id=stage_ids,
        chunk_duration=30.0,
    )

    return events, present_stage_ids, stage_ids


In [4]:
def create_signals(data, events_from_the_file, annotations_id, psg_file, epochs_array):
    """Cut one recording into epochs and append the result to ``epochs_array``.

    Parameters
    ----------
    data
        Recording object; its ``info["sfreq"]`` is used to size the epochs.
    events_from_the_file
        Events array marking the start of every epoch.
    annotations_id : dict
        Mapping of stage label to event code, passed on as ``event_id``.
    psg_file : str
        PSG file name; only used for the progress message.
    epochs_array : list
        List that receives the new ``mne.Epochs`` object (mutated in place).

    Returns
    -------
    None
    """
    # Each epoch starts at its event and stops one sample short of 30 seconds.
    tmax = 30.0 - 1.0 / data.info["sfreq"]
    print(f"Number of epochs in events_from_the_file for the file: {len(events_from_the_file)}")

    epochs_from_the_file = mne.Epochs(
        raw=data,
        events=events_from_the_file,
        event_id=annotations_id,
        tmin=0.0,
        tmax=tmax,
        baseline=None,
        verbose=False,
    )
    epochs_array.append(epochs_from_the_file)

    # Computed outside the f-string: reusing the enclosing quote character
    # inside an f-string expression is a SyntaxError before Python 3.12.
    recording_id = psg_file.split("-")[0][:-2]
    print(f"\n  SIGNALS HAVE BEEN PREPARED FOR {recording_id}.\n")


In [None]:
def dataset_create(files_directory):
    """Build one epochs object per PSG recording found in ``files_directory``.

    Files named ``<id>-PSG.edf`` are paired with a hypnogram whose name starts
    with the same id minus its last two characters and ends with
    ``-Hypnogram.edf``. Every pair is loaded, annotated and cut into epochs.
    PSG files without a matching hypnogram are reported and skipped.

    Parameters
    ----------
    files_directory : str or os.PathLike
        Directory that holds the PSG and hypnogram files.

    Returns
    -------
    list
        The objects appended by ``create_signals``, one per processed PSG
        file, in sorted file-name order.
    """
    epochs_array = []
    not_processed_files = []
    all_psg_files = []
    all_hyp_files = []

    # os.listdir() returns entries in arbitrary order; sorting makes both the
    # output order and the choice among several matching hypnograms reproducible.
    for file in sorted(os.listdir(files_directory)):

        if file.startswith("."):  # ignore hidden files in the directory
            continue

        parts = file.split("-")
        if len(parts) < 2:
            # Entries without a "-" (index files, checksums, sub-folders, ...)
            # are neither PSG nor hypnogram files; indexing parts[1] would fail.
            continue

        if parts[1] == "PSG.edf":
            all_psg_files.append(file)
        elif parts[1] == "Hypnogram.edf":
            all_hyp_files.append(file)

    # for controlling status progress
    total_psg_files = len(all_psg_files)
    old_process_percentage = 0

    for iteration, psg_file in enumerate(all_psg_files, start=1):

        # Id shared by the PSG file and its hypnogram, i.e. SC4812
        recording_id = psg_file.split("-")[0][:-2]
        possible_hyp = fnmatch.filter(all_hyp_files, recording_id + "*" + "-Hypnogram.edf")

        if possible_hyp:

            hyp_file = possible_hyp[0]

            print(f"\n================ Files currently being processed: {psg_file}, {hyp_file} ================")

            # os.path.join also accepts pathlib.Path directories
            psg_file_path = os.path.join(files_directory, psg_file)
            hyp_file_path = os.path.join(files_directory, hyp_file)

            data, annotations = file_keeper(psg_file_path, hyp_file_path)
            print("======== Got data and annotations. ========")

            # The second return value holds only the stage ids that actually
            # occur in this recording; the full mapping is not needed here.
            events_from_the_file, annotations_id, _ = crop_set_annotations(data, annotations)
            print("======== Annotations cropped and set. ========")

            create_signals(data, events_from_the_file, annotations_id, psg_file, epochs_array)

        else:
            print(f"No such hypnogram file for {recording_id}")
            not_processed_files.append(recording_id)

        process_percentage = round(iteration / total_psg_files * 100)   # process status controlling

        # Only report when the rounded percentage actually changed
        if process_percentage != old_process_percentage:
            print(f"======== Extracting signals data from PSG files: {process_percentage}% ========\n")
        old_process_percentage = process_percentage

    print("END. Arrays for the dataset have been prepared.")
    if not_processed_files:
        print(f"Files that weren't processed: {not_processed_files}")

    return epochs_array
