In [1]:
import fnmatch
import math
import os
import random
import shutil

import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd

In [None]:
def find_min_num_of_file(dir):
    """Count the 30-second events of every sleep stage over a directory of recordings.

    Every non-hidden ``<name>-PSG.edf`` file in ``dir`` is paired with the first
    file matching ``<name without its last two characters>*-Hypnogram.edf``.
    The hypnogram annotations are cropped to the window from 30 minutes before
    the second annotation to 30 minutes after the second-to-last one, attached
    to the recording and turned into events with ``chunk_duration=30.0``.
    The events are tallied per stage id.

    Args:
        dir: Path of the directory holding the PSG and hypnogram EDF files.
            (The name shadows the builtin ``dir``; kept for caller compatibility.)

    Returns:
        tuple: ``(min(stages), max(stages))`` - the smallest and the largest
        per-stage event count, where ``stages`` is ordered W, 1, 2, 3, 4, R.
    """
    print('========= The process of finding the optimal number of images for each stage has been started. =========')
    print('========= During the process files of the supplied directory will be analysed. =========\n')

    annotation_desc_2_event_id = {
        "Sleep stage W": 1,
        "Sleep stage 1": 2,
        "Sleep stage 2": 3,
        "Sleep stage 3": 4,
        "Sleep stage 4": 5,
        "Sleep stage R": 6,
    }
    # One counter per stage; event id N is stored at index N - 1.
    stages = [0] * len(annotation_desc_2_event_id)

    all_psg_files = []
    all_hyp_files = []
    for file in os.listdir(dir):
        if file.startswith('.'):    # for ignoring hidden files in the directory
            continue
        parts = file.split("-")
        if len(parts) < 2:          # unrelated file without the "<id>-<kind>.edf" shape
            continue
        if parts[1] == "PSG.edf":
            all_psg_files.append(file)
        elif parts[1] == "Hypnogram.edf":
            all_hyp_files.append(file)

    checked = 0
    for file in all_psg_files:
        # The hypnogram shares the recording id except for its last two characters.
        hyp_pattern = file.split("-")[0][:-2] + "*" + "-Hypnogram.edf"
        possible_hyp_file = fnmatch.filter(all_hyp_files, hyp_pattern)

        if not possible_hyp_file:
            print(f'\n========= No such hypnogram file for {file} =========\n')
            continue

        current_psg_file = os.path.join(dir, file)
        current_hyp_file = os.path.join(dir, possible_hyp_file[0])

        data = mne.io.read_raw_edf(
            current_psg_file, stim_channel="Event marker", infer_types=True, preload=True, verbose=False
        )
        annot_train = mne.read_annotations(current_hyp_file)

        # Keep only 30 minutes around the span between the second and second-to-last annotation.
        annot_train.crop(annot_train[1]["onset"] - 30 * 60, annot_train[-2]["onset"] + 30 * 60)
        data.set_annotations(annot_train, emit_warning=False)

        events_train, _ = mne.events_from_annotations(
            data, event_id=annotation_desc_2_event_id, chunk_duration=30.0, verbose=False
        )

        for event in events_train:
            stages[event[2] - 1] += 1   # third column of an event row is the stage id

        checked += 1
        print(f'========= Files have been analysed {checked}/{len(all_psg_files)} =========', end="\r")

    print('\n========= Analysing has been done. =========\n')

    for i, stage in enumerate(annotation_desc_2_event_id):
        print(f'========= Number of files for {stage} is {stages[i]} =========')

    print(f'========= Minimum number of files for each stage is {min(stages)} =========')

    return min(stages), max(stages)

In [3]:
def file_keeper(psg_file, hypnogram_file):
    """Load one PSG recording together with its hypnogram annotations.

    Args:
        psg_file: Path of the PSG EDF file, read with ``mne.io.read_raw_edf``.
        hypnogram_file: Path of the hypnogram file, read with ``mne.read_annotations``.

    Returns:
        tuple: ``(recording, hypnogram)`` exactly as returned by the two MNE readers.
    """
    reader_options = {
        "stim_channel": "Event marker",
        "infer_types": True,
        "preload": True,
    }
    recording = mne.io.read_raw_edf(psg_file, **reader_options)
    hypnogram = mne.read_annotations(hypnogram_file)
    return recording, hypnogram

In [5]:
def all_sleep_stages(annotations):
    """Return the distinct annotation descriptions in first-seen order.

    Args:
        annotations: Iterable of mapping-like items exposing ``.get('description')``.

    Returns:
        list: Every distinct description exactly once, in order of first
        appearance. Items without a description contribute a single ``None``.
    """
    sleep_stages = []
    for annotation in annotations:
        description = annotation.get('description')
        # The original compared against the misspelled key 'desription', so the
        # membership test never matched and duplicates were kept.
        if description not in sleep_stages:
            sleep_stages.append(description)
    return sleep_stages

In [1]:
def crop_set_annotations(data, annotations):
    """Crop the hypnogram, attach it to the recording and extract stage events.

    The annotations are cropped in place to the window from 30 minutes before
    the second annotation to 30 minutes after the second-to-last one, then set
    on ``data`` (also in place).

    Args:
        data: Recording object providing ``set_annotations``.
        annotations: Annotation container supporting indexing and ``crop``;
            it needs at least two entries, otherwise indexing raises IndexError.

    Returns:
        tuple: ``(events_train, annotation_stage_id)`` - the events returned by
        ``mne.events_from_annotations`` for 30-second chunks, and the mapping of
        stage description to event id that was used to build them.
    """
    window_margin = 30 * 60
    annotations.crop(
        annotations[1]["onset"] - window_margin,
        annotations[-2]["onset"] + window_margin,
    )
    data.set_annotations(annotations, emit_warning=False)

    annotation_stage_id = {
        "Sleep stage W": 1,
        "Sleep stage 1": 2,
        "Sleep stage 2": 3,
        "Sleep stage 3": 4,
        "Sleep stage 4": 5,
        "Sleep stage R": 6,
    }

    events_train, _ = mne.events_from_annotations(
        data, event_id=annotation_stage_id, chunk_duration=30.0
    )
    return events_train, annotation_stage_id

In [None]:
def create_directories(sleep_stages):
    """Create the ``dataset/train/<stage>`` and ``dataset/test/<stage>`` folders.

    The function is idempotent: directories that already exist are left alone
    and only the missing ones are created, so it can be called once per
    recording. The original version built the list of test folders but never
    created them, and skipped all work as soon as ``dataset`` existed.

    Args:
        sleep_stages: Iterable of stage names (iterating a dict yields its keys).
            The placeholder ``"Sleep stage ?"`` is skipped.

    Returns:
        None. A status line is printed.
    """
    dir_name = "dataset"
    subsets = ("train", "test")

    stage_names = [stage for stage in sleep_stages if stage != "Sleep stage ?"]

    # The subset roots are listed explicitly so they exist even without any stage.
    wanted = [os.path.join(dir_name, subset) for subset in subsets]
    for subset in subsets:
        for stage in stage_names:
            wanted.append(os.path.join(dir_name, subset, stage))

    missing = [path for path in wanted if not os.path.isdir(path)]
    for path in missing:
        # makedirs also creates the parents; exist_ok tolerates a concurrent creation.
        os.makedirs(path, exist_ok=True)

    if missing:
        print('========= Directories have been created. =========')
    else:
        print(f'========= Directory {dir_name} already exists. =========')
                

In [1]:
def create_images(data, events_train, annotations, annotation_stage_id, psg_file, optim_num_imgs, ctrl_imgs):
    """Render one image per event into ``dataset/train/<stage>/`` up to a per-stage cap.

    Args:
        data: Recording object providing ``info``, ``set_annotations`` and ``plot``.
            Its annotations are removed (``set_annotations(None)``) as a side effect.
        events_train: Iterable of event rows; column 0 is the sample index and
            column 2 the stage id.
        annotations: Unused; kept so existing callers keep working.
        annotation_stage_id: Mapping of stage name to stage id.
        psg_file: File name of the recording; the part before the first "-"
            becomes part of the image name.
        optim_num_imgs: Maximum number of images per stage across all recordings.
        ctrl_imgs: List of running per-stage image counts (stage id N at index
            N - 1). It is updated in place.

    Returns:
        None. Images are written with ``fig.savefig`` and a status line is printed.
    """
    data.set_annotations(None) # to turn off the coloring of annotations

    # Loop invariants: reverse lookup id -> name, sampling rate and recording id.
    stage_name_by_id = {stage_id: stage_name for stage_name, stage_id in annotation_stage_id.items()}
    sampling_rate = data.info.get('sfreq')
    recording_name = psg_file.split("-")[0]

    for event_for_image in events_train:
        counter_index = event_for_image[2] - 1
        if ctrl_imgs[counter_index] >= optim_num_imgs:
            continue    # this stage already has enough images

        start_time = int(event_for_image[0] / sampling_rate) # moment of the signal part on the graph

        fig = data.plot(
            start=start_time,
            duration=30,
            scalings=dict(eeg=1e-4),
            n_channels=1,
            order=[0, 1],
            show_scrollbars=False,
            show=False,
            show_scalebars=False,
        )
        # A figure exposes its axes as ``fig.axes``; ``fig.axis`` does not exist.
        for one_ax in fig.axes:
            one_ax.axis('off')

        stage_name = stage_name_by_id[event_for_image[2]]
        path_for_image = 'dataset/train/' + stage_name + '/' + stage_name + \
                            '_' + recording_name + '_' + str(start_time)

        fig.savefig(path_for_image)
        plt.close(fig)  # free the figure; many are created in this loop
        ctrl_imgs[counter_index] += 1

    print(f'\n Images have been prepared for {recording_name[0:-2]}. \n')

In [2]:
def convert_to_grayscale(): # Not necessary, because argument color_mode in the keras.utils.image_dataset_from_directory helps to convert images in grayscale
    """Convert every image under ``dataset/train/<folder>/`` to mode ``'L'`` in place.

    Hidden entries (names starting with ".") are skipped both among the
    folders and among the files. Entries directly under ``dataset/train`` that
    are not directories are skipped as well; previously such an entry made
    ``os.listdir`` fail.

    NOTE(review): ``Image`` is not imported in the visible part of this file -
    presumably ``from PIL import Image`` is intended; confirm and add it to the
    import cell before using this function.

    Returns:
        None. Files are overwritten and a status line is printed.
    """
    path_for_image = "dataset/train"
    for folder in os.listdir(path_for_image):
        current = os.path.join(path_for_image, folder)
        if folder.startswith('.') or not os.path.isdir(current):
            continue
        for file in os.listdir(current):
            if file.startswith('.'):
                continue
            current_file_path = os.path.join(current, file)
            img = Image.open(current_file_path)
            img.convert('L').save(current_file_path)
    print('========= Files have been converted. =========')

In [1]:
def dataset_create(files_directory, optim_num_imgs):
    """Build the training image dataset from every PSG/hypnogram pair in a directory.

    For each non-hidden ``<name>-PSG.edf`` file, the first hypnogram matching
    ``<name without its last two characters>*-Hypnogram.edf`` is looked up.
    Matching pairs are loaded with ``file_keeper``, prepared with
    ``crop_set_annotations`` and rendered with ``create_images`` after
    ``create_directories`` has made sure the output folders exist.

    Args:
        files_directory: Path of the directory holding the EDF files.
        optim_num_imgs: Maximum number of images per sleep stage, shared across
            all recordings through one running counter list.

    Returns:
        None. Progress and the list of recordings without a hypnogram are printed.
    """
    not_processed_files = []
    all_psg_files = []
    all_hyp_files = []

    ctrl_imgs = 6*[0] # List for the number of images (for 6 stages)

    for file in os.listdir(files_directory):
        if file.startswith('.'): # for ignoring hidden files in the directory
            continue
        parts = file.split("-")
        if len(parts) < 2: # unrelated file without the "<id>-<kind>.edf" shape
            continue
        if parts[1] == "PSG.edf":
            all_psg_files.append(file)
        elif parts[1] == "Hypnogram.edf":
            all_hyp_files.append(file)

    # for controlling status progress
    old_process_percentage = 0
    process_percentage = 0
    total_psg_files = len(all_psg_files)

    for iteration, psg_file in enumerate(all_psg_files, start=1):

        candidate = psg_file.split("-")[0][:-2] # Number of the candidate, i.e. SC4812
        possible_hyp = fnmatch.filter(all_hyp_files, candidate + "*" + "-Hypnogram.edf")

        if possible_hyp:
            hyp_file = possible_hyp[0]

            print(f"\n================ Files currently being processed: {psg_file}, {hyp_file} ================")

            psg_file_path = os.path.join(files_directory, psg_file)
            hyp_file_path = os.path.join(files_directory, hyp_file)

            data, annotations = file_keeper(psg_file_path, hyp_file_path)
            print("======== Got data and annotations. ========")

            events_train, annotation_stage_id = crop_set_annotations(data, annotations)
            print("======== Annotations cropped and set. ========")

            create_directories(annotation_stage_id)

            # create_images takes `annotations` as its third argument; leaving it
            # out shifted every argument and raised a TypeError.
            create_images(data, events_train, annotations, annotation_stage_id,
                          psg_file, optim_num_imgs, ctrl_imgs)
        else:
            print(f"No such hypnogram file for {candidate}")
            not_processed_files.append(candidate)

        process_percentage = round(iteration / total_psg_files * 100)  # progress controlling
        if process_percentage != old_process_percentage:
            print(f"======== Extracting images from PSG signals: {process_percentage}% ========", end="\r")
        old_process_percentage = process_percentage

    # Final status ends with a newline so the next message does not overwrite it.
    print(f"======== Extracting images from PSG signals: {process_percentage}% ========")
    print("Images for the dataset have been created.")
    if not_processed_files:
        print(f"Files that weren't processed: {not_processed_files}")



In [2]:
def split_dataset(dir): # Give the path of your dataset for splitting
    """Move a random 20% of the files of every ``train`` class folder to ``test``.

    For each directory ``<dir>/train/<folder>`` the number of files to move is
    ``round(0.2 * number_of_entries)``. The files are chosen at random without
    replacement and moved to ``<dir>/test/<folder>``, which is created when
    missing. Calling the function again moves a further 20% of what is left,
    so run it once per dataset.

    Args:
        dir: Root path of the dataset containing the ``train`` folder.
            (The name shadows the builtin ``dir``; kept for caller compatibility.)

    Returns:
        None. The number of moved files per folder is printed.
    """
    print("======== The process of the dataset splitting has been started. ========")

    dir_train = os.path.join(dir, "train")

    for folder in os.listdir(dir_train):
        current_folder = os.path.join(dir_train, folder)
        if not os.path.isdir(current_folder):
            continue    # stray files such as .DS_Store are not class folders

        files_in_folder = os.listdir(current_folder)
        number_of_files_to_move = round(0.2 * len(files_in_folder))

        target_folder = os.path.join(dir, "test", folder)
        os.makedirs(target_folder, exist_ok=True)   # moving into a missing folder would fail

        # One draw without replacement replaces re-listing the folder for every file.
        for chosen_file in random.sample(files_in_folder, number_of_files_to_move):
            shutil.move(os.path.join(current_folder, chosen_file),
                        os.path.join(target_folder, chosen_file))

        print(f"Number of moved files to the test/{folder}: {number_of_files_to_move}")

    print("Dataset has been splitted.")


## PREDICTING DATASET

In [3]:
def create_dir_and_img_pred(name_predict_sig, data, events_train, annotations, annotation_stage_id, psg_file):
    """Render every event of one recording into ``dataset_predict/<name_predict_sig>/``.

    Unlike the training images there is no per-stage cap: one image is saved
    for every row of ``events_train``. The image name carries the stage name,
    the recording id (part of ``psg_file`` before the first "-") and the start
    second of the plotted window.

    Args:
        name_predict_sig: Name of the sub-folder to create inside ``dataset_predict``.
        data: Recording object providing ``info``, ``set_annotations`` and ``plot``.
            Its annotations are removed (``set_annotations(None)``) as a side effect.
        events_train: Sequence of event rows; column 0 is the sample index and
            column 2 the stage id.
        annotations: Unused; kept so existing callers keep working.
        annotation_stage_id: Mapping of stage name to stage id.
        psg_file: File name of the recording.

    Returns:
        None. Images are written with ``fig.savefig`` and progress is printed.
    """
    # Creating a directory

    dir_name = "dataset_predict"
    name_predict_sig = dir_name + "/" + name_predict_sig # creating full path of the folder

    for directory in (dir_name, name_predict_sig):
        if not os.path.isdir(directory):
            # exist_ok covers the case of the folder appearing between check and creation.
            os.makedirs(directory, exist_ok=True)
            print(f"======== Directory {directory} has been created. ========")

    # Creating an image

    data.set_annotations(None) # to turn off the coloring of annotations

    # Loop invariants: reverse lookup id -> name, sampling rate and recording id.
    stage_name_by_id = {stage_id: stage_name for stage_name, stage_id in annotation_stage_id.items()}
    sampling_rate = data.info.get('sfreq')
    recording_name = psg_file.split("-")[0]
    total_events = len(events_train)

    old_process_percentage = 0

    for index, event_for_image in enumerate(events_train):

        process_percentage = round(index / total_events * 100) # process controlling
        if process_percentage != old_process_percentage:
            print(f"======== Extracting images from PSG signals: {process_percentage}% ========", end="\r")
        old_process_percentage = process_percentage

        start_time = int(event_for_image[0] / sampling_rate) # start moment of the signal part on the graph

        fig = data.plot(
            start=start_time,
            duration=30,
            scalings=dict(eeg=1e-4),
            n_channels=1,
            order=[0, 1],
            show_scrollbars=False,
            show=False,
            show_scalebars=False,
        )
        for one_ax in fig.axes:
            one_ax.axis('off')

        path_for_image = name_predict_sig + '/' + stage_name_by_id[event_for_image[2]] + \
                        '_' + recording_name + '_' + str(start_time)

        fig.savefig(path_for_image)
        plt.close(fig)  # free the figure; one is created per event

    # The expression is computed outside the f-string: reusing the enclosing
    # quote character inside an f-string is a SyntaxError before Python 3.12.
    candidate = recording_name[0:-2]
    print(f"\n Images have been prepared for {candidate}.\n")



In [None]:
def create_predict_dataset(files_directory):
    """Build the prediction image dataset from every PSG/hypnogram pair in a directory.

    For each non-hidden ``<name>-PSG.edf`` file, the first hypnogram matching
    ``<name without its last two characters>*-Hypnogram.edf`` is looked up.
    Matching pairs are loaded with ``file_keeper``, prepared with
    ``crop_set_annotations`` and rendered by ``create_dir_and_img_pred`` into a
    folder named after the candidate (the PSG name without its last two characters).

    Grayscale conversion is intentionally not performed here; the original
    author marked that step as not necessary.

    Args:
        files_directory: Path of the directory holding the EDF files.

    Returns:
        None. Progress and the list of recordings without a hypnogram are printed.
    """
    print("======== The process of predict dataset has been started. ========")
    print("======== During the process files of the supplied directory will be processed. ========\n")

    not_processed_files = []
    all_psg_files = []
    all_hyp_files = []

    for file in os.listdir(files_directory):
        if file.startswith('.'): # for ignoring hidden files
            continue
        parts = file.split("-")
        if len(parts) < 2: # unrelated file without the "<id>-<kind>.edf" shape
            continue
        if parts[1] == "PSG.edf":
            all_psg_files.append(file)
        elif parts[1] == "Hypnogram.edf":
            all_hyp_files.append(file)

    old_process_percentage = 0
    process_percentage = 0
    total_psg_files = len(all_psg_files)

    for iteration, psg_file in enumerate(all_psg_files, start=1):

        candidate = psg_file.split("-")[0][:-2] # Number of the candidate, i.e. SC4812
        possible_hyp = fnmatch.filter(all_hyp_files, candidate + "*" + "-Hypnogram.edf")

        if possible_hyp:
            hyp_file = possible_hyp[0]

            print(f"======== Files currently being processed: {psg_file}, {hyp_file} ========")

            psg_file_path = os.path.join(files_directory, psg_file)
            hyp_file_path = os.path.join(files_directory, hyp_file)

            data, annotations = file_keeper(psg_file_path, hyp_file_path)
            print("======== Got data and annotations. ========")

            events_train, annotation_stage_id = crop_set_annotations(data, annotations)
            print("======== Annotations cropped and set. ========")

            create_dir_and_img_pred(candidate, data, events_train, annotations, annotation_stage_id, psg_file)
        else:
            print(f"No such hypnogram file for {candidate}")
            not_processed_files.append(candidate)

        # Progress is computed after the file is handled so the last file reports 100%.
        process_percentage = round(iteration / total_psg_files * 100) # process controlling
        if process_percentage != old_process_percentage:
            print(f"\n======== Extracting images from PSG signals: {process_percentage}% ========\n")
        old_process_percentage = process_percentage

    print("======== Images for the dataset have been created. ========")

    if not_processed_files:
        print(f"Files that weren't processed: {not_processed_files}")
