In [3]:
import numpy as np
import pandas as pd
import os
import glob
from matplotlib import pyplot as plt

In [4]:
#loads one time-series csv; the file's first line is metadata (start time : end time)
def csv_to_numpy(series):
    """Read the first column of a CSV file as a 1-D float array.

    ``pd.read_csv`` treats the first line of the file as the header row, so
    that line (the metadata entry) never appears in the returned values.

    Parameters
    ----------
    series : path or file-like
        Anything ``pd.read_csv`` accepts.

    Returns
    -------
    numpy.ndarray
        Values of the first column, converted to ``float``.
    """
    return pd.read_csv(series).iloc[:, 0].to_numpy(dtype=float)

In [58]:
#mapping for sleep stages to integers
mapping = {"WK" : 0, "REM": 1, "N1": 2, "N2": 3, "N3" : 4}

def sleepstages_to_numpy(sleepstages, n_epochs = 1091):
    """Load a sleep-staging CSV and return its stage labels as integer codes.

    The labels in the "Schlafstadium" column are replaced by their integer
    codes from ``mapping``; the third column of the file (position 2) is then
    returned. NOTE(review): this assumes "Schlafstadium" *is* the third
    column -- confirm against the staging files.

    Parameters
    ----------
    sleepstages : path or file-like
        Anything ``pd.read_csv`` accepts.
    n_epochs : int or None, default 1091
        Maximum number of labels to return (the previously hard-coded
        cut-off). Pass ``None`` to keep every row of the file.

    Returns
    -------
    numpy.ndarray
        1-D ``int`` array with at most ``n_epochs`` entries.

    Raises
    ------
    ValueError
        Raised by the integer conversion if the selected column still
        contains a label that is not a key of ``mapping``.
    """
    staging = pd.read_csv(sleepstages)
    staging = staging.replace({"Schlafstadium" : mapping})
    stage_codes = staging.iloc[:, 2].to_numpy(dtype=int)
    # Slicing with None keeps everything, so n_epochs=None disables the cut-off.
    return stage_codes[:n_epochs]

In [6]:
#matches the sleep stages in the sleepstaging csv file to the data
def match_sleepstages_to_data(data, sleepstages, window_size = 300):
    """Append a per-sample sleep-stage row to a 2-D data array.

    Stage ``i`` labels the samples ``[i * window_size, (i + 1) * window_size)``.
    The previous implementation sliced with ``(index*300):(index+1):300``
    (start:stop:step), so only sample 0 was ever written.

    Parameters
    ----------
    data : array-like, shape (rows, n_samples)
        Existing rows; they are copied unchanged into the result.
    sleepstages : sequence of numbers
        One stage code per window, in chronological order.
    window_size : int, default 300
        Number of samples covered by one stage.

    Returns
    -------
    numpy.ndarray, shape (rows + 1, n_samples)
        ``data`` with the label row stacked underneath. Samples not covered
        by any stage stay 0; stages that would fall beyond ``n_samples`` are
        dropped.
    """
    data = np.asarray(data)
    n_samples = data.shape[1]
    label_row = np.zeros(n_samples)
    # Expand one label per window to one label per sample, then clip to the
    # number of samples actually available.
    per_sample = np.repeat(np.asarray(sleepstages), window_size)[:n_samples]
    label_row[:per_sample.size] = per_sample
    return np.vstack((data, label_row))

In [54]:
#creates non-overlapping windows. default is set to 300 = 30 sec * 10Hz
#reshaping a contiguous series row by row yields the non-overlapping windows directly
def create_sliding_windows(data, window_size = 300, max_size = 327300):
    """Cut a series into consecutive, non-overlapping windows.

    At most ``max_size`` leading samples are used, and any trailing samples
    that do not fill a complete window are dropped. The number of windows is
    derived from the data that is actually present, so a recording shorter
    than ``max_size`` yields fewer windows instead of raising or being
    reshaped to a different window width.

    Parameters
    ----------
    data : array-like
        The series to split (1-D in this notebook).
    window_size : int, default 300
        Samples per window.
    max_size : int, default 327300
        Upper bound on the number of samples taken from the start of ``data``.

    Returns
    -------
    numpy.ndarray
        One row per window; for 1-D input the shape is
        ``(n_windows, window_size)``.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    data = np.asarray(data)[:max_size]
    n_windows = len(data) // window_size
    if n_windows == 0:
        # reshape(0, -1) is ambiguous for numpy, so build the empty result explicitly.
        return np.empty((0, window_size), dtype=data.dtype)
    # Keep only whole windows so the reshape is always exact.
    data = data[:n_windows * window_size]
    return np.reshape(data, (n_windows, -1))

In [56]:
#reads all time series from the directory (the SleepStaging file is skipped)
def read_data_from_dir(dir):
    """Load and window every time-series CSV found directly in ``dir``.

    Files are visited in sorted order. ``glob.glob`` itself returns matches
    in a filesystem-dependent order, which could give a different series
    order for every directory and misalign arrays that are stacked later.
    Files whose *name* contains "SleepStaging" are skipped; only the file
    name is tested, so a directory path containing that text no longer
    filters out everything.

    Parameters
    ----------
    dir : str
        Directory to scan for ``*.csv`` files (sub-directories are not
        searched).

    Returns
    -------
    numpy.ndarray
        The windowed series stacked along a new first axis, one entry per
        file. NOTE(review): this relies on every file producing the same
        window shape -- verify for new recordings.
    """
    windowed = []
    for filename in sorted(glob.glob(os.path.join(dir, '*.csv'))):
        if "SleepStaging" in os.path.basename(filename):
            continue
        windowed.append(create_sliding_windows(csv_to_numpy(filename)))
    return np.asarray(windowed)

In [57]:
#stack all data to get patient x modality x windows x window size
# The paths are assembled with os.path.join rather than written with "\" inside
# plain string literals: "\p" is an invalid escape sequence, and a hard-coded
# backslash only works as a separator on Windows.
DATA_ROOT = "sleep_data_downsampling_AllSensorChannels_ lowfrequency_10HZ"
PATIENT_FOLDERS = [
    "patient 29, male, 7 years",
    "patient 75, female, 5 years",
    "patient 80, female, 5 years",
    "patient 89, female 6 years",
    "patient 91, female, 7 years",
]
dirs = [os.path.join(DATA_ROOT, folder) for folder in PATIENT_FOLDERS]

# One array per patient, stacked along a new leading (patient) axis; works for
# any number of folders and no longer rebinds the builtin name `dir`.
data = np.stack([read_data_from_dir(patient_dir) for patient_dir in dirs], axis = 0)

data.shape

(5, 11, 1091, 300)

In [60]:
#stack all sleep stages to get patients x labels
# Each staging file sits inside its patient directory, so the paths are derived
# from `dirs`. This keeps the label order identical to the data order and avoids
# the invalid "\p" / "\S" escape sequences of hand-written backslash paths.
sleepstage_files = [os.path.join(patient_dir, "SleepStaging.csv") for patient_dir in dirs]

# One label vector per patient, stacked along a new leading (patient) axis.
labels = np.stack([sleepstages_to_numpy(path) for path in sleepstage_files], axis = 0)
labels.shape

(5, 1091)