In [None]:
import numpy as np
import pandas as pd
import os
import glob
from matplotlib import pyplot as plt

In [None]:
def csv_to_numpy(series):
    """Load the first column of a CSV file as a 1-D float array.

    ``pd.read_csv`` consumes the first line of the file as the header row,
    which is how the leading metadata entry (start time : end time) is kept
    out of the returned time series.

    Parameters
    ----------
    series : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        Values of the first column, converted to ``float``.
    """
    first_column = pd.read_csv(series).iloc[:, 0]
    return first_column.to_numpy(dtype=float)

In [88]:
#integer code assigned to each sleep-stage label
mapping = {
    "WK": 0,
    "REM": 1,
    "N1": 2,
    "N2": 3,
    "N3": 4,
}

def sleepstages_to_numpy(sleepstages):
    """Read a sleep-staging CSV and return the stages as integer codes.

    Labels in the "Schlafstadium" column are replaced by their codes from
    ``mapping``; the third column of the table is then returned.

    Parameters
    ----------
    sleepstages : str or path-like
        Location of the sleep-staging CSV file.

    Returns
    -------
    numpy.ndarray
        Third column of the file converted to ``int``.
    """
    table = pd.read_csv(sleepstages)
    encoded = table.replace({"Schlafstadium": mapping})
    #NOTE(review): the column is replaced by name but selected by position;
    #this assumes "Schlafstadium" is the third column -- TODO confirm
    stage_column = encoded.iloc[:, 2]
    return stage_column.to_numpy(dtype=int)

In [89]:
def match_sleepstages_to_data(data, sleepstages, window_size=300):
    """Append a row of per-sample sleep-stage labels to ``data``.

    Each entry of ``sleepstages`` labels one epoch of ``window_size``
    consecutive samples: entry ``i`` is written to columns
    ``i*window_size`` up to (not including) ``(i+1)*window_size`` of the new
    last row. Samples not covered by any stage stay 0; stages that would fall
    past the end of the data are dropped.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; its second axis is indexed per sample.
    sleepstages : array-like
        One stage code per epoch.
    window_size : int, optional
        Number of samples per epoch (default 300).

    Returns
    -------
    numpy.ndarray
        A new array with one more row than ``data``; ``data`` itself is not
        modified.
    """
    n_samples = data.shape[1]
    data = np.vstack((data, np.zeros(n_samples)))
    #the original slice was start:stop:step ("(index*300):(index+1):300"),
    #which only ever wrote sample 0; expand every stage to a full epoch instead
    labels = np.repeat(np.asarray(sleepstages), window_size)[:n_samples]
    data[-1, :labels.size] = labels
    return data

In [85]:
def read_data_from_dir(dir):
    """Read all time series in a directory and append the sleep-stage row.

    Every ``*.csv`` file whose name contains "SleepStaging" is read with
    ``sleepstages_to_numpy`` (if there are several, the last one in sorted
    order wins); every other ``*.csv`` file is read with ``csv_to_numpy`` and
    becomes one row of the result. The stage labels are then added as the
    last row by ``match_sleepstages_to_data``.

    Parameters
    ----------
    dir : str
        Directory containing the CSV files.

    Returns
    -------
    numpy.ndarray
        One row per time-series file (in sorted file-name order) plus the
        sleep-stage row.

    Raises
    ------
    FileNotFoundError
        If the directory holds no time-series CSV or no SleepStaging CSV.
    """
    series = []
    sleepstages = None
    #glob returns matches in arbitrary order; sort so that rows are in the
    #same order on every run
    for filename in sorted(glob.glob(os.path.join(dir, '*.csv'))):
        #test the file name only, so a "SleepStaging" somewhere in the
        #directory path cannot misclassify every file
        if "SleepStaging" in os.path.basename(filename):
            sleepstages = sleepstages_to_numpy(filename)
        else:
            series.append(csv_to_numpy(filename))
    if not series:
        raise FileNotFoundError("no time series CSV found in {!r}".format(dir))
    if sleepstages is None:
        raise FileNotFoundError("no SleepStaging CSV found in {!r}".format(dir))
    #NOTE(review): np.asarray needs all series to have the same length to
    #build a 2-D array -- confirm this holds for every channel file
    data = np.asarray(series)
    return match_sleepstages_to_data(data, sleepstages)

In [None]:
def create_sliding_windows(data, window_size = 300):
    """Placeholder for cutting ``data`` into non-overlapping windows.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    The default ``window_size`` of 300 stands for 30 sec * 10Hz.
    """
    return None

In [None]:
#paths are built with os.path.join: "\p" and "\S" are invalid escape sequences
#in a normal string literal, and a hard-coded "\" only resolves on Windows
dir = os.path.join("sleep_data_downsampling_AllSensorChannels_ lowfrequency_10HZ", "patient 29, male, 7 years")

data = read_data_from_dir(dir)
#NOTE(review): read_data_from_dir already appends a sleep-stage row, so the
#row added here is a second one -- confirm whether both are wanted
x = np.zeros(data.shape[1])
data = np.vstack((data, x))

sleepstages = sleepstages_to_numpy(os.path.join(dir, "SleepStaging.csv"))

#label every 300-sample epoch of the new last row with its sleep stage
for index, stage in enumerate(sleepstages):
    data[-1][(index)*300:(index+1)*300] = stage


In [113]:
#paths are built with os.path.join: "\p" is an invalid escape sequence in a
#normal string literal, and a hard-coded "\" only resolves on Windows
dirs = [
    os.path.join("sleep_data_downsampling_AllSensorChannels_ lowfrequency_10HZ", patient)
    for patient in (
        "patient 29, male, 7 years",
        "patient 75, female, 5 years",
        "patient 80, female, 5 years",
        "patient 89, female 6 years",
        "patient 91, female, 7 years",
    )
]

data = []
for dir in dirs:
    data.append(read_data_from_dir(dir))

#327310 = shortest of the sequences; every patient is cut to that length and
#stacked along a new last axis, giving rows x samples x patients
data = np.dstack([patient[:, :327310] for patient in data])
data.shape

(12, 327310, 5)