<a href="https://colab.research.google.com/github/CarolinaR99/Projeto1_ADAR/blob/main/Sleep%20Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Parse the EDF recording and its .txt annotations into a CSV file

In [1]:
#@title
import os
import numpy as np
import pandas as pd
import urllib.request
from datetime import datetime
!pip install mne
import mne

def to_timestamp(x: str, acq_time: datetime):
    """Convert an 'HH:MM:SS' clock string into seconds elapsed since ``acq_time``.

    Clock times before 12:00 are treated as belonging to the day after the
    acquisition started; all other times are placed on the acquisition day.

    Args:
        x: Clock time formatted as ``'%H:%M:%S'``.
        acq_time: Start of the acquisition. Only its hour, minute and second
            are used.

    Returns:
        Signed integer number of seconds between ``acq_time`` and ``x``.

    Raises:
        ValueError: If ``x`` does not match ``'%H:%M:%S'``.
    """
    clock = datetime.strptime(x, '%H:%M:%S')
    # Work directly in seconds rather than building a date with ``day + 1``:
    # that construction fails when the acquisition day is the last day of a
    # month, and the day subtraction is wrong across a month boundary.
    day_offset = 1 if clock.hour < 12 else 0
    return (day_offset * 24 * 3600
            + (clock.hour - acq_time.hour) * 3600
            + (clock.minute - acq_time.minute) * 60
            + (clock.second - acq_time.second))


def subsample(data: np.ndarray, fs: int, fs_new: int, axis=0):
    """Keep every ``fs / fs_new``-th entry of ``data`` along ``axis``.

    No filtering is applied; entries are simply picked at a fixed stride,
    starting from index 0.

    Args:
        data: Array to thin out.
        fs: Current sampling rate; must be an exact multiple of ``fs_new``.
        fs_new: Target sampling rate.
        axis: Axis along which samples are picked.

    Returns:
        A new array holding the selected entries.

    Raises:
        AssertionError: If ``fs`` is not a multiple of ``fs_new``.
    """
    assert fs % fs_new == 0
    step = int(fs / fs_new)
    n_samples = data.shape[axis]
    leftover = n_samples % step
    if leftover:
        # The length is not a whole number of strides; tell the user.
        print('Subsampling led to loss of %i samples, in an online setting consider using a BlockBuffer with a '
              'buffer size of a multiple of %i samples.' % (leftover, step))
    keep = np.arange(0, n_samples, step)
    return data.take(keep, axis=axis)


def read_annotation_file(path_filename: str, acq_time: datetime):
    """Load the sleep-stage rows of a tab-separated annotation file.

    The first 20 lines of the file are skipped. Only rows whose ``Event``
    value starts with ``"SLEEP"`` are kept. The ``Time [hh:mm:ss]`` and
    ``Duration[s]`` columns are renamed to ``onset`` and ``duration``, and a
    ``timestamp`` column (seconds relative to ``acq_time``, computed by
    ``to_timestamp``) is appended.

    Args:
        path_filename: Path of the annotation text file.
        acq_time: Acquisition start used as the reference for ``timestamp``.

    Returns:
        DataFrame with the filtered, renamed rows plus the ``timestamp`` column.
    """
    annotation_table = pd.read_csv(path_filename, sep='\t', skiprows=20)
    is_sleep_event = annotation_table["Event"].str.startswith("SLEEP")
    sleep_events = annotation_table.loc[is_sleep_event].rename(
        columns={"Time [hh:mm:ss]": "onset", "Duration[s]": "duration"}
    )
    sleep_events["timestamp"] = sleep_events["onset"].apply(to_timestamp, args=(acq_time,))
    return sleep_events


def merge_data_file_and_annotations(data, df_annotations, fs_new):
    """Downsample a recording and attach a sleep-stage label to every sample.

    Args:
        data: Recording object exposing ``get_data()``, ``times``,
            ``info["sfreq"]`` and ``info.ch_names`` (in this notebook, the
            object returned by ``mne.io.read_raw_edf``).
        df_annotations: Table with a ``timestamp`` column (seconds from the
            start of the recording) and a ``Sleep Stage`` column, one row per
            annotation, in chronological order.
        fs_new: Target sampling rate; the recording rate must be a multiple
            of it (checked by ``subsample``).

    Returns:
        DataFrame with one column per channel, a ``times`` column and a
        ``sleepstage`` column. Rows containing any NaN (including samples that
        no annotation covers) are dropped.

    Notes:
        Each annotation labels the samples from its own timestamp up to the
        timestamp of the next annotation. The last annotation therefore only
        closes the previous interval and does not label any samples itself.
    """
    raw_data = data.get_data()
    fs = int(data.info["sfreq"])
    df = pd.DataFrame(raw_data.T, columns=data.info.ch_names)
    df["times"] = data.times
    # Thin out signals and time axis together so they stay aligned.
    df = pd.DataFrame(data=subsample(df.to_numpy(), fs, fs_new), columns=df.columns)

    n_samples = df.shape[0]
    labels = [np.nan] * n_samples
    for i in range(df_annotations.shape[0] - 1):
        row_start = df_annotations.iloc[i]
        row_end = df_annotations.iloc[i + 1]
        # Clamp the interval to the recording. A list slice assignment that
        # reaches outside [0, n_samples] would silently lengthen ``labels``
        # and make the column assignment below fail with a length mismatch.
        start_pos = max(int(row_start["timestamp"] * fs_new), 0)
        end_pos = min(int(row_end["timestamp"] * fs_new), n_samples)
        if start_pos < end_pos:
            labels[start_pos:end_pos] = [row_start["Sleep Stage"]] * (end_pos - start_pos)

    df["sleepstage"] = labels
    df = df.dropna()

    return df




In [2]:
# Subject ID
subject_name = 'n1'
# New frequency
fs_new = 128
# CSV Filename
new_filename = f'{subject_name}_data_and_annotations.csv'

data_filename = f'{subject_name}.edf'
filename_annotations = f'{subject_name}_annotations.txt'
url_data = f'https://physionet.org/files/capslpdb/1.0.0/{subject_name}.edf?download'
url_annotations = f'https://physionet.org/files/capslpdb/1.0.0/{subject_name}.txt?download'

# Fetch each file only when it is not on disk yet, so re-running the cell does
# not download the recording again. The transfer goes to a ``.part`` file that
# is renamed on success, so an interrupted download is never mistaken for a
# complete file on the next run.
for _label, _url, _target in (
    ('data file', url_data, data_filename),
    ('annotations data file', url_annotations, filename_annotations),
):
    if os.path.exists(_target):
        print(f'Using cached {_label}: {_target}')
        continue
    print(f'Downloading {_label}: {_url}')
    _partial = _target + '.part'
    urllib.request.urlretrieve(_url, _partial)
    os.replace(_partial, _target)


print(f'Loading File: {data_filename}')
data_polysomnography = mne.io.read_raw_edf(data_filename)

print(f'Loading Annotations file: {filename_annotations}')
# The recording's measurement date is the time reference for the annotations.
df_annotations_polysomnography = read_annotation_file(filename_annotations, data_polysomnography.info["meas_date"])

print("\n\n Example Annotations file: \n")
df_annotations_polysomnography.head(4)

Downloading data file: https://physionet.org/files/capslpdb/1.0.0/n1.edf?download
Downloading annotations data file: https://physionet.org/files/capslpdb/1.0.0/n1.txt?download
Loading File: n1.edf
Extracting EDF parameters from /content/n1.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Loading Annotations file: n1_annotations.txt


 Example Annotations file: 



Unnamed: 0,Sleep Stage,Position,onset,Event,duration,Location,timestamp
0,W,Unknown Position,22:09:33,SLEEP-S0,30,ROC-LOC,210
1,W,Unknown Position,22:10:03,SLEEP-S0,30,ROC-LOC,240
2,W,Unknown Position,22:10:33,SLEEP-S0,30,ROC-LOC,270
3,W,Unknown Position,22:11:03,SLEEP-S0,30,ROC-LOC,300


In [None]:
# Announce the rate change, merge signals with their labels, and persist the
# result as CSV (without the index column) for the cells below.
original_sfreq = data_polysomnography.info["sfreq"]
print(f'Merge files annotation and polysomnography data. Downsampling data from {original_sfreq} to {fs_new} Hz')
df_data = merge_data_file_and_annotations(
    data_polysomnography,
    df_annotations_polysomnography,
    fs_new,
)
df_data.to_csv(new_filename, index=False)
print("Data Merged")
df_data.head(4)

Merge files annotation and polysomnography data. Downsampling data from 512.0 to 128 Hz


In [None]:
import matplotlib.pyplot as plt

# Sleep stage over time, with the time axis converted from seconds to hours.
fig, ax = plt.subplots()
ax.plot(df_data.times / 3600, df_data.sleepstage)
ax.set_xlabel("Time (h)")
ax.set_ylabel("Sleep Stage");

## Load CSV

In [None]:
# Read the merged table back from the CSV file and preview the first rows.
df = pd.read_csv(filepath_or_buffer=new_filename)
df.head(n=4)

In [None]:
#df.head(4)

In [None]:
# Wide figure overlaying the "ROC-LOC" and "LOC-ROC" columns against time.
fig, ax = plt.subplots(figsize=(30, 5))
ax.plot(df["times"], df["ROC-LOC"])
ax.plot(df["times"], df["LOC-ROC"])
# ax.plot(df["times"], df["sleepstage"])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every signal column.
scaler = MinMaxScaler()

# All columns except the last two, which are re-attached unscaled below
# ("times" and "sleepstage").
new_df = df.iloc[:, :-2]
print(scaler.fit(new_df))

print(scaler.data_max_)
# Keep the original column labels and index: wrapping the bare array returned
# by transform() would otherwise produce anonymous integer column names.
df_norm = pd.DataFrame(scaler.transform(new_df), columns=new_df.columns, index=new_df.index)
df_norm['times'] = df['times']
df_norm['sleepstage'] = df['sleepstage']
df_norm.head(4)

In [None]:
# Label df_norm's columns with df's column names, position by position.
df_norm.columns = list(df.columns)
df_norm.head(n=4)

In [None]:
# Names of all columns except "ROC-LOC", "LOC-ROC", "times" and "sleepstage".
columns_electrodes = [el for el in df.columns if el not in ["ROC-LOC","LOC-ROC","times", "sleepstage"]]
# columns_electrodes is a plain Python list, which has no .head(); slice it to
# preview the first four names.
columns_electrodes[:4]