In [None]:
from pathlib import Path
from google.colab import drive

# Mount the user's Google Drive inside the Colab VM at /content/gdrive.
drive.mount('/content/gdrive')

# Base folder on the mounted drive; every other path in this notebook is built from it.
root_dir = Path('/content/gdrive/MyDrive/Data')

Mounted at /content/gdrive


In [None]:
# Install the third-party packages this notebook needs on top of the Colab image.
# Versions are not pinned; the recorded run resolved mne 1.5.0, import_ipynb 0.1.4
# and modin 0.23.1 (see the captured output below).
# NOTE(review): modin is not referenced in the visible cells - confirm it is still needed.
!pip install mne
!pip install import_ipynb
!pip install modin

Collecting mne
  Downloading mne-1.5.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mne
Successfully installed mne-1.5.0
Collecting import_ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Collecting jedi>=0.16 (from IPython->import_ipynb)
  Downloading jedi-0.19.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, import_ipynb
Successfully installed import_ipynb-0.1.4 jedi-0.19.0
Collecting modin
  Downloading modin-0.23.1-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas<2.1,>=2 (from modin)
  Downloading pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
[2K   

In [None]:
import os
import random
import shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import import_ipynb

import torch
from torch import nn
import mne

In [None]:
# Folder that holds the EDF files (PSG recordings and hypnogram annotations side by side).
data_path = root_dir / Path("Sleep EDF Data/Telemetry")
# Path.iterdir() yields entries in arbitrary, filesystem-dependent order. Sort them so
# that files[0] below, and the "first N recordings" selected later from the index built
# out of this list, are the same on every run.
files = sorted(data_path.iterdir())

# Read the first file as a raw EDF recording and copy its samples into a NumPy array
# for a quick look at the data layout.
data = mne.io.read_raw_edf(files[0])
res = np.array(data.get_data())

Extracting EDF parameters from /content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7151J0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


In [None]:
def rotate_and_reverse(arr):
  """Rotate `arr` by 90 degrees, then flip the result along its first axis.

  The rotation is `np.rot90` with its default arguments; the flip reverses the
  order of the rows of the rotated array. For a 2-D input the combined effect is
  the transpose, e.g. a (channels, samples) array becomes (samples, channels).

  Args:
    arr: array-like with at least two dimensions.

  Returns:
    The rotated-then-flipped array.
  """
  return np.flipud(np.rot90(arr))

# Re-orient the raw sample array; the next cell compares the shapes before and after.
# NOTE: the variable name is misspelled ("roated") but is referenced by a later cell,
# so it is kept as-is here.
res_roated_reversed = rotate_and_reverse(res)

In [None]:
# Display the array shapes before and after rotate_and_reverse.
res.shape, res_roated_reversed.shape

((5, 3851000), (3851000, 5))

In [None]:
# List the channel names of the loaded recording (one per row of `res`).
data.ch_names

['EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental', 'Marker']

In [None]:
# Index of recordings, filled in by the next cell:
# record id -> {"PSG": path or None, "Annotation": path or None}.
file_names = dict()

In [None]:
def extract_key(file_name):
  """Derive the record id and file kind from an EDF file path.

  Only the final path component is inspected. Its first 7 characters are used as
  the record id (e.g. "ST7151J" for "ST7151J0-PSG.edf") and characters 9..11 are
  compared against "PSG" to tell recordings from everything else.

  Args:
    file_name: path to the file, as a `str` or any `os.PathLike` (e.g. `pathlib.Path`).

  Returns:
    A `(key, is_psg)` tuple: the 7-character record id and `True` when the file
    name marks a PSG recording, `False` otherwise.
  """
  # os.fspath accepts both str and Path objects; os.path.basename uses the
  # platform's path separators instead of assuming "/".
  base_name = os.path.basename(os.fspath(file_name))
  return base_name[:7], base_name[9:12] == 'PSG'

# Group the files by record id: each id gets one slot for its PSG recording and one
# for its annotation file. Anything that is not recognised as a PSG file lands in
# the "Annotation" slot.
for file in files:
  key, is_PSG = extract_key(str(file))
  entry = file_names.setdefault(key, {"PSG": None, "Annotation": None})
  entry["PSG" if is_PSG else "Annotation"] = file

In [None]:
def load_edf_file(file_name, is_PSG=True):
  """Load one EDF file with MNE.

  Args:
    file_name: path of the file to read.
    is_PSG: when truthy the file is read as a raw recording with
      `mne.io.read_raw_edf`; otherwise it is read as annotations with
      `mne.read_annotations`.

  Returns:
    The object returned by the corresponding MNE reader. For recordings this is
    the raw object itself, not a sample array; callers use `.get_data()` on it.
  """
  if not is_PSG:
    return mne.read_annotations(file_name)
  return mne.io.read_raw_edf(file_name)

In [None]:
# Removing bad and unaligned files
# Removing bad and unaligned files
# pop(key, None) instead of pop(key): the cell can be re-run (or run against a folder
# where some of these recordings are absent) without raising KeyError.
for bad_key in ("ST7081J", "ST7172J", "ST7072J", "ST7171J", "ST7201J", "ST7121J", "ST7221J"):
  file_names.pop(bad_key, None)

{'PSG': PosixPath('/content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7221J0-PSG.edf'),
 'Annotation': PosixPath('/content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7221JA-Hypnogram.edf')}

In [None]:
def is_continuous(annotation, tol=1e-6):
  """Check that consecutive annotations follow each other without gaps or overlaps.

  Entry `i` must start where entry `i-1` ends, i.e. at
  `onset[i-1] + duration[i-1]`. A missing (`None`) onset on the previous entry is
  treated as 0.

  Args:
    annotation: indexable sequence of mappings with 'onset' and 'duration' keys.
    tol: largest absolute difference between the expected and the actual onset
      that still counts as contiguous. Onsets and durations are floats, so an
      exact `==` comparison can report a gap that is only rounding error.

  Returns:
    `True` when every entry starts where the previous one ends (also for zero or
    one entries), `False` otherwise.
  """
  for i in range(1, len(annotation)):
    prev = annotation[i - 1]
    prev_onset = prev['onset'] if prev['onset'] is not None else 0
    expected_onset = prev_onset + prev['duration']
    actual_onset = annotation[i]['onset']
    # An entry without an onset cannot be matched against the expected start.
    if actual_onset is None or abs(expected_onset - actual_onset) > tol:
      return False
  return True

def check_dictionary_files(file_names):
  """Print, for every record, whether its annotation file is contiguous.

  Args:
    file_names: mapping of record id -> dict with an "Annotation" entry holding
      the path of the annotation file.

  Prints one line per record in the form "<record id>: <True|False>".
  Returns nothing.
  """
  for record_id, paths in file_names.items():
    annot = load_edf_file(paths["Annotation"], False)
    # Evaluate the check once per file (it used to be computed twice).
    print(f"{record_id}: {is_continuous(annot)}")

In [None]:
# Report the contiguity check for every record that is still in the index.
check_dictionary_files(file_names)

ST7151J: True
ST7132J: True
ST7092J: True
ST7212J: True
ST7061J: True
ST7122J: True
ST7131J: True
ST7071J: True
ST7012J: True
ST7112J: True
ST7102J: True
ST7182J: True
ST7152J: True
ST7022J: True
ST7181J: True
ST7082J: True
ST7041J: True
ST7042J: True
ST7052J: True
ST7242J: True
ST7101J: True
ST7162J: True
ST7011J: True
ST7062J: True
ST7111J: True
ST7191J: True
ST7192J: True
ST7202J: True
ST7091J: True
ST7021J: True
ST7241J: True
ST7141J: True
ST7161J: True
ST7211J: True
ST7051J: True
ST7142J: True
ST7222J: True


In [None]:
def convert_psg_to_dataframe(res_data):
  """Build a DataFrame from the first four rows of a recording's sample array.

  Row `i` of `res_data` becomes the column named after the i-th entry of
  `channel_names` below; any further rows are ignored.

  Args:
    res_data: indexable of at least four equally long 1-D sample sequences.

  Returns:
    A `pandas.DataFrame` with one column per named channel.
  """
  channel_names = ('EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental')
  columns = {name: res_data[row] for row, name in enumerate(channel_names)}
  return pd.DataFrame(data=columns)

def annotation_info(annot):
  """Return the start of the first annotation and the summed duration of all of them.

  Args:
    annot: sized, indexable and iterable collection of mappings with 'onset' and
      'duration' keys.

  Returns:
    `(onset, total_time)`: the onset of the first entry (0 when that onset is
    `None`) and the sum of every entry's duration. An empty collection yields
    `(0, 0)` instead of raising `IndexError`.
  """
  if len(annot) == 0:
    return 0, 0
  first_onset = annot[0]['onset']
  onset = first_onset if first_onset is not None else 0
  total_time = sum(item['duration'] for item in annot)
  return onset, total_time

def convert_annotation_to_dataframe(annot, window=30):
  """Expand annotations into one label per fixed-length window.

  Each annotation contributes `int(duration / window)` copies of its description,
  so a remainder shorter than one window is dropped.

  Args:
    annot: iterable of mappings with 'duration' and 'description' keys.
    window: window length, in the same unit as 'duration'. The default of 30
      matches the value that used to be hard-coded in the body (the parameter
      was previously ignored).

  Returns:
    A `pandas.DataFrame` with a single "labels" column, one row per window.

  Raises:
    ValueError: if `window` is not positive.
  """
  if window <= 0:
    raise ValueError(f"window must be positive, got {window!r}")
  labels = []
  for item in annot:
    n_windows = int(item['duration'] / window)
    labels.extend([item['description']] * n_windows)
  return pd.DataFrame(data={"labels": np.array(labels)})

def process_file_names(file_names, number_files = 12):
  """Load the first `number_files` records and stack them into two DataFrames.

  For every selected record the PSG recording is loaded and converted to a
  DataFrame, then cropped to the span covered by the record's annotations
  (from the first onset to onset + total annotated duration). The annotations
  are expanded to one label per window. All records are concatenated in order.

  Args:
    file_names: mapping of record id -> {"PSG": path, "Annotation": path}.
      Records are taken in the mapping's iteration order.
    number_files: maximum number of records to process. If fewer records are
      available, all of them are used (this used to raise IndexError).

  Returns:
    `(psg_df, annot_df)`: the concatenated signal frame and the concatenated
    label frame, both with a fresh 0..n-1 index.

  Raises:
    ValueError: if there is nothing to process (empty mapping or
      `number_files <= 0`).
  """
  selected = list(file_names.values())[:max(number_files, 0)]
  if not selected:
    raise ValueError("process_file_names: no recordings to process")

  psg_frames = []
  annot_frames = []
  for entry in selected:
    raw = load_edf_file(entry["PSG"])
    # Seconds -> sample index uses the recording's own sampling rate rather than
    # a hard-coded 100 Hz.
    sfreq = raw.info["sfreq"]
    psg_df = convert_psg_to_dataframe(raw.get_data())

    annot = load_edf_file(entry["Annotation"], False)
    onset, total_time = annotation_info(annot)
    first_sample = int(onset * sfreq)
    last_sample = int((onset + total_time) * sfreq)

    # Keep only the annotated part of the recording.
    # NOTE(review): nothing here verifies that the cropped signal length matches
    # the number of labels produced for the same record - confirm upstream.
    psg_frames.append(psg_df.iloc[first_sample:last_sample])
    annot_frames.append(convert_annotation_to_dataframe(annot))

  # A single concatenation gives the same result as concatenating in batches
  # and then concatenating the batches.
  psg_df = pd.concat(psg_frames, axis=0).reset_index(drop=True)
  annot_df = pd.concat(annot_frames, axis=0).reset_index(drop=True)
  return psg_df, annot_df

In [None]:
# Build the combined signal and label frames from the first 20 records in the index.
psg_df, annot_df = process_file_names(file_names, number_files = 20)

Extracting EDF parameters from /content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7151J0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7132J0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7092J0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7212J0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /content/gdrive/MyDrive/Data/Sleep EDF Data/Telemetry/ST7061J0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /content/gdrive/MyDrive/Data/

In [None]:
# Output locations for the processed frames (pickle files under the Drive data folder).
save_psg = root_dir / Path("Sleep EDF Data/Processed/PSG.pkl")
save_annot = root_dir / Path("Sleep EDF Data/Processed/Annot.pkl")

# Display both shapes as a final check before writing.
psg_df.shape, annot_df.shape

((57930000, 4), (19310, 1))

In [None]:
# Create the output folder(s) first: to_pickle does not create missing parent
# directories and would fail on a fresh Drive.
save_psg.parent.mkdir(parents=True, exist_ok=True)
save_annot.parent.mkdir(parents=True, exist_ok=True)

# Persist the processed signal and label frames.
psg_df.to_pickle(save_psg)
annot_df.to_pickle(save_annot)