In [1]:
!pip install fabio



In [2]:
import numpy as np
import fabio
import matplotlib as plt
import os
import mne
from mne.datasets.sleep_physionet.age import fetch_data
from mne.time_frequency import psd_welch
from collections import Counter

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer

In [3]:
# Folder scanned by get_patients(). Built with os.path.join so the separator
# matches the host OS instead of being hard-coded to a Windows backslash
# (on Windows this still evaluates to ".\\data").
DATA_PATH = os.path.join(".", "data")

In [4]:
def get_patients(path=DATA_PATH):
    """Return the path of every entry found directly under ``path``.

    Parameters
    ----------
    path : str
        Directory to scan; defaults to ``DATA_PATH``.

    Returns
    -------
    list of str
        ``path`` joined with each entry name, sorted by entry name.
    """
    # os.listdir yields entries in arbitrary order; sorting makes positional
    # lookups such as PATIENTS[0] select the same entry on every run.
    return [os.path.join(path, item) for item in sorted(os.listdir(path))]

In [5]:
# List the entries under DATA_PATH once and show them.
PATIENTS = get_patients()
print(PATIENTS)

['.\\data\\PSGData1_Hang7', '.\\data\\PSGData2_Hang7']


In [6]:
def get_edf(path):
    """Return the path of the first ``.edf`` file found directly in ``path``.

    The extension is matched case-insensitively and candidates are examined in
    sorted name order, so the same file is chosen on every run.

    Parameters
    ----------
    path : str
        Directory to search (not recursive).

    Returns
    -------
    str or None
        ``path`` joined with the matching file name, or ``None`` (after
        printing a notice) when no ``.edf`` file is present.
    """
    for name in sorted(os.listdir(path)):
        # Lower-case before comparing so names such as "REC.EDF" are found too.
        if name.lower().endswith('.edf'):
            return os.path.join(path, name)
    print("No .edf file in ->", path)
    return None

In [7]:
# Look up the .edf file inside the first patient entry.
get_edf(PATIENTS[0])

'.\\data\\PSGData1_Hang7\\20190917-T3-93135.edf'

In [8]:
def load_raw_edf(path):
    """Open the EDF file at ``path`` with ``mne.io.read_raw_edf`` and return the result."""
    return mne.io.read_raw_edf(path)

In [9]:
# Open the EDF recording of the first patient entry.
data = load_raw_edf(get_edf(PATIENTS[0]))

Extracting EDF parameters from E:\ZJU_Research\SR_ZJUPH_PSG\data\PSGData1_Hang7\20190917-T3-93135.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


Thor, Abdo
  raw_train = mne.io.read_raw_edf(path)


In [10]:
def show_shape(data, probe_index=1000):
    """Print a per-channel summary of a raw recording.

    For every channel the function prints the sizes of the ``(signal, times)``
    pair obtained by indexing ``data`` with the channel number, followed by a
    few time stamps, the signal values, the time axis and its maximum.

    Parameters
    ----------
    data : object
        Recording that supports ``len(data)``, exposes ``ch_names`` and
        returns ``(signal, times)`` when indexed with a channel number
        (e.g. the object returned by ``load_raw_edf``).
    probe_index : int, optional
        Sample index whose time stamp is printed as a spot check. It is
        clipped to the last available sample. Defaults to 1000.

    Returns
    -------
    None
    """
    print(data)
    print("length ->", len(data))
    # Take the channel count from the recording itself rather than assuming 23.
    for i in range(len(data.ch_names)):
        # Index the recording once per channel instead of once per printed value.
        channel = data[i]
        signal, times = channel[0], channel[1]
        # Derive the last sample index from the time axis so recordings of any
        # length work, and keep the spot-check index within range.
        last = len(times) - 1
        probe = min(probe_index, last)
        print("length of <", i + 1, "> is",
              len(channel), " \t==== ",
              len(signal[0]), " \t==== ",
              len(times), " \t==== ")
        print("data[i][1][0] ->", times[0],
              "\ndata[i][1][%d] ->" % probe, times[probe],
              "\ndata[i][1][%d] ->" % last, times[last],
              "\ndata[i][0] ->", signal[0],
              "\ndata[i][1] ->", times,
              # Array-level max avoids iterating millions of samples in Python.
              "\nmax() ->", times.max())

In [11]:
# Print the per-channel summary of the recording opened above.
show_shape(data)

<RawEDF | 20190917-T3-93135.edf, 23 x 36441088 (35587.0 s), ~26 kB, data not loaded>
length -> 36441088
length of < 1 > is 2  	====  36441088  	====  36441088  	==== 
data[i][1][0] -> 0.0 
data[i][1][1000] -> 0.9765625 
data[i][1][36441087] -> 35586.9990234375 
data[i][0] -> [ 1.14473182e-09 -3.84092346e-06  1.79722896e-07 ... -1.49829394e-05
 -1.29755352e-05 -4.08743188e-06] 
data[i][1] -> [0.00000000e+00 9.76562500e-04 1.95312500e-03 ... 3.55869971e+04
 3.55869980e+04 3.55869990e+04] 
max() -> 35586.9990234375
length of < 2 > is 2  	====  36441088  	====  36441088  	==== 
data[i][1][0] -> 0.0 
data[i][1][1000] -> 0.9765625 
data[i][1][36441087] -> 35586.9990234375 
data[i][0] -> [1.14473182e-09 1.10249415e-05 6.04761822e-06 ... 1.30668593e-05
 1.08532024e-05 6.65499783e-06] 
data[i][1] -> [0.00000000e+00 9.76562500e-04 1.95312500e-03 ... 3.55869971e+04
 3.55869980e+04 3.55869990e+04] 
max() -> 35586.9990234375
length of < 3 > is 2  	====  36441088  	====  36441088  	==== 
data[i][1][

length of < 21 > is 2  	====  36441088  	====  36441088  	==== 
data[i][1][0] -> 0.0 
data[i][1][1000] -> 0.9765625 
data[i][1][36441087] -> 35586.9990234375 
data[i][0] -> [0. 0. 0. ... 0. 0. 0.] 
data[i][1] -> [0.00000000e+00 9.76562500e-04 1.95312500e-03 ... 3.55869971e+04
 3.55869980e+04 3.55869990e+04] 
max() -> 35586.9990234375
length of < 22 > is 2  	====  36441088  	====  36441088  	==== 
data[i][1][0] -> 0.0 
data[i][1][1000] -> 0.9765625 
data[i][1][36441087] -> 35586.9990234375 
data[i][0] -> [7.26024262 7.26227436 7.26436658 ... 7.24001331 7.24053375 7.24092077] 
data[i][1] -> [0.00000000e+00 9.76562500e-04 1.95312500e-03 ... 3.55869971e+04
 3.55869980e+04 3.55869990e+04] 
max() -> 35586.9990234375
length of < 23 > is 2  	====  36441088  	====  36441088  	==== 
data[i][1][0] -> 0.0 
data[i][1][1000] -> 0.9765625 
data[i][1][36441087] -> 35586.9990234375 
data[i][0] -> [0. 0. 0. ... 0. 0. 0.] 
data[i][1] -> [0.00000000e+00 9.76562500e-04 1.95312500e-03 ... 3.55869971e+04
 3.

In [12]:
def get_txt(path):
    """Return the path of the first ``.txt`` file found directly in ``path``.

    The extension is matched case-insensitively and candidates are examined in
    sorted name order, so the same file is chosen on every run.

    Parameters
    ----------
    path : str
        Directory to search (not recursive).

    Returns
    -------
    str or None
        ``path`` joined with the matching file name, or ``None`` (after
        printing a notice) when no ``.txt`` file is present.
    """
    for name in sorted(os.listdir(path)):
        # Lower-case before comparing so names such as "LABELS.TXT" are found too.
        if name.lower().endswith(".txt"):
            return os.path.join(path, name)
    print("No .txt file in ->", path)
    return None

In [13]:
# Look up the .txt file inside the first patient entry.
get_txt(PATIENTS[0])

'.\\data\\PSGData1_Hang7\\20190917-T3-93135.txt'

In [14]:
def load_txt_label(path):
    """Read the text file at ``path`` and return its lines as a list of strings.

    The content is decoded as UTF-8 and split on newline characters, so a file
    that ends with a newline yields a trailing empty string.

    Parameters
    ----------
    path : str
        Location of the text file.

    Returns
    -------
    list of str
        One entry per newline-separated piece of the file content.
    """
    # The context manager closes the handle as soon as the content is read,
    # instead of leaving that to garbage collection.
    with open(path, mode="r", encoding="UTF8") as handle:
        return handle.read().split("\n")

In [15]:
# Read the label file of the first patient entry into a list of strings.
label = load_txt_label(get_txt(PATIENTS[0]))

In [16]:
def show_label(label):
    """Print the number of labels and how often each value occurs.

    Returns the per-value tally as a plain ``dict``.
    """
    print("len(label) ->", len(label))
    tally = dict(Counter(label))
    print(tally)
    return tally

In [17]:
# Tally the labels exactly as read from the file.
show_label(label)

len(label) -> 1187
{'W': 304, 'N1': 82, 'N2': 464, 'N3': 153, 'R': 183, '': 1}


{'W': 304, 'N1': 82, 'N2': 464, 'N3': 153, 'R': 183, '': 1}

In [19]:
# Inspect the final entry of the label list.
label[-1]

''

In [20]:
# Drop blank trailing entries only. Unlike an unconditional label[:-1], this
# is safe to re-run: once no blank entry remains, real labels are untouched.
while label and label[-1] == "":
    label = label[:-1]

In [21]:
# Tally the labels again after the last entry has been removed.
show_label(label)

len(label) -> 1186
{'W': 304, 'N1': 82, 'N2': 464, 'N3': 153, 'R': 183}


{'W': 304, 'N1': 82, 'N2': 464, 'N3': 153, 'R': 183}

In [22]:
# Ratio of len(data) to the number of labels, i.e. how many units of
# len(data) correspond to one label on average.
print(len(data)/len(label))

30726.04384485666
