## Steps
1. read data
2. time differential
3. PSD (using welch)
4. extract features
5. match labels (target)
6. add patient id

In [None]:
import os
import glob
import re
import numpy as np
import numpy.fft as fft
import mne
import matplotlib.pyplot as plt
from scipy import stats, signal
from numpy import save, load

In [2]:
import tensorflow as tf
gpus=tf.config.experimental.list_physical_devices('GPU')
print("num gpus available", len(gpus))
tf.config.experimental.set_memory_growth(gpus[0], True)

num gpus available 1


In [3]:
subject_id = 10
base_path = "physionet.org/files/chbmit/1.0.0/"
edf_file_names = sorted(glob.glob(os.path.join(base_path, "chb{:02d}/*.edf".format(subject_id))))
summary_file = os.path.join(base_path, "chb{:02d}/chb{:02d}-summary.txt".format(subject_id, subject_id))


summary_content = open(summary_file,'r').read()

In [4]:
def extract_data_and_labels(edf_filename, summary_text):
    folder, basename = os.path.split(edf_filename)
    
    edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
    # X = edf.get_data().astype(np.float32) * 1e6 # to mV
    X= shuffle_channels(edf).astype(np.float32)
    print (X.shape)
    y = np.zeros(X.shape[1], dtype=np.int64)
    i_text_start = summary_text.index(basename)

    if 'File Name' in summary_text[i_text_start:]:
        i_text_stop = summary_text.index('File Name', i_text_start)
    else:
        i_text_stop = len(summary_text)
    assert i_text_stop > i_text_start

    file_text = summary_text[i_text_start:i_text_stop]
    # print(file_text)
    num_seizures = int(re.search(r"Number of Seizures in File: ([0-9]*)", file_text).group(1))
    
    # if 'Seizure ' in file_text:
    if num_seizures>0:
        for sez in range(1, num_seizures+1):
            # print(re.search(rf"Seizure[\s]*[{str(sez)}]* Start Time:[\s]* ([0-9]*) seconds", file_text))
            start_sec = int(re.search(rf"Seizure[\s]*[{str(sez)}]* Start Time:[\s]* ([0-9]*) seconds", file_text).group(1))
            print(start_sec)
            end_sec = int(re.search(rf"Seizure[\s]*[{str(sez)}]* End Time:[\s]* ([0-9]*) seconds", file_text).group(1))
            print(end_sec)
            i_seizure_start = int(round(start_sec * edf.info['sfreq']))
            i_seizure_stop = int(round((end_sec + 1) * edf.info['sfreq']))
            y[i_seizure_start:i_seizure_stop] = 1
    assert X.shape[1] == len(y)
    return X,y

In [5]:
channel_order= {'FP1-F7':0, 'F7-T7':1, 'T7-P7':2, 'P7-O1':3, 'FP1-F3':4, 'F3-C3':5, 'C3-P3':6, 'P3-O1':7, 'FP2-F4':8, 'F4-C4':9, 'C4-P4':10, 'P4-O2':11, 'FP2-F8':12, 'F8-T8':13, 'T8-P8-0':14, 'P8-O2':15, 'FZ-CZ':16, 'CZ-PZ':17, 'P7-T7':18, 'T7-FT9':19, 'FT9-FT10':20, 'FT10-T8':21 ,'T8-P8-1':22 }

In [6]:

def shuffle_channels(data):
    channels=data.ch_names
    d=data.get_data().astype(np.float32) * 1e6 
    res= [0]*23
    # res=np.zeros_like(res)
    i=0
    for ch in channels:
        if channel_order.get(ch)!=None:
            idx=channel_order[ch]
            res[idx]=np.array(d[i])
        i+=1
    r=np.zeros_like(res)
    for i in range(0,23):
        r[i]=res[i]
    return r

In [7]:
X_10 = []
y_10 = []
for edf_file_name in edf_file_names:
    X, y = extract_data_and_labels(edf_file_name, summary_content)
    X_10.append(X)
    y_10.append(y)

Extracting EDF parameters from d:\salma\UNI\Sem8\WS\physionet.org\files\chbmit\1.0.0\chb10\chb10_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
  edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
(23, 1848320)
Extracting EDF parameters from d:\salma\UNI\Sem8\WS\physionet.org\files\chbmit\1.0.0\chb10\chb10_02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
  edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
(23, 1843200)
Extracting EDF parameters from d:\salma\UNI\Sem8\WS\physionet.org\files\chbmit\1.0.0\chb10\chb10_03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
  edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
(23, 1843200)
Extracting EDF parameters from d:\salma\UNI\Sem8\WS\physionet.org\files\chbmit\1.0.0\chb10\chb10_04.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
  edf = mne.io.rea

In [8]:
len(X_10)

25

In [9]:
def time_differential(x_series):
    a=[]
    for ch in x_series:
        c=np.zeros_like(ch)
        for i in range (1,len(ch)):
            c[i]=ch[i]-ch[i-1]
        a.append(c)
    return np.array(a)

    

In [83]:
label_dict ={0: 'interictal', 1: 'ictal', 2: 'preictal', 3: 'dismiss'}

In [10]:
def label_series(labels):
    interictal=60*60*256
    preictal=30*60*256
    # interictal=4
    # preictal=2
    seizure_starts=[]
    seizure_ends=[]
    if labels[0]==1:
        seizure_starts.append(0)
    for i in range(1,len(labels)):
        if labels[i]==1 and labels[i-1]==0:
            seizure_starts.append(i)
        if labels[i]==0 and labels[i-1]==1:
            seizure_ends.append(i)
    # print(seizure_starts)
    # print(seizure_ends)
    for point in seizure_starts:
        # print(point)
        for j in range(max(0, point-preictal), point):
            if labels[j]!=1:
                labels[j]=2
        for k in range(max(0, point-interictal), point):
            if labels[k]==0:
                labels[k]=3
    for point in seizure_ends:
        for j in range(point, min(interictal+ point, len(labels))):
            if labels[j]==0:
                labels[j]=3
    return labels
        

           



In [85]:
def check_channel_match(channels):
    i=0
    for ch in channels:
        if channel_order.get(ch)==None or channel_order[ch]!=i:
            return False
        i+=1
    return True


In [58]:
edf_filename= 'physionet.org/files/chbmit/1.0.0/chb12/chb12_32.edf'
edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
X = edf.get_data().astype(np.float32) * 1e6 # to mV
print(edf.get_data()[10])
check_channel_match(edf.ch_names)
print(shuffle_channels(edf)[10])


Extracting EDF parameters from d:\salma\UNI\Sem8\WS\physionet.org\files\chbmit\1.0.0\chb12\chb12_32.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
  edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
  edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
--0, --1, --2, --3, --4
  edf = mne.io.read_raw_edf(edf_filename,stim_channel=None)
[-1.95360195e-07  1.95360195e-07  1.95360195e-07 ... -1.20048840e-03
  1.00004884e-03  3.60107448e-03]
  
[array([[ 2.12942613e-05,  1.95360195e-07,  1.95360195e-07, ...,
        -5.20146520e-03, -4.20122100e-03, -3.00092796e-03]])
 array([0.00000000e+00, 3.90625000e-03, 7.81250000e-03, ...,
       3.96498828e+03, 3.96499219e+03, 3.96499609e+03])]


- Channel 1: FP1-F7  ----0
- Channel 2: F7-T7   ----1
- Channel 3: T7-P7   ----2
- Channel 4: P7-O1   ----3
- Channel 5: -
- Channel 6: FP1-F3  ----4
- Channel 7: F3-C3   ----5
- Channel 8: C3-P3   ----6
- Channel 9: P3-O1   ----7
- Channel 10: -
- Channel 11: FZ-CZ   ----16
- Channel 12: CZ-PZ   ----17
- Channel 13: -
- Channel 14: FP2-F4   ----8
- Channel 15: F4-C4   ----9
- Channel 16: C4-P4  ----10
- Channel 17: P4-O2   ----11
- Channel 18: -
- Channel 19: FP2-F8   ----12
- Channel 20: F8-T8   ----13
- Channel 21: T8-P8   ----14
- Channel 22: P8-O2   ----15
- Channel 23: -
- Channel 24: P7-T7   ----18
- Channel 25: T7-FT9   ----19
- Channel 26: FT9-FT10   ----20
- Channel 27: FT10-T8   ----21
- Channel 28: T8-P8   ----22


In [11]:
patient_no=10#################
for filno in range(0,len(X_10)):###################
    recording=X_10[filno]####################
    discarded=0
    wins=recording.shape[1]//5120
    print('file: {} , shape: {} '.format(filno,recording.shape))
    y=label_series(y_10[filno])#############################33333
    datapoints_in=[] #for each file
    targets=[]
    i=0
    while i+5120<recording.shape[1]:
        # n_channels=recording.shape[0]
        n_channels=23
        features=np.zeros(n_channels*9+1)
        features[-1]=patient_no
        window=time_differential(recording[:,i:5120+i])
        freqs, psd = signal.welch(window, fs=256)
        psd[:,57:64]=0
        psd[:,117:124]=0
        target_range=np.array(y[i:i+5120])
        for chi in range (0, n_channels):
            alpha=np.sum(psd[chi,8:14])
            beta=np.sum(psd[chi,13:31])
            delta=np.sum(psd[chi,1:5])
            theta=np.sum(psd[chi, 4:9])
            gamma0=np.sum(psd[chi,30:48])
            gamma1=np.sum(psd[chi,53:76])
            gamma2=np.sum(psd[chi,75:98])
            gamma3=np.sum(psd[chi,103:])
            features[chi*9:(1+chi)*9-1]=[alpha,beta,delta,theta,gamma0,gamma1,gamma2,gamma3]
            total_power=np.sum(features[chi*9:(1+chi)*9-1])
            features[chi*9:(1+chi)*9-1] /= total_power
            features[(1+chi)*9-1]=total_power
        if np.all(target_range == target_range[0]):
            target=target_range[0]
            targets.append(target)
            datapoints_in.append(features)
        else:
            discarded+=1               
        i+=2560
    # dp=np.array(datapoints_in)
    save('features_{}_{}.npy'.format(patient_no, filno), datapoints_in)
    save('targets_{}_{}.npy'.format(patient_no, filno), targets)
    print('total number of windows: {} , number of windows discarded: {} '.format(i/2560, discarded))
    



file: 0 , shape: (23, 1848320) 
total number of windows: 720.0 , number of windows discarded: 0 
file: 1 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 2 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 3 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 4 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 5 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 6 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 7 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 8 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 8 
file: 9 , shape: (23, 1843200) 
total number of windows: 718.0 , number of windows discarded: 0 
file: 10 , shape: (23, 1843200

In [241]:

data = load('targets_9_7.npy')
# data2 = load('features/data_chb01/targets_1_0.npy')
len(data)

1424

In [244]:
data[905:930]

array([2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3], dtype=int64)

In [215]:
np.isnan(data).argmax()

118406

In [216]:
118406/208

569.2596153846154

In [152]:
y_06[0][13525*256:13526*256]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [53]:
50344/208

242.03846153846155

In [34]:
import tensorflow as tf
gpus=tf.config.experimental.list_physical_devices('GPU')
print("num gpus available", len(gpus))
tf.config.experimental.set_memory_growth(gpus[0], True)

num gpus available 1


In [123]:
 len([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

35

In [91]:
y_01[3]
np.savetxt("array_3d.csv",[y_01[3]],delimiter='\n',fmt="%d")
