In [1]:
# `os` is used further down (os.path.join / os.makedirs); import it explicitly
# instead of relying on it leaking in through a wildcard import.
import os

# NOTE: the relative order of the imports below is kept as-is on purpose: the
# wildcard imports may shadow or be shadowed by neighbouring names.
import numpy as np
import torch
import h5py
from torch.utils.data import DataLoader
import pandas as pd
import skorch
from skorch.dataset import Dataset
from skorch.callbacks import Checkpoint,ProgressBar
from skorch.helper import predefined_split
from configTUHdl import *
from dataset import *
from sklearn.metrics import roc_auc_score
from mne import set_log_level
import resampy
from skorch.callbacks import LRScheduler
from sklearn.metrics import classification_report
from WavenetLSTM import WavenetLSTM
# Configure MNE's logging level for the rest of the notebook.
set_log_level(False)
# Device string later handed to the skorch classifier.
# `cuda` is not defined in this notebook; presumably it is a boolean flag that
# arrives through the `configTUHdl` wildcard import — TODO confirm.
device = 'cuda' if cuda else 'cpu'

In [2]:
# Row order of the raw recordings that `tcp` expects.
ch_names = [
    'A1', 'A2', 'C3', 'C4', 'CZ', 'F3', 'F4', 'F7', 'F8', 'FP1', 'FP2', 'FZ',
    'O1', 'O2', 'P3', 'P4', 'PZ', 'T3', 'T4', 'T5', 'T6',
]

# Electrode pairs of the TCP montage in output-row order: output row k is
# (first electrode) minus (second electrode).
_TCP_PAIRS = (
    ('FP1', 'F7'), ('FP2', 'F8'),
    ('F7', 'T3'), ('F8', 'T4'),
    ('T3', 'T5'), ('T4', 'T6'),
    ('T5', 'O1'), ('T6', 'O2'),
    ('T3', 'C3'), ('T4', 'C4'),
    ('C3', 'CZ'), ('CZ', 'C4'),
    ('FP1', 'F3'), ('FP2', 'F4'),
    ('F3', 'C3'), ('F4', 'C4'),
    ('C3', 'P3'), ('C4', 'P4'),
    ('P3', 'O1'), ('P4', 'O2'),
)


def tcp(data,fs):
    """Apply the TCP montage to a single recording.

    Parameters
    ----------
    data : 2-D array indexed as ``data[channel, sample]``; its rows must follow
        the order given by ``ch_names``.
    fs : sampling rate of ``data``; passed through untouched.

    Returns
    -------
    (montage, fs) where ``montage`` is a ``float32`` array with one row per
    entry of ``_TCP_PAIRS`` (20 rows) and ``data.shape[1]`` columns.
    """
    first_rows = [ch_names.index(first) for first, _ in _TCP_PAIRS]
    second_rows = [ch_names.index(second) for _, second in _TCP_PAIRS]

    montage = np.zeros(shape=(len(_TCP_PAIRS), data.shape[1]), dtype=np.float32)
    # The subtraction happens in the dtype of `data`; assigning into the
    # preallocated buffer is what casts the result to float32.
    montage[:] = data[first_rows, :] - data[second_rows, :]
    return montage, fs

In [3]:
from scipy.signal import butter,iirnotch,filtfilt
# Filter coefficients in (b, a) form. Both designs take `fs=sampling_freq`, so
# they are only valid for signals sampled at `sampling_freq`.
# 4th-order Butterworth high-pass with a 1 Hz cutoff.
butter_b, butter_a = butter(N=4, Wn=1, btype='highpass', fs=sampling_freq)
# Notch centred on 60 Hz with quality factor 30.
notch_b, notch_a = iirnotch(w0=60, Q=30, fs=sampling_freq)

In [4]:
# Filter coefficients keyed by sampling rate, so each distinct rate met in the
# corpus is designed only once.
_filters_by_fs = {}


def _filters_for(fs):
    """Return ``(highpass_b, highpass_a, notch_b, notch_a)`` designed for rate ``fs``."""
    if fs not in _filters_by_fs:
        highpass_b, highpass_a = butter(4, 1, btype='highpass', fs=fs)
        stop_b, stop_a = iirnotch(60, Q=30, fs=fs)
        _filters_by_fs[fs] = (highpass_b, highpass_a, stop_b, stop_a)
    return _filters_by_fs[fs]


def _crop(data, fs):
    """Keep only the first ``duration_recording_mins`` minutes of the recording."""
    return data[:, :int(duration_recording_mins * 60 * fs)], fs


def _highpass(data, fs):
    """Zero-phase 1 Hz Butterworth high-pass along the last axis."""
    highpass_b, highpass_a, _, _ = _filters_for(fs)
    return filtfilt(highpass_b, highpass_a, data), fs


def _notch(data, fs):
    """Zero-phase 60 Hz notch along the last axis."""
    _, _, stop_b, stop_a = _filters_for(fs)
    return filtfilt(stop_b, stop_a, data), fs


def _resample(data, fs):
    """Resample from ``fs`` to ``sampling_freq`` along the time axis."""
    resampled = resampy.resample(data, fs, sampling_freq, axis=1, filter='kaiser_fast')
    return resampled, sampling_freq


# Each step maps (data, fs) -> (data, fs) and the steps run in list order.
# The two filters run BEFORE resampling, i.e. while the signal is still at the
# rate `fs` it arrives with. They are therefore designed for that `fs` rather
# than for the post-resampling rate `sampling_freq`; coefficients designed for a
# different rate would shift the 1 Hz / 60 Hz frequencies by fs / sampling_freq.
preproc_functions = [
    _crop,
    _highpass,
    _notch,
    tcp,        # TCP montage
    _resample,
]

dataset = DiagnosisSet(n_recordings=n_recordings,
                       max_recording_mins=max_recording_mins,
                       preproc_functions=preproc_functions,
                       data_folders=data_folders,
                       train_or_eval='train',
                       sensor_types=sensor_types)
# The evaluation set is only built when `test_on_eval` is truthy.
if test_on_eval:
    test_dataset = DiagnosisSet(n_recordings=n_recordings,
                                max_recording_mins=max_recording_mins,
                                preproc_functions=preproc_functions,
                                data_folders=data_folders,
                                train_or_eval='eval',
                                sensor_types=sensor_types)

In [5]:
# Load the 'train' split; `load()` returns two values, presumably the
# preprocessed recordings and their labels — TODO confirm against DiagnosisSet.
X,y=dataset.load()
# NOTE(review): `test_dataset` is only created when `test_on_eval` is truthy,
# but it is used unconditionally here (and by every later cell), so this raises
# NameError when `test_on_eval` is falsy.
test_x,test_y=test_dataset.load()

In [6]:
# Convert the loaded recordings to NumPy arrays. Later cells index these with
# three subscripts, which assumes every recording has the same shape so that the
# result is one 3-D array — TODO confirm.
train_data=np.array(X)
test_data=np.array(test_x)
# Drop the original containers; only the array copies are used from here on.
del X,test_x

In [7]:
def _head_and_reversed_tail(recordings, window):
    """Stack two crops of every recording along the first axis.

    The first crop is the leading ``window`` samples. The second crop walks
    backwards from the last sample down to index ``window`` (inclusive), so it
    is the remainder of the recording in *reversed* time order.

    ``np.concatenate`` joins along axis 0, so both crops must have the same
    length, i.e. the time axis must be exactly ``2 * window`` samples long.

    NOTE(review): confirm the time reversal of the second crop is intended.
    """
    head = recordings[:, :, :window]
    reversed_tail = recordings[:, :, -1:window-1:-1]
    return np.concatenate([head, reversed_tail])


augmented_train_data = _head_and_reversed_tail(train_data, input_time_length)
augmented_test_data = _head_and_reversed_tail(test_data, input_time_length)

# Each recording contributes two windows, so its label is repeated in the same
# (all heads, then all tails) order.
augmented_train_label = np.concatenate([y, y])
augmented_test_label = np.concatenate([test_y, test_y])

In [10]:
#HDF5 implementation
# Write every augmented training window to its own HDF5 file (datasets 'x' and
# 'y') and record the file/label pairs in an Excel index next to them.
split='train'
path=os.path.join(processed_folder,split)
# h5py does not create missing parent directories.
os.makedirs(path,exist_ok=True)

file_names=[]
for i,(window,label) in enumerate(zip(augmented_train_data,augmented_train_label)):
    file_path=f'{path}/{i}.hdf5'
    file_names.append(file_path)
    # 'w' truncates an existing file. With 'a', re-running this cell fails
    # because the names 'x' and 'y' already exist in the file.
    with h5py.File(file_path, 'w') as f:
        f['x']=window
        f['y']=label

file_names=pd.Series(file_names)
lbs=pd.Series(augmented_train_label)
train_dataframe=pd.DataFrame({'name':file_names,'label':lbs})
train_dataframe.to_excel(f"{processed_folder}/{split}.xlsx",index=False)

In [9]:
# Write every augmented evaluation window to its own HDF5 file (datasets 'x'
# and 'y') and record the file/label pairs in an Excel index next to them.
split='eval'
path=os.path.join(processed_folder,split)
# h5py does not create missing parent directories.
os.makedirs(path,exist_ok=True)

file_names=[]
for i,(window,label) in enumerate(zip(augmented_test_data,augmented_test_label)):
    file_path=f'{path}/{i}.hdf5'
    file_names.append(file_path)
    # 'w' truncates an existing file. With 'a', re-running this cell fails
    # because the names 'x' and 'y' already exist in the file.
    with h5py.File(file_path, 'w') as f:
        f['x']=window
        f['y']=label

file_names=pd.Series(file_names)
lbs=pd.Series(augmented_test_label)
eval_dataframe=pd.DataFrame({'name':file_names,'label':lbs})
eval_dataframe.to_excel(f"{processed_folder}/{split}.xlsx",index=False)

In [2]:
class WindowDataset(torch.utils.data.Dataset):
    """Map-style dataset over windows stored one per HDF5 file.

    The spreadsheet at ``excel_path`` is the index: its ``name`` column holds
    the path of each HDF5 file and its ``label`` column the matching label.
    """

    def __init__(self, excel_path):
        """Read the index spreadsheet; no HDF5 file is opened here."""
        super().__init__()
        index_table = pd.read_excel(excel_path)
        self.file_names = index_table['name'].to_numpy(dtype=str)
        self.label = index_table['label'].to_numpy()

    def __len__(self):
        """Number of indexed windows."""
        return len(self.label)

    def __getitem__(self, index):
        """Return ``(window, label)`` for position ``index``.

        Only the 'x' dataset is read from the file; the label comes from the
        in-memory index, not from the file's 'y' dataset.
        """
        with h5py.File(self.file_names[index], 'r') as h5_file:
            # Copy into memory before the file handle is closed.
            window = np.array(h5_file['x'])
        return window, self.label[index]

In [3]:
# Build the datasets from the index spreadsheets in `processed_folder`.
# They have to exist before the classifier is constructed, because the eval set
# is handed to it as the predefined validation split.
test_set=WindowDataset(f'{processed_folder}/eval.xlsx')
train_set=WindowDataset(f'{processed_folder}/train.xlsx')

In [4]:
# Peek at the first (window, label) pair to sanity-check what the dataset yields.
train_set[0]

(array([[  0.17097712,   1.3287303 ,   0.7692792 , ...,  -5.556323  ,
          24.01627   ,  -4.044311  ],
        [ -3.3584309 ,   0.09836197,  -5.3884964 , ...,  -8.297503  ,
         -17.203846  ,  -1.3521657 ],
        [  1.4771857 ,  -5.01991   ,  -2.4995058 , ..., -10.169931  ,
          -9.443199  ,   2.1237545 ],
        ...,
        [ -1.6094382 ,  -0.13247037,   0.74053544, ...,   2.6964524 ,
          -1.2182155 ,   2.372343  ],
        [ -1.5516642 ,  -3.4095576 ,  -2.2637873 , ...,   7.82981   ,
           8.16312   ,   8.779852  ],
        [ -2.2247756 ,  -3.852793  ,  -4.484772  , ...,  -0.29596758,
           1.6410568 ,   2.8635814 ]], dtype=float32),
 1)

In [4]:
# Prefix used for the checkpoint / saved-parameter file names.
model_name='wavenet'
# NOTE(review): `criterion` is assigned here but the NeuralNetClassifier
# construction in this notebook never passes it, so skorch's default criterion
# is what actually gets used — confirm which loss is intended.
criterion=torch.nn.CrossEntropyLoss
# Learning rate handed to the optimizer (`optimizer__lr`).
optimizer_lr=0.0005
# Number of input channels; equals the 20 rows produced by `tcp`.
n_chans=20

In [5]:
# Instantiate the network with its constructor defaults; this object is what the
# skorch classifier wraps.
model=WavenetLSTM()

In [7]:
def monitor(net):
    """True when the latest epoch set a new best accuracy, F1 or loss.

    Not referenced by anything else in this cell; `cp` monitors
    'valid_f1_best' by name instead.
    """
    latest_flags = net.history[-1, ('valid_accuracy_best','valid_f1_best','valid_loss_best')]
    return any(latest_flags)


# Best-model checkpoint writer. It is only active if it is listed in the
# classifier's `callbacks`, which are currently disabled (see below).
cp = Checkpoint(
    monitor='valid_f1_best',
    dirname='model',
    f_params=f'{model_name}best_param.pkl',
    f_optimizer=f'{model_name}best_opt.pkl',
    f_history=f'{model_name}best_history.json',
)

# File-name stem used by the manual load/save cells.
path = f'{model_name}'

classifier = skorch.NeuralNetClassifier(
    model,
    # --- optimisation ---
    optimizer=torch.optim.AdamW,
    optimizer__lr=optimizer_lr,
    batch_size=batch_size,
    device=device,
    # --- data feeding ---
    # The eval set doubles as the validation split.
    train_split=predefined_split(test_set),
    iterator_train=DataLoader,
    iterator_train__shuffle=True,
    iterator_train__pin_memory=True,
    iterator_valid=DataLoader,
    iterator_valid__pin_memory=True,
    # --- disabled for now ---
    #callbacks=["accuracy","f1",cp,ProgressBar(detect_notebook=True)],
    # Kept from the original setup so later `fit` calls build on earlier ones.
    warm_start=True,
)
# Build module/optimizer right away so parameters can be loaded before any fit.
classifier.initialize()

NameError: name 'model' is not defined

In [None]:
# Smoke test: push a random batch of 3 windows through the classifier.
# `np.random.rand` returns float64, whereas the stored windows are float32 and
# torch modules hold float32 parameters by default, so cast to the dtype the
# network is actually fed.
test=np.random.rand(3,n_chans,input_time_length).astype(np.float32)
out=classifier.predict(test)
print(out)

In [None]:
#Used to load parameters for ongoing training
# Best-effort resume: the notebook keeps running either way, but the reason
# nothing was loaded is reported instead of being swallowed silently.
try:
    classifier.load_params(
        f_params=f'model/{path}_param.pkl', f_optimizer=f'model/{path}_opt.pkl', f_history=f'model/{path}_history.json')
    print("Paramters Loaded")
except FileNotFoundError:
    # Nothing was saved by an earlier run: keep the current initialisation.
    print("No saved parameters found; continuing with the current weights")
except Exception as err:
    # e.g. a checkpoint that no longer matches the model definition.
    print(f"Saved parameters could not be loaded ({err!r}); continuing with the current weights")

In [None]:
# Train for 3 epochs. `y=None` because `train_set` already yields
# (window, label) pairs.
classifier.fit(train_set,y=None,epochs=3)

In [None]:
# Persist weights, optimizer state and history so a later session can resume.
# The target directory is created first: with the Checkpoint callback disabled
# nothing else creates `model/`, and writing into a missing directory fails.
os.makedirs('model',exist_ok=True)
classifier.save_params(
    f_params=f'model/{path}_param.pkl', f_optimizer=f'model/{path}_opt.pkl', f_history=f'model/{path}_history.json')

In [34]:
# Stack every evaluation window (element 0 of each dataset item) into a single
# array; the labels are ignored here.
samples = np.array([test_set[position][0] for position in range(len(test_set))])

In [None]:
# First evaluation window with a leading axis of length 1 added, i.e. a batch
# containing a single window.
first_window = test_set[0][0]
sample = first_window[np.newaxis, :, :]

In [36]:
# Restore the weights and training history stored under the 'best' file names in
# `model/`; no optimizer state is passed, so none is reloaded here.
classifier.load_params(
        f_params=f'model/{model_name}best_param.pkl', f_history=f'model/{model_name}best_history.json')
print("Paramters Loaded")

Paramters Loaded


In [38]:
# Predict a class label for every stacked evaluation window.
out=classifier.predict(samples)

In [None]:
# Evaluate with the weights stored under the 'best' file names.
classifier.load_params(
        f_params=f'model/{model_name}best_param.pkl', f_history=f'model/{model_name}best_history.json')
print("Paramters Loaded")
pred_labels=classifier.predict(test_set)
# The labels are already held in memory by the dataset. Iterating `test_set`
# would open and read every HDF5 file just to throw the window away.
actual_labels=list(test_set.label)

In [32]:
# Per-class precision / recall / F1 summary. The report is a multi-line string,
# so it is printed; a bare expression would show its repr with literal "\n".
report=classification_report(actual_labels,pred_labels,target_names=['false','true'])
print(report)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Summary metrics for the positive class (label 1).
actual_labels=np.array(actual_labels)
# Column 1 of predict_proba is the score of the positive class.
auc=roc_auc_score(actual_labels,classifier.predict_proba(test_set)[:,1])
accuracy=np.mean(pred_labels==actual_labels)

# With 0/1 labels the element-wise product is 1 only for true positives.
tp=np.sum(pred_labels*actual_labels)
predicted_positives=np.sum(pred_labels)
actual_positives=np.sum(actual_labels)
# Guard the ratios: when no positives are predicted (or present) the plain
# division yields nan and poisons F1. Report 0 in that case.
precision=tp/predicted_positives if predicted_positives else 0.0
recall=tp/actual_positives if actual_positives else 0.0
f1=2*precision*recall/(precision+recall) if (precision+recall) else 0.0

print(model_name)
print(f"Accuracy:{accuracy}")
print(f"F1-Score:{f1}")
print(f"roc_auc score:{auc}")