In [1]:
import numpy as np
import tensorflow as tf
import keras
import os
from tqdm import tqdm
import time
from sklearn.preprocessing import scale
import keras.backend as K

2022-02-22 21:34:24.508856: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
np.random.seed(1)

In [3]:
class DataGenerator(tf.compat.v2.keras.utils.Sequence):
    def __init__(self, data_path, list_files, list_ann_files, 
                 batch_size=64, dim=(3000,1), n_classes=5, shuffle=True):
        # Constructor of the data generator.
        self.dim = dim
        self.batch_size = batch_size
        self.data_path = data_path
        self.list_files = list_files
        self.list_ann_files = list_ann_files
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.get_cnts() #Get the data count for each file        
        self.on_epoch_end() #Initialize file indexes        
        

    def __len__(self):
        # Denotes the number of batches per epoch
        return int((self.total_len+1) / self.batch_size)
    

    def __getitem__(self, index):
        
        start = index*self.batch_size
        end = min((index+1)*self.batch_size, self.total_len)
        
        X = np.empty((end - start,) + self.dim, dtype=np.float32)
        y = np.empty((end - start,), dtype=np.int32)
        
        curr_file_idx, accum_start, accum_end = self.get_accum_idx(index)
        
        curr_file = self.list_files[self.file_indexes[curr_file_idx]]
        curr_ann_file = self.list_ann_files[self.file_indexes[curr_file_idx]]
        data_index = self.data_indexes[self.file_indexes[curr_file_idx]]
        
        if curr_file.startswith('SC'):
            file_path = os.path.join(self.data_path, 'signals_SC_filtered', curr_file)
            ann_path = os.path.join(self.data_path, 'annotations_SC', curr_ann_file)
        else:
            file_path = os.path.join(self.data_path, 'signals_ST_filtered', curr_file)
            ann_path = os.path.join(self.data_path, 'annotations_ST', curr_ann_file)
        
        curr_np = np.load(file_path)
        curr_ann = np.load(ann_path)
        curr_np = curr_np[data_index]
        curr_ann = curr_ann[data_index]
        
        X_1 = curr_np[start - accum_start:end - accum_start] 
        y_1 = curr_ann[start - accum_start:end - accum_start]
        from_curr = min(accum_end - start, end - start)
        X[:from_curr] = np.expand_dims(X_1, axis=-1)
        y[:from_curr] = y_1
        
        if end > accum_end:
            curr_file_idx += 1
            accum_start = accum_end
            accum_end += self.list_cnt[self.file_indexes[curr_file_idx]]
            curr_file = self.list_files[self.file_indexes[curr_file_idx]]            
            curr_ann_file = self.list_ann_files[self.file_indexes[curr_file_idx]]
            data_index = self.data_indexes[self.file_indexes[curr_file_idx]]
            
            if curr_file.startswith('SC'):
                file_path = os.path.join(self.data_path, 'signals_SC_filtered', curr_file)
                ann_path = os.path.join(self.data_path, 'annotations_SC', curr_ann_file)
            else:
                file_path = os.path.join(self.data_path, 'signals_ST_filtered', curr_file)
                ann_path = os.path.join(self.data_path, 'annotations_ST', curr_ann_file)
                
            curr_np = np.load(file_path)
            curr_ann = np.load(ann_path)

            curr_np = curr_np[data_index]
            curr_ann = curr_ann[data_index]
            
            X_2 = curr_np[:end - accum_start]
            y_2 = curr_ann[:end - accum_start]
            X[from_curr:] = np.expand_dims(X_2, axis=-1)
            y[from_curr:] = y_2
        
        '''
        # Normalize data(MinMax)
        rng = np.max(X, axis=1) - np.min(X, axis=1) #X shape: (B, 3000, 1), rng: (B, 1)
        rng = np.expand_dims(rng, axis=1) #(B, 1, 1)
        X = (X - np.expand_dims(np.min(X, axis=1),axis=1)) / (rng + 1e-8)
        '''                
        return X, y
    
    def get_accum_idx(self, index):
        curr_file_idx = 0
        accum_start = 0
        accum_end = self.list_cnt[self.file_indexes[0]]
        for i in range(len(self.file_indexes)):
            if index * self.batch_size < accum_end:
                curr_file_idx = i                
                break            
            accum_start += self.list_cnt[self.file_indexes[i]]
            accum_end += self.list_cnt[self.file_indexes[i+1]]
        
        return curr_file_idx, accum_start, accum_end
        
    def on_epoch_end(self):        
        self.curr_file_idx = 0
        # This function is called at the end of each epoch.
        self.file_indexes = np.arange(len(self.list_files)) #This is necessary to shuffle files
        self.data_indexes = [np.arange(cnt) for cnt in self.list_cnt]
        if self.shuffle == True:
            np.random.shuffle(self.file_indexes)
            for i in range(len(self.list_cnt)):
                np.random.shuffle(self.data_indexes[i]) 
            
        #self.accum_start = 0 
        #self.accum_end = self.list_cnt[self.file_indexes[0]]                 
            
    def get_cnts(self):
        list_cnt = []
        for f in self.list_ann_files:
            if f.startswith('SC'):
                #file_path = os.path.join(self.data_path, 'signals_SC_filtered', curr_file)
                ann_path = os.path.join(self.data_path, 'annotations_SC', f)
            else:
                #file_path = os.path.join(self.data_path, 'signals_ST_filtered', curr_file)
                ann_path = os.path.join(self.data_path, 'annotations_ST', f)
            temp_np = np.load(ann_path)
            cnt_data = temp_np.shape[0] 
            list_cnt.append(cnt_data)
            
        self.list_cnt = list_cnt
        self.total_len = sum(list_cnt)    

In [4]:
curr_path = os.getcwd() + '/'
PROCESSED_DATA_PATH = os.path.join('/home','aiot','data','origin_npy')
save_signals_path = os.path.join(PROCESSED_DATA_PATH,'signals_SC_filtered')
save_annotations_path = os.path.join(PROCESSED_DATA_PATH,'annotations_SC')

In [5]:
def match_annotations_npy(dirname, filename):
    search_filename = filename.split('-')[0][:-2]
    file_list = os.listdir(dirname)
    filenames = [file for file in file_list if search_filename in file if file.endswith('.npy')]

    return filenames

In [6]:
dim_HT1D = (3000,1) 
n_classes=6
epochs = 50
bs = 64
BASE_LEARNING_RATE = 1e-3
list_files = [f for f in os.listdir(save_signals_path) if f.endswith('.npy')]

save_signals_path_ST = os.path.join(PROCESSED_DATA_PATH,'signals_ST_filtered')
save_annotations_path_ST = os.path.join(PROCESSED_DATA_PATH,'annotations_ST')

list_files_ST = [f for f in os.listdir(save_signals_path_ST) if f.endswith('.npy')]


In [7]:
def read_csv_to_list(filepath):
    import csv
    with open(filepath, newline='') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',')
        list_filepath = [row[0] for row in spamreader]
    return list_filepath


In [8]:
#SC_train = os.path.join('/home','aiot','data','origin_npy','SC_train.csv')
#SC_test = os.path.join('/home','aiot','data','origin_npy','SC_test.csv')

#list_files_train = read_csv_to_list(SC_train)
#list_files_test = read_csv_to_list(SC_test)

#list_files_train = [f + '.npy' for f in list_files_train]
#list_files_test = [f + '.npy' for f in list_files_test]

split_cnt = int(len(list_files) * 0.7)
list_files_train = list_files[:split_cnt]
list_files_test = list_files[split_cnt:]

split_cnt = int(len(list_files_ST) * 0.7)
list_files_train += list_files_ST[:split_cnt]
list_files_test += list_files_ST[split_cnt:]
#list_ann_files_train = list_ann_files[:split_cnt]
#list_ann_files_test = list_ann_files[split_cnt:]


list_ann_files_train = []
list_ann_files_test = []
for f in list_files_train:
    if f.startswith('SC'):
        ann_file = match_annotations_npy(save_annotations_path, f)
    else:
        ann_file = match_annotations_npy(save_annotations_path_ST, f)
    list_ann_files_train.append(ann_file[0])
    
for f in list_files_test:
    if f.startswith('SC'):
        ann_file = match_annotations_npy(save_annotations_path, f)
    else:
        ann_file = match_annotations_npy(save_annotations_path_ST, f)
    list_ann_files_test.append(ann_file[0])

#list_files_train = list_files[:5]
#list_files_test = list_files[80:90]
#list_ann_files_train = list_ann_files[0:5]
#list_ann_files_test = list_ann_files[80:90]

In [9]:
train_generator = DataGenerator(PROCESSED_DATA_PATH, list_files_train, list_ann_files_train, 
                          batch_size=bs, dim=dim_HT1D, n_classes=n_classes, shuffle=True)

In [10]:
test_generator = DataGenerator(PROCESSED_DATA_PATH, list_files_test, list_ann_files_test, 
                          batch_size=bs, dim=dim_HT1D, n_classes=n_classes, shuffle=False)

In [11]:
# Calculate class weight
# Tested loss with class weight, but doesn't improve the accuracy
'''
from collections import defaultdict
cnt_class = defaultdict(int)
for x, y in train_generator:
    unique, counts = np.unique(y, return_counts=True)
    for i, cnt in zip(unique, counts):
        cnt_class[i] += cnt
cnt_class_np = np.array(list(cnt_class.values()))
class_weight = sum(cnt_class_np)/(n_classes * cnt_class_np)
'''

'\nfrom collections import defaultdict\ncnt_class = defaultdict(int)\nfor x, y in train_generator:\n    unique, counts = np.unique(y, return_counts=True)\n    for i, cnt in zip(unique, counts):\n        cnt_class[i] += cnt\ncnt_class_np = np.array(list(cnt_class.values()))\nclass_weight = sum(cnt_class_np)/(n_classes * cnt_class_np)\n'

In [12]:
import resnet1D
import resnet1D_Ahmed

In [13]:
import importlib 
importlib.reload(resnet1D)  # Python 3.4+
importlib.reload(resnet1D_Ahmed)  # Python 3.4+

<module 'resnet1D_Ahmed' from '/home/keondopark/sleep/resnet1D_Ahmed.py'>

In [14]:
#model = resnet1D.ResNet34(input_shape=(3000,1), num_classes=n_classes)
model = resnet1D_Ahmed.eegnet()

2022-02-22 21:34:52.492822: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2022-02-22 21:34:53.366768: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 3090 computeCapability: 8.6
coreClock: 1.695GHz coreCount: 82 deviceMemorySize: 23.70GiB deviceMemoryBandwidth: 871.81GiB/s
2022-02-22 21:34:53.367738: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2022-02-22 21:34:53.999954: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2022-02-22 21:34:54.000013: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11
2022-02-22 21:34:54.139737: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcu

In [15]:
def weighted_categorical_crossentropy(weights):
    """
    A weighted version of keras.objectives.categorical_crossentropy
    
    Variables:
        weights: numpy array of shape (C,) where C is the number of classes
    
    Usage:
        weights = np.array([0.5,2,10]) # Class one at 0.5, class 2 twice the normal weights, class 3 10x.
        loss = weighted_categorical_crossentropy(weights)
        model.compile(loss=loss,optimizer='adam')
    """
    
    weights = K.variable(weights)
        
    def loss(y_true, y_pred):
        bs = y_pred.shape[0]
        # scale predictions so that the class probas of each sample sum to 1
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        # clip to prevent NaN's and Inf's
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # calc
        loss = y_true * K.log(y_pred) * weights
        #loss = -K.sum(loss, -1)
        loss = -K.sum(loss) / bs
        return loss
    
    return loss

In [16]:
optimizer = tf.keras.optimizers.Adam(learning_rate=BASE_LEARNING_RATE)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
#loss_fn = weighted_categorical_crossentropy(weights=class_weight) #class_weight)

In [17]:
def get_current_lr(epoch):
    lr = BASE_LEARNING_RATE
    for _ in range(epoch // 10):
        lr *= 0.1
    return lr

def adjust_learning_rate(optimizer, epoch):
    lr = get_current_lr(epoch)
    optimizer.learning_rate = lr

In [18]:
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(ckpt, './ckpt_ResNet341D_ST', max_to_keep=1)

In [19]:
start_epoch = 0
if manager.latest_checkpoint:
    ckpt.restore(manager.latest_checkpoint)
    start_epoch = ckpt.step.numpy()-1

In [None]:
best_test_acc = 0.0
for e in range(start_epoch, epochs):
    correct, total_cnt, total_loss = 0.0, 0.0, 0.0
    print('-'*20, 'Epoch ' + str(e) + '-'*20)
    adjust_learning_rate(optimizer, e)
    start = time.time()
    for idx, (x, y) in enumerate(train_generator):               
        #y_onehot = tf.one_hot(y, depth=n_classes)
        with tf.GradientTape() as tape:
            y_pred = model(x, training=True)
            loss = loss_fn(y, y_pred)
        
        total_cnt += y_pred.shape[0]
        y_pred_cls = tf.math.argmax(y_pred, axis=-1)
        correct += tf.reduce_sum(tf.cast(tf.equal(y_pred_cls, y), tf.float32))
        total_loss += loss * y_pred.shape[0]
        if (idx + 1) % 10 == 0:
            print("[%d / %d] Training loss: %.6f, Training acc: %.3f"%
                  (idx+1, len(train_generator), total_loss / total_cnt, correct / total_cnt),end='\r', flush=True)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
    print("")
    print("Training time: %.2f sec "%(time.time() - start))
    
    start = time.time()
    
    correct, total_cnt, total_loss = 0.0, 0.0, 0.0
    for idx, (x, y) in enumerate(test_generator):
        y_pred = model(x, training=False)
        y_pred_cls = tf.math.argmax(y_pred, axis=-1)
        #y_onehot = tf.one_hot(y, depth=n_classes)
        correct += tf.reduce_sum(tf.cast(tf.equal(y_pred_cls, y), tf.float32))
        total_cnt += y_pred.shape[0]
        y = tf.cast(y, dtype=tf.int32)
        total_loss += loss_fn(y, y_pred).numpy() * y_pred.shape[0]
            
        test_acc = correct / total_cnt
        test_loss = total_loss / total_cnt
        if (idx + 1) % 10 == 0:
            print("[%d / %d] test loss: %.6f, test accuracy: %.3f"%
                  (idx+1, len(test_generator), test_loss, test_acc),end='\r', flush=True)
    print("")
    print("Eval time: %.2f sec"%(time.time() - start))
    ckpt.step.assign_add(1)
    if test_acc > best_test_acc:
        save_path = manager.save()
        print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
    

-------------------- Epoch 9--------------------


2022-02-22 21:34:58.830049: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8
2022-02-22 21:35:00.588938: I tensorflow/stream_executor/cuda/cuda_dnn.cc:359] Loaded cuDNN version 8201
2022-02-22 21:35:05.532750: E tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-02-22 21:35:05.532817: W tensorflow/stream_executor/gpu/asm_compiler.cc:56] Couldn't invoke ptxas --version
2022-02-22 21:35:05.534020: E tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-02-22 21:35:05.534478: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2022-02-22 21:35:05.753353: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully ope

[4930 / 4939] Training loss: 0.322008, Training acc: 0.883
Training time: 1159.52 sec 
[2130 / 2139] test loss: 0.710047, test accuracy: 0.806
Eval time: 150.72 sec
Saved checkpoint for step 11: ./ckpt_ResNet341D_ST/ckpt-10
-------------------- Epoch 10--------------------
[4930 / 4939] Training loss: 0.337886, Training acc: 0.877
Training time: 1122.17 sec 
[2130 / 2139] test loss: 0.540484, test accuracy: 0.847
Eval time: 139.01 sec
Saved checkpoint for step 12: ./ckpt_ResNet341D_ST/ckpt-11
-------------------- Epoch 11--------------------
[4930 / 4939] Training loss: 0.325218, Training acc: 0.881
Training time: 1117.91 sec 
[2130 / 2139] test loss: 0.534706, test accuracy: 0.847
Eval time: 137.06 sec
Saved checkpoint for step 13: ./ckpt_ResNet341D_ST/ckpt-12
-------------------- Epoch 12--------------------
[4930 / 4939] Training loss: 0.320388, Training acc: 0.882
Training time: 1117.07 sec 
[2130 / 2139] test loss: 0.537012, test accuracy: 0.848
Eval time: 139.24 sec
Saved checkpo

[4930 / 4939] Training loss: 0.302038, Training acc: 0.888
Training time: 1129.06 sec 
[2130 / 2139] test loss: 0.514251, test accuracy: 0.859
Eval time: 139.98 sec
Saved checkpoint for step 41: ./ckpt_ResNet341D_ST/ckpt-40
-------------------- Epoch 40--------------------
[4930 / 4939] Training loss: 0.302015, Training acc: 0.887
Training time: 1134.15 sec 
[2130 / 2139] test loss: 0.513969, test accuracy: 0.859
Eval time: 141.75 sec
Saved checkpoint for step 42: ./ckpt_ResNet341D_ST/ckpt-41
-------------------- Epoch 41--------------------
[4930 / 4939] Training loss: 0.301892, Training acc: 0.887
Training time: 1126.71 sec 
[2130 / 2139] test loss: 0.513679, test accuracy: 0.860
Eval time: 137.14 sec
Saved checkpoint for step 43: ./ckpt_ResNet341D_ST/ckpt-42
-------------------- Epoch 42--------------------
[4930 / 4939] Training loss: 0.301852, Training acc: 0.887
Training time: 1121.86 sec 
[2130 / 2139] test loss: 0.513353, test accuracy: 0.860
Eval time: 137.13 sec
Saved checkpo

In [None]:
model.summary()