In [1]:
import numpy as np
import tensorflow as tf
import keras
import os
from tqdm import tqdm
import time
import keras.backend as K

2022-02-28 00:55:55.505272: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
np.random.seed(1)

In [3]:
class DataGenerator(tf.compat.v2.keras.utils.Sequence):
    def __init__(self, data_path, ann_path, list_files, list_ann_files, 
                 batch_size=64, dim=(3000,1), n_classes=5, shuffle=True):
        # Constructor of the data generator.
        self.dim = dim
        self.batch_size = batch_size
        self.data_path = data_path
        self.ann_path = ann_path
        self.list_files = list_files
        self.list_ann_files = list_ann_files
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.get_cnts() #Get the data count for each file        
        self.on_epoch_end() #Initialize file indexes        
        

    def __len__(self):
        # Denotes the number of batches per epoch
        return int((self.total_len+1) / self.batch_size)
    

    def __getitem__(self, index):
        
        start = index*self.batch_size
        end = min((index+1)*self.batch_size, self.total_len)
        
        X = np.empty((end - start,) + self.dim, dtype=np.float32)
        y = np.empty((end - start,), dtype=np.int32)
        
        curr_file_idx, accum_start, accum_end = self.get_accum_idx(index)
        
        curr_file = self.list_files[self.file_indexes[curr_file_idx]]
        curr_ann_file = self.list_ann_files[self.file_indexes[curr_file_idx]]
        data_index = self.data_indexes[self.file_indexes[curr_file_idx]]
        
        curr_np = np.load(os.path.join(self.data_path, curr_file))
        curr_ann = np.load(os.path.join(self.ann_path, curr_ann_file))
        curr_np = curr_np[data_index]
        curr_ann = curr_ann[data_index]
        
        

        
        X_1 = curr_np[start - accum_start:end - accum_start] 
        y_1 = curr_ann[start - accum_start:end - accum_start]
        from_curr = min(accum_end - start, end - start)
        X[:from_curr] = np.expand_dims(X_1, axis=-1)
        y[:from_curr] = y_1
        
        if end > accum_end:
            curr_file_idx += 1
            accum_start = accum_end
            accum_end += self.list_cnt[self.file_indexes[curr_file_idx]]
            curr_file = self.list_files[self.file_indexes[curr_file_idx]]            
            data_index = self.data_indexes[self.file_indexes[curr_file_idx]]
            
            
            curr_ann_file = self.list_ann_files[self.file_indexes[curr_file_idx]]
            curr_np = np.load(os.path.join(self.data_path, curr_file))
            curr_ann = np.load(os.path.join(self.ann_path, curr_ann_file))

            curr_np = curr_np[data_index]
            curr_ann = curr_ann[data_index]
            #curr_np = curr_np.reshape(-1, 3000, 1)
            
            #curr_np = curr_np[1:-1]
            #curr_ann = curr_ann[1:-1]
            
            X_2 = curr_np[:end - accum_start]
            y_2 = curr_ann[:end - accum_start]
            X[from_curr:] = np.expand_dims(X_2, axis=-1)
            y[from_curr:] = y_2
        
        '''
        # Normalize data(MinMax)
        rng = np.max(X, axis=1) - np.min(X, axis=1) #X shape: (B, 3000, 1), rng: (B, 1)
        rng = np.expand_dims(rng, axis=1) #(B, 1, 1)
        X = (X - np.expand_dims(np.min(X, axis=1),axis=1)) / (rng + 1e-8)
        '''                
        return X, y
    
    def get_accum_idx(self, index):
        curr_file_idx = 0
        accum_start = 0
        accum_end = self.list_cnt[self.file_indexes[0]]
        for i in range(len(self.file_indexes)):
            if index * self.batch_size < accum_end:
                curr_file_idx = i                
                break            
            accum_start += self.list_cnt[self.file_indexes[i]]
            accum_end += self.list_cnt[self.file_indexes[i+1]]
        
        return curr_file_idx, accum_start, accum_end
        
    def on_epoch_end(self):        
        self.curr_file_idx = 0
        # This function is called at the end of each epoch.
        self.file_indexes = np.arange(len(self.list_files)) #This is necessary to shuffle files
        self.data_indexes = [np.arange(cnt) for cnt in self.list_cnt]
        if self.shuffle == True:
            np.random.shuffle(self.file_indexes)
            for i in range(len(self.list_cnt)):
                np.random.shuffle(self.data_indexes[i]) 
            
        #self.accum_start = 0 
        #self.accum_end = self.list_cnt[self.file_indexes[0]]                 
            
    def get_cnts(self):
        list_cnt = []
        for f in self.list_files:
            temp_np = np.load(os.path.join(self.data_path, f))
            cnt_data = temp_np.shape[0] 
            list_cnt.append(cnt_data)
            
        self.list_cnt = list_cnt
        self.total_len = sum(list_cnt)    

In [4]:
#curr_path = os.getcwd() + '/'
PROCESSED_DATA_PATH = os.path.join('/home','aiot','data','origin_npy')
save_signals_path = os.path.join(PROCESSED_DATA_PATH,'signals_SC_filtered')
save_annotations_path = os.path.join(PROCESSED_DATA_PATH,'annotations_SC')

In [5]:
def match_annotations_npy(dirname, filename):
    search_filename = filename.split('-')[0][:-2]
    file_list = os.listdir(dirname)
    filenames = [file for file in file_list if search_filename in file if file.endswith('.npy')]

    return filenames

In [6]:
dim_HT1D = (3000,1)
n_classes=6
epochs = 50
bs = 64
BASE_LEARNING_RATE = 1e-3
list_files = [f for f in os.listdir(save_signals_path) if f.endswith('.npy')]

In [7]:
def read_csv_to_list(filepath):
    import csv
    with open(filepath, newline='') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',')
        list_filepath = [row[0] for row in spamreader]
    return list_filepath

In [8]:
SC_train = os.path.join('/home','aiot','data','origin_npy','SC_train.csv')
SC_test = os.path.join('/home','aiot','data','origin_npy','SC_test.csv')

list_files_train = read_csv_to_list(SC_train)
list_files_test = read_csv_to_list(SC_test)

list_files_train = [f + '.npy' for f in list_files_train]
list_files_test = [f + '.npy' for f in list_files_test]

list_ann_files_train = []
list_ann_files_test = []
for f in list_files_train:
    ann_file = match_annotations_npy(save_annotations_path, f)
    list_ann_files_train.append(ann_file[0])
    
for f in list_files_test:
    ann_file = match_annotations_npy(save_annotations_path, f)
    list_ann_files_test.append(ann_file[0])

In [9]:
train_generator = DataGenerator(save_signals_path, save_annotations_path, list_files_train, list_ann_files_train, 
                          batch_size=bs, dim=dim_HT1D, n_classes=n_classes, shuffle=True)

In [10]:
test_generator = DataGenerator(save_signals_path, save_annotations_path, list_files_test, list_ann_files_test, 
                          batch_size=bs, dim=dim_HT1D, n_classes=n_classes, shuffle=False)

In [11]:
# Calculate class weight
# Tested loss with class weight, but doesn't improve the accuracy

from collections import defaultdict
cnt_class = defaultdict(int)
for x, y in train_generator:
    unique, counts = np.unique(y, return_counts=True)
    for i, cnt in zip(unique, counts):
        cnt_class[i] += cnt
cnt_class_np = np.array(list(cnt_class.values()))
class_weight = sum(cnt_class_np)/(n_classes * cnt_class_np)


In [12]:
len(list_ann_files_train), len(list_ann_files_test)

(27, 12)

In [13]:
list_ann_files_train

['SC4001EC-Hypnogram.npy',
 'SC4002EC-Hypnogram.npy',
 'SC4011EH-Hypnogram.npy',
 'SC4012EC-Hypnogram.npy',
 'SC4021EH-Hypnogram.npy',
 'SC4022EJ-Hypnogram.npy',
 'SC4031EC-Hypnogram.npy',
 'SC4032EP-Hypnogram.npy',
 'SC4041EC-Hypnogram.npy',
 'SC4042EC-Hypnogram.npy',
 'SC4051EC-Hypnogram.npy',
 'SC4052EC-Hypnogram.npy',
 'SC4061EC-Hypnogram.npy',
 'SC4062EC-Hypnogram.npy',
 'SC4131EC-Hypnogram.npy',
 'SC4141EU-Hypnogram.npy',
 'SC4142EU-Hypnogram.npy',
 'SC4151EC-Hypnogram.npy',
 'SC4152EC-Hypnogram.npy',
 'SC4161EC-Hypnogram.npy',
 'SC4162EC-Hypnogram.npy',
 'SC4171EU-Hypnogram.npy',
 'SC4172EC-Hypnogram.npy',
 'SC4181EC-Hypnogram.npy',
 'SC4182EC-Hypnogram.npy',
 'SC4191EP-Hypnogram.npy',
 'SC4192EV-Hypnogram.npy']

In [27]:
import modules
import importlib 
importlib.reload(modules)  # Python 3.4+

<module 'modules' from '/home/keondopark/sleep/modules.py'>

In [28]:
model = modules.Conv1DAttention2()

In [29]:
x = np.random.random((1,3000,1))
x = tf.convert_to_tensor(x)
model(x)

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[0.16749106, 0.1742762 , 0.15472369, 0.16053179, 0.17803392,
        0.1649434 ]], dtype=float32)>

In [30]:
model.summary()

Model: "conv1d_attention2_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_block_34 (conv1d_bloc multiple                  320       
_________________________________________________________________
conv1d_block_35 (conv1d_bloc multiple                  256       
_________________________________________________________________
conv1d_block_36 (conv1d_bloc multiple                  480       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 multiple                  0         
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 multiple                  0         
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 multiple                  0         
_________________________________________________________________
conv1d_block_37 (conv1d_bloc multiple          

In [31]:
def get_current_lr(epoch):
    lr = BASE_LEARNING_RATE
    for _ in range(epoch // 10):
        lr *= 0.1
    return lr

def adjust_learning_rate(optimizer, epoch):
    lr = get_current_lr(epoch)
    optimizer.learning_rate = lr

In [32]:
def weighted_categorical_crossentropy(weights):
    """
    A weighted version of keras.objectives.categorical_crossentropy
    
    Variables:
        weights: numpy array of shape (C,) where C is the number of classes
    
    Usage:
        weights = np.array([0.5,2,10]) # Class one at 0.5, class 2 twice the normal weights, class 3 10x.
        loss = weighted_categorical_crossentropy(weights)
        model.compile(loss=loss,optimizer='adam')
    """
    
    weights = K.variable(weights)
        
    def loss(y_true, y_pred):
        bs = y_pred.shape[0]
        # scale predictions so that the class probas of each sample sum to 1
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        # clip to prevent NaN's and Inf's
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # calc
        loss = y_true * K.log(y_pred) * weights
        #loss = -K.sum(loss, -1)
        loss = -K.sum(loss) / bs
        return loss
    
    return loss

In [33]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
#loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
loss_fn = weighted_categorical_crossentropy(weights=class_weight)

In [34]:
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(ckpt, './ckpt_Advanced_Conv1D', max_to_keep=1)

In [35]:
start_epoch = 0
#if manager.latest_checkpoint:
#    ckpt.restore(manager.latest_checkpoint)
#    start_epoch = ckpt.step.numpy()-1

In [36]:
best_test_acc = 0.0
for e in range(start_epoch, epochs):
    correct, total_cnt, total_loss = 0.0, 0.0, 0.0
    print('-'*20, 'Epoch ' + str(e) + '-'*20)
    adjust_learning_rate(optimizer, e)
    start = time.time()
    for idx, (x, y) in enumerate(train_generator):   
        y_onehot = tf.one_hot(y, depth=n_classes)
        with tf.GradientTape() as tape:
            y_pred = model(x, training=True)
            loss = loss_fn(y_onehot, y_pred)
            #loss = loss_fn(y, y_pred)
        
        total_cnt += y_pred.shape[0]
        y_pred_cls = tf.math.argmax(y_pred, axis=-1)
        correct += tf.reduce_sum(tf.cast(tf.equal(y_pred_cls, y), tf.float32))
        total_loss += loss * y_pred.shape[0]
        if (idx + 1) % 10 == 0 or idx+1 == len(train_generator):
            print("[%d / %d] Training loss: %.6f, Training acc: %.3f"%
                  (idx+1, len(train_generator), total_loss / total_cnt, correct / total_cnt),end='\r', flush=True)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
    print("")
    print("Training time: %.2f sec "%(time.time() - start))
    
    start = time.time()
    
    correct, total_cnt, total_loss = 0.0, 0.0, 0.0
    for idx, (x, y) in enumerate(test_generator):
        y_pred = model(x, training=False)
        y_pred_cls = tf.math.argmax(y_pred, axis=-1)
        correct += tf.reduce_sum(tf.cast(tf.equal(y_pred_cls, y), tf.float32))
        total_cnt += y_pred.shape[0]
        y = tf.cast(y, dtype=tf.int32)
        y_onehot = tf.one_hot(y, depth=n_classes)
        #total_loss += loss_fn(y, y_pred).numpy() * y_pred.shape[0]
        total_loss += loss_fn(y_onehot, y_pred).numpy() * y_pred.shape[0]
            
        test_acc = correct / total_cnt
        test_loss = total_loss / total_cnt
        if (idx + 1) % 10 == 0 or idx+1 == len(test_generator):
            print("[%d / %d] test loss: %.6f, test accuracy: %.3f"%
                  (idx+1, len(test_generator), test_loss, test_acc),end='\r', flush=True)
    print("")
    print("Eval time: %.2f sec"%(time.time() - start))
    ckpt.step.assign_add(1)
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        save_path = manager.save()
        print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
    

-------------------- Epoch 0--------------------
[1162 / 1162] Training loss: 3.180745, Training acc: 0.798
Training time: 130.09 sec 
[515 / 515] test loss: 146.889364, test accuracy: 0.709
Eval time: 27.83 sec
Saved checkpoint for step 2: ./ckpt_Advanced_Conv1D/ckpt-1
-------------------- Epoch 1--------------------
[1162 / 1162] Training loss: 3.385102, Training acc: 0.726
Training time: 130.57 sec 
[515 / 515] test loss: 56.402125, test accuracy: 0.661
Eval time: 27.20 sec
-------------------- Epoch 2--------------------
[1162 / 1162] Training loss: 3.588799, Training acc: 0.720
Training time: 129.54 sec 
[515 / 515] test loss: 158.990808, test accuracy: 0.682
Eval time: 29.97 sec
-------------------- Epoch 3--------------------
[1162 / 1162] Training loss: 3.572134, Training acc: 0.792
Training time: 129.64 sec 
[515 / 515] test loss: 153.431744, test accuracy: 0.764
Eval time: 29.66 sec
Saved checkpoint for step 5: ./ckpt_Advanced_Conv1D/ckpt-2
-------------------- Epoch 4-------

[515 / 515] test loss: 150.171728, test accuracy: 0.789
Eval time: 27.82 sec
-------------------- Epoch 38--------------------
[1162 / 1162] Training loss: 1.679446, Training acc: 0.874
Training time: 132.22 sec 
[515 / 515] test loss: 150.187687, test accuracy: 0.790
Eval time: 27.85 sec
-------------------- Epoch 39--------------------
[1162 / 1162] Training loss: 1.678872, Training acc: 0.874
Training time: 132.26 sec 
[515 / 515] test loss: 150.203463, test accuracy: 0.790
Eval time: 27.65 sec
-------------------- Epoch 40--------------------
[1162 / 1162] Training loss: 1.677057, Training acc: 0.872
Training time: 132.42 sec 
[515 / 515] test loss: 150.204918, test accuracy: 0.790
Eval time: 27.85 sec
-------------------- Epoch 41--------------------
[1162 / 1162] Training loss: 1.676919, Training acc: 0.872
Training time: 132.07 sec 
[515 / 515] test loss: 150.207589, test accuracy: 0.791
Eval time: 27.65 sec
-------------------- Epoch 42--------------------
[1162 / 1162] Trainin

In [None]:
correct, total_cnt, total_loss = 0.0, 0.0, 0.0
confusion_matrix = np.zeros((n_classes,n_classes))
for idx, (x, y) in enumerate(test_generator):
    y_pred = model(x, training=False)
    y_pred_cls = tf.math.argmax(y_pred, axis=-1)
    correct += tf.reduce_sum(tf.cast(tf.equal(y_pred_cls, y), tf.float32))
    total_cnt += y_pred.shape[0]
    y = tf.cast(y, dtype=tf.int32)    
    for i in range(n_classes):
        for j in range(n_classes):
            confusion_matrix[i,j] += np.sum((y_pred_cls.numpy()==i) * (y.numpy()==j))


In [None]:
for i in range(n_classes):
    print_ln = ""
    for j in range(n_classes):
        print_ln += "%.3f "%(confusion_matrix[i,j] / np.sum(confusion_matrix[i]))
        #print_ln += "%d "%(confusion_matrix[i,j])
    print(print_ln)

In [None]:
x = np.arange(10)
np.random.seed(1)
np.random.shuffle(x)

In [None]:
x

In [None]:
y = np.arange(20)
np.random.shuffle(y)

In [None]:
y

In [None]:
pc = np.array([[0,0,1],[1,0,0],[0,1,0], [3,4,5]])

In [None]:
n = pc.shape[0]
x = np.tile(np.expand_dims(pc,1), [1,n,1])
y = np.empty((n,n,3))
y[:] = np.tile(np.expand_dims(pc,0), [n,1,1])
dist = np.sum((x - y) ** 2, axis=2) ** 0.5 # n by n matrix

In [None]:
instances_list = []
thr = 2
for i in range(1, n):    
    included = False
    for inst_set in instances_list:
        if i in inst_set:
            included = True
            break
    if not included:
        close_pts = set()           
        q = [i]
        while q:
            j = q.pop()
            #if j in close_pts: continue
            new_pts = set(np.where(dist[j,:] < thr)[0])
            add_pts = new_pts - close_pts
            q += list(add_pts)
            close_pts = close_pts.union(add_pts)
        
        instances_list.append(close_pts)
centroids = []
for s in instances_list:
    cent = np.mean(pc[list(s),:], axis=0)
    centroids.append(cent)
    
    

In [None]:
centroids, instances_list

In [None]:
x = np.array([[[1,2,3,4],[5,6,7,8],[4,3,2,1]], [[1,2,3,4],[5,9,7,8],[4,3,2,1]]])

In [None]:
x.shape

In [None]:
x[:,:,0] == 5

In [None]:
np.sum((x[:,:,0] == 5) * (x[:,:,1] == 6))

In [None]:
import pandas as pd
import numpy as np

In [None]:
a = np.random.random((2,3000))
b = np.ones(2)
c = 'abc'

In [None]:
df = pd.DataFrame(a)
df['b'] = b
df['c'] = c

In [None]:
df

In [None]:
d = np.random.random((2,3000))
e = np.ones(2)*2
f = 'def'
df2 = pd.DataFrame(d)
df2['b'] = e
df2['c'] = f

In [None]:
df2

In [None]:
df.append(df2)