# Modeling

`20_230517_sliced_filt_patient_stmary.npy`를 이용해서 ResNet 모델을 학습시킨다. 보다 좀 더 독자적인 모델로 발전시키는 것도 좋을 것 같다.

**1. Normalization**
- 불러온 PLETH 데이터 각각에 대해서 `min-max` 정규화를 실행한다.

**2. Model Configuration**
- `Dataset split`: train(80%):validation(20%) 5-Fold CV
- `Testset`: BIDMC dataset
- `Batch size`: [4, 16, 32, 64, 128, 256] 
- `Epochs`: 100
- `Callbacks`: [Earlystopping, ModelCheckpoint]
- `Optimizer`: Adam
- `Loss function`: [MAE, MSE, RMSE]

In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt

In [2]:
dataset = np.load('../../DataWarehouse/stMary_RRpo/20_230517_sliced_filt_patient_stmary.npy', allow_pickle=True)
print(dataset.shape)
print(dataset[:5][:])
print(len(dataset[0][0]))

(489, 2)
[[array([ 1.34310827e-01, -1.72755175e+01, -3.50188641e+01, ...,
          2.53006996e+02,  3.04690014e+02,  3.49826494e+02])     18]
 [array([-173.89644469, -191.88503338, -207.42843855, ..., -614.50780984,
         -617.1630912 , -615.95674291])
  17]
 [array([-61.06533745, -44.254553  , -28.86251007, ..., -43.80296885,
         -55.05317591, -66.50906096])                                 17]
 [array([-231.89353892, -248.54359463, -265.19371548, ..., -394.30426383,
         -403.98687518, -414.40740483])
  16]
 [array([387.72504662, 418.03740933, 440.7437886 , ...,  14.56412795,
          -3.47130216, -20.03924253])                                 17]]
7500


In [3]:
random.seed(40)
random.shuffle(dataset)
print(dataset.shape)

(489, 2)


In [4]:
pleths = dataset[:,0]
resps = dataset[:,1].astype(np.float64)
print(pleths.shape, resps.shape)

(489,) (489,)


## 1. Normalization

In [5]:
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

In [6]:
scaler = MinMaxScaler()
scaled_pleths = np.asarray([scaler.fit_transform(pleth.reshape(-1,1)) for pleth in pleths], dtype=np.float64)
scaled_pleths.shape

(489, 7500, 1)

In [7]:
ratio_tr = 0.8
train_x, train_y = scaled_pleths[:int(len(scaled_pleths)*ratio_tr)], resps[:int(len(resps)*ratio_tr)]
val_x, val_y = scaled_pleths[int(len(scaled_pleths)*ratio_tr):], resps[int(len(resps)*ratio_tr):]
print(train_x.shape, train_y.shape)
print(val_x.shape, val_y.shape)

(391, 7500, 1) (391,)
(98, 7500, 1) (98,)


## Model Architecture: ResNet

In [8]:
import tensorflow as tf
from keras.layers import Conv1D, Add, LeakyReLU, MaxPooling1D, Flatten, Dense, BatchNormalization, Activation
print(f'Is GPU Avaliable: {tf.config.list_physical_devices("GPU")}')

2023-05-17 19:28:15.688455: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-17 19:28:15.734952: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Is GPU Avaliable: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [9]:
class ResnetIdentityBlock(tf.keras.Model):
    def __init__(self, filters, kernel_size, strides):
        super(ResnetIdentityBlock, self).__init__()
        self.block_conv1 = Conv1D(filters, kernel_size, strides=strides, padding='same')
        self.bn1 = BatchNormalization()
        self.block_conv2 = Conv1D(filters, kernel_size, strides=1, padding='same')
        self.bn2 = BatchNormalization()
        self.block_conv3 = Conv1D(filters, kernel_size, strides=1, padding='same')
        self.bn3 = BatchNormalization()
        self.block_add = Add()
        self.leaky_relu = LeakyReLU()


    def call(self, input, training=False):
        x0 = self.block_conv1(input)
        x0 = self.bn1(x0, training=training)
        x0 = Activation('relu')(x0)
        # x0 = self.leaky_relu(x0)

        x1 = self.block_conv2(x0)
        x1 = self.bn1(x1, training=training)
        x1 = Activation('relu')(x1)
        # x1 = self.leaky_relu(x1)
        x1 = self.block_conv3(x1)
        x1 = self.bn1(x1, training=training)
        x1 = Activation('relu')(x1)
        # x1 = self.leaky_relu(x1)

        x = self.block_add([x0, x1])
        # return self.leaky_relu(x)
        return Activation('relu')(x)

In [10]:
class ResNet(tf.keras.Model):
    def __init__(self):
        super(ResNet, self).__init__()
        self.resnet_block = [ResnetIdentityBlock(filters=6*(2**i), kernel_size=3, strides=2) for i in range(5)]
        self.max1d = MaxPooling1D(strides=2, padding='same')
        self.flatten = Flatten()
        self.dense0 = Dense(20, activation='relu')
        self.dense1 = Dense(10, activation='relu')
        self.dense2 = Dense(1)
        self.leaky_relu = LeakyReLU()
    
    # @tf.function
    def call(self, input, training=False):
        x = input
        for i in range(5):
            x = self.resnet_block[i](x, training)

        x = self.max1d(x)
        x = self.flatten(x)
        x = self.dense0(x)
        # x = self.leaky_relu(x)
        x = self.dense1(x)
        # x = self.leaky_relu(x)

        return self.dense2(x)
    

    @tf.function
    def train_step(self, data):
        '''
            train_step은 fit()를 타고 넘어온 data를 unpack하여 학습을 진행한다.
            compile()의 loss 함수를 토대로 self.compiled_loss를 진행한다.
            self.compiled_metrics는 compile()의 metrics를 토대로 진행한다.
        '''
        x, y = data
    
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}
    

    @tf.function
    def test_step(self, data):
        '''
            Evaluation을 위한 Custom function이다.
        '''
        x, y = data

        y_pred = self(x, training=False)
        self.compiled_loss(y, y_pred)
        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}

In [17]:
rrpo_model = ResNet()
EPOCHS = 100
BATCH_SIZE = [4, 16, 32, 64, 128, 256]
kf = KFold(n_splits=5)
rrpo_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.MeanAbsoluteError(),
)
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
    tf.keras.callbacks.ModelCheckpoint('../models/230517-RRpo-batch4/', monitor='val_loss', save_best_only=True, save_weights_only=True)
]

In [15]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(BATCH_SIZE[4])
val_dataset = tf.data.Dataset.from_tensor_slices((val_x, val_y)).batch(BATCH_SIZE[4])

In [18]:
history = []
for train_idx, val_idx in kf.split(scaled_pleths):
    X_train, y_train = scaled_pleths[train_idx], resps[train_idx]
    X_val, y_val = scaled_pleths[val_idx], resps[val_idx]

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE[0])
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE[0])

    hist = rrpo_model.fit(
        train_dataset,
        epochs=EPOCHS,
        callbacks=callbacks,
        validation_data=val_dataset,
    )
    # history.append(hist)

Epoch 1/100


2023-05-17 19:40:32.737642: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype double and shape [391]
	 [[{{node Placeholder/_1}}]]




2023-05-17 19:40:45.041305: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype double and shape [98]
	 [[{{node Placeholder/_1}}]]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 1/100
 5/98 [>.............................] - ETA: 2s - loss: 2.2297

2023-05-17 19:41:58.057687: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype double and shape [391]
	 [[{{node Placeholder/_1}}]]


Epoch 2/100


2023-05-17 19:42:01.028469: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype double and shape [98]
	 [[{{node Placeholder/_1}}]]


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 1/100
 5/98 [>.............................] - ETA: 2s - loss: 1.3834

2023-05-17 19:43:12.110794: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype double and shape [391]
	 [[{{node Placeholder/_1}}]]




2023-05-17 19:43:15.219897: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype double and shape [98]
	 [[{{node Placeholder/_1}}]]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

KeyboardInterrupt: 

In [None]:
try:
    with tf.device('/device:GPU:0'):
        hist = rrpo_model.fit(
            train_dataset,
            epochs=EPOCHS,
            callbacks=callbacks,
            validation_data=val_dataset    
        )
except RuntimeError as e:
    print(e)

In [None]:
history = []
for train_idx, val_idx in kf.split(scaled_pleths):
    X_train, y_train = scaled_pleths[train_idx], resps[train_idx]
    X_val, y_val = scaled_pleths[val_idx], resps[val_idx]

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE[0])
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE[0])

    
    hist = rrpo_model.fit(
        train_dataset,
        epochs=EPOCHS,
        callbacks=callbacks,
        validation_data=val_dataset,
    )
    history.append(hist)

In [None]:
history