# Modeling

In [1]:
import gzip
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt

DATA_PATH = '/root/Workspace/DataWarehouse/stMary_RRpo'

In [2]:
with gzip.open(f'{DATA_PATH}/21_230518_resamp_sliced125_filt_patient_stmary.pickle.gzip', 'rb') as f:
    dataset = pickle.load(f)

print(len(dataset), len(dataset[0][0]))

random.seed(42)
random.shuffle(dataset)

pleths = []
resps = []
for ppg, rr in dataset:
    pleths.append(ppg.astype(np.float64))
    resps.append(rr)

pleths = np.asarray(pleths)
resps = np.asarray(resps)
print(pleths.shape, resps.shape)

6508 1800
(6508, 1800) (6508,)


## Normalization

In [3]:
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

In [4]:
scaler = MinMaxScaler()
scaled_pleths = np.asarray([scaler.fit_transform(pleth.reshape(-1,1)) for pleth in pleths])
print(scaled_pleths.shape, type(scaled_pleths[0][0][0]))

ratio_tr = 0.8
train_x, train_y = scaled_pleths[:int(len(scaled_pleths)*ratio_tr)], resps[:int(len(resps)*ratio_tr)]
val_x, val_y = scaled_pleths[int(len(scaled_pleths)*ratio_tr):], resps[int(len(resps)*ratio_tr):]
print(train_x.shape, train_y.shape)
print(val_x.shape, val_y.shape)

(6508, 1800, 1) <class 'numpy.float64'>
(5206, 1800, 1) (5206,)
(1302, 1800, 1) (1302,)


## Architecture: ResNet 34 Layer

- ResNet 모델 구조는 [논문](https://arxiv.org/pdf/1512.03385.pdf)을 참고하였다. 다만 해당 논문은 ImageNet의 데이터를 실행시키기 위한 것으로 조금의 조정이 필요한데 이를 변경하였을 때 그 내용을 논문에 기재할 필요가 있는지 의문이다.
- 또는 만약 기재해야 한다면 내가 전부터 참고했던 [Bian의 논문](https://ieeexplore.ieee.org/document/9176231)을 참고해도 될 것 같다.

In [5]:
import keras
import tensorflow as tf
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D, Dense, BatchNormalization, Activation, Add, Flatten
print(f'Is GPU Avaliable: {tf.config.list_physical_devices("GPU")}')

2023-05-19 16:26:24.968220: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-19 16:26:25.008626: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Is GPU Avaliable: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [6]:
class ResidualBlock(Model):
    def __init__(self, filters, kernel_size, strides, identity_mapping=None, *args, **kwargs):
        super(ResidualBlock, self).__init__(*args, **kwargs)
        self.conv1 = Conv1D(filters=filters, kernel_size=kernel_size, strides=strides[0], padding='same')
        self.bn1 = BatchNormalization()

        self.conv2 = Conv1D(filters=filters, kernel_size=kernel_size, strides=strides[1], padding='same')
        self.bn2 = BatchNormalization()

        self.identity_mapping = identity_mapping
        self.conv_identity = Conv1D(filters=filters, kernel_size=1, strides=strides[0], padding='same')
        

    def call(self, inputs, training=None, mask=None):
        identity = inputs
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = Activation('relu')(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        # 448, 64 / 224, 128
        if self.identity_mapping:
            identity = self.conv_identity(inputs)
            # print(inputs.shape, identity.shape)

        x = Add()([x, identity])
        return Activation('relu')(x)

In [7]:
class ResNet34(Model):
    def __init__(self, *args, **kwargs):
        super(ResNet34, self).__init__(*args, **kwargs)
        self.conv1 = Conv1D(filters=64, kernel_size=7, strides=2)
        self.max1d = MaxPooling1D(pool_size=3, strides=2)
        self.resnet_block1 = [ResidualBlock(64, 3, (1,1)) for i in range(3)]
        
        self.resnet_block2_entry = ResidualBlock(128, 3, (2,1), identity_mapping=True)
        self.resnet_block2 = [ResidualBlock(128, 3, (1,1)) for i in range(3)]

        self.resnet_block3_entry = ResidualBlock(256, 3, (2,1), identity_mapping=True)
        self.resnet_block3 = [ResidualBlock(256, 3, (1,1)) for i in range(5)]

        self.resnet_block4_entry = ResidualBlock(512, 3, (2,1), identity_mapping=True)
        self.resnet_block4 = [ResidualBlock(512, 3, (1,1)) for i in range(2)]

        self.avg1d = AveragePooling1D(strides=2, padding='same')
        self.flatten = Flatten()
        self.d100 = Dense(100, activation='relu')
        self.d50 = Dense(50, activation='relu')
        self.d10 = Dense(10, activation='relu')
        self.d1 = Dense(1)

    
    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.max1d(x)

        for block in self.resnet_block1:
            x = block(x, training=training)

        x = self.resnet_block2_entry(x, training=training)
        for block in self.resnet_block2:
            x = block(x, training=training)

        x = self.resnet_block3_entry(x, training=training)
        for block in self.resnet_block3:
            x = block(x, training=training)
        
        x = self.resnet_block4_entry(x, training=training)
        for block in self.resnet_block4:
            x = block(x, training=training)
        
        
        x = self.avg1d(x)
        x = self.flatten(x)
        x = self.d100(x)
        x = self.d50(x)
        x = self.d10(x)
        return self.d1(x)
    
    @tf.function
    def train_step(self, data):
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}
    
    @tf.function
    def test_step(self, data):
        x, y = data

        y_pred = self(x, training=False)
        self.compiled_loss(y, y_pred)
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

In [16]:
EPOCHS = 100
BATCH_SIZE = 64
LR = 0.001
kf = KFold(n_splits=5)
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5),
    # ModelCheckpoint('../models/230518-RRpo-4B', monitor='val_loss', save_best_only=True)
]

model = ResNet34()
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=LR, momentum=0.9, weight_decay=0.0001),
    loss=keras.losses.MeanAbsoluteError(),
    metrics=keras.metrics.MeanAbsoluteError()
)

In [17]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(BATCH_SIZE)
val_dataset = tf.data.Dataset.from_tensor_slices((val_x, val_y)).batch(BATCH_SIZE)

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
    validation_data=val_dataset
)

Epoch 1/100


2023-05-19 16:53:12.566689: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [5206]
	 [[{{node Placeholder/_1}}]]




2023-05-19 16:53:33.442851: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [1302]
	 [[{{node Placeholder/_1}}]]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


In [18]:
min(history.history['val_loss'])

0.808845579624176