In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf


In [None]:
# Init data path
# os.path.join, open() and np.load never expand '~', so resolve the home
# directory once here; every derived path is then usable by any loader.
root_dir = os.path.expanduser('~/Data/OSIC')
train_csv_path = os.path.join(root_dir, 'train.csv')
test_csv_path = os.path.join(root_dir, 'test.csv')
img_dir = os.path.join(root_dir, 'preprocessing_data')


In [None]:
# Load the train and test tables (in that order) into DataFrames
train_csv, test_csv = (
    pd.read_csv(csv_path) for csv_path in (train_csv_path, test_csv_path)
)
# Preview the first rows of the training table as the cell output
train_csv.head()

In [None]:
# Preprocessing


In [None]:
# Dataset
class Dataset:
    """Batched iterable of ``(image, features, label)`` tensors built on ``tf.data``.

    Every row of the input table supplies a tabular feature vector and a label
    (the ``label_col_name`` column). The image that belongs to the row is read
    lazily from ``<root_dir>/preprocessing_data/<Patient>.npy`` while the
    pipeline is being iterated.
    """

    label_col_name = 'FVC'
    # Default data root; pass ``root_dir=`` to the constructor to override it.
    root_dir = '/Users/younghun/Data/OSIC/'
    # Fixed shape every loaded image array is forced into by ``read_img``.
    img_shape = (1, 38, 334, 334)

    def __init__(self, data_list, epoch=1, batch_size=10, root_dir=None):
        """Build the input pipeline.

        Args:
            data_list: ``pandas.DataFrame`` containing a ``'Patient'`` column,
                the label column and the feature columns. It is NOT modified.
            epoch: number of times the pipeline repeats the data.
            batch_size: number of samples per batch (the last batch may be
                smaller, since ``drop_remainder`` is False).
            root_dir: optional data root overriding the class-level default;
                a leading ``~`` is expanded.
        """
        if root_dir is not None:
            self.root_dir = os.path.expanduser(root_dir)

        features, label_list, self.patient_list = self._split_columns(data_list)

        # init dataset: the row position travels with each sample so that
        # read_img can look up the patient id belonging to the row.
        row_index = np.arange(len(self.patient_list))
        self.dataset = tf.data.Dataset.from_tensor_slices((features, label_list, row_index))
        self.dataset = self.dataset.map(
            lambda data, label, index: tf.py_function(
                self.read_img, [data, label, index], [tf.float64, tf.float64, tf.int64]
            )
        )
        self.dataset = self.dataset.repeat(epoch)
        # NOTE: shuffling is currently disabled; samples come out in table order.
        self.dataset = self.dataset.batch(batch_size, drop_remainder=False)

    @classmethod
    def _split_columns(cls, data_list):
        """Split the table into ``(features, labels, patient_ids)`` NumPy arrays.

        Works on a column-dropped copy, so the caller's DataFrame keeps all of
        its columns. Non-numeric feature columns are one-hot encoded and the
        whole feature matrix is cast to float64, matching the dtype the
        pipeline declares for the feature tensor.
        """
        label_list = data_list[cls.label_col_name].to_numpy()
        patient_list = data_list['Patient'].to_numpy()

        feature_frame = data_list.drop(columns=[cls.label_col_name, 'Patient'])
        features = pd.get_dummies(feature_frame).to_numpy(dtype=np.float64)
        return features, label_list, patient_list

    def __iter__(self):
        """Return a fresh iterator over the batched ``tf.data`` pipeline."""
        return self.dataset.__iter__()

    def read_img(self, data, label, index: 'tf.Tensor'):
        """Load the image array for one row and return ``(img, data, label)``.

        Args:
            data: feature vector of the row (passed through unchanged).
            label: label of the row (passed through unchanged).
            index: scalar position of the row, used to look up the patient id.
        """
        img_path = os.path.join(self.root_dir, 'preprocessing_data', f'{self.patient_list[index]}.npy')
        img = np.load(img_path)
        # ndarray.resize works in place on the raw buffer: the data is truncated
        # or zero-padded to fit the target shape, it is not interpolated.
        img.resize(self.img_shape)

        return img, data, label


In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Conv3D


class PFPModel(Model):
    """Regress one scalar per sample from an image array plus tabular features.

    A stack of 3-D convolutions embeds the image, the flattened embedding is
    concatenated with the tabular feature vector, and a dense head emits a
    single value.
    """

    def __init__(self):
        super().__init__()
        self.build_model()

    def build_model(self):
        """Create the convolutional feature extractor and the dense head."""
        # NOTE(review): no data_format is given, so Conv3D uses the Keras
        # default (presumably channels_last) and the trailing axis of the
        # input is treated as channels - confirm this matches the image layout.
        self.conv1 = tf.keras.Sequential([
            Conv3D(filters=200, kernel_size=3, padding='same', activation='relu'),
            Conv3D(filters=100, kernel_size=3, padding='same', activation='relu'),
            Conv3D(filters=100, kernel_size=3, padding='same', activation='relu'),
            Conv3D(filters=50, kernel_size=3, padding='same', activation='relu'),
            Flatten(),
        ])

        self.fc = tf.keras.Sequential([
            Dense(500, activation='relu'),
            Dense(100, activation='relu'),
            Dense(1)
        ])

    def fit(self, dataset, epoch_num=100):
        """Train with a hand-written loop using Adam and mean squared error.

        Note that this replaces the stock ``Model.fit``.

        Args:
            dataset: re-iterable object yielding ``(img, x, y)`` batches; it is
                iterated once per epoch.
            epoch_num: number of passes over ``dataset``.
        """
        # compile
        self.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.MSE, metrics=['mse'])
        loss_fn = tf.keras.losses.MSE

        for epoch in range(epoch_num):
            print('EPOCH:', epoch)
            for step, (img, x, y) in enumerate(dataset):
                # Predictions have shape (batch, 1). Give the labels the same
                # shape, otherwise the subtraction inside the loss broadcasts
                # (batch, 1) against (batch,) into a (batch, batch) matrix.
                y = tf.reshape(tf.cast(y, tf.float32), (-1, 1))
                with tf.GradientTape() as tape:
                    output = self.call((img, x))
                    # Keras losses take (y_true, y_pred), in that order.
                    loss = tf.reduce_mean(loss_fn(y, output))
                # Differentiate and update outside the tape context so these
                # operations are not recorded on the tape themselves.
                gradients = tape.gradient(loss, self.trainable_variables)
                self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

                print('STEP:', step, np.mean(loss.numpy()))

    def call(self, inputs, *args, **kwargs):
        """Forward pass.

        Args:
            inputs: indexable pair ``(imgs, info)`` - the batch of image arrays
                followed by the batch of tabular feature vectors.

        Returns:
            Output of the dense head, one value per sample.
        """
        imgs = tf.cast(inputs[0], float)
        info = tf.cast(inputs[1], float)

        conv_out = self.conv1(imgs)

        # Join the flattened image embedding with the tabular features.
        merged = tf.concat((conv_out, info), axis=1)
        return self.fc(merged)

In [None]:
# Hand Dataset its own copy of the table so that train_csv keeps every column
# and this cell can be re-run without reloading the CSV.
dataset = Dataset(train_csv.copy())

# load model
model = PFPModel()
model.fit(dataset)