In [1]:
import tensorflow as tf 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from Folds_creator.Creator import Fold_Creator
from model.models import ModelCreation
from TFData.Dataset import Dataset
from callbacks.CyclicLR import CyclicLR
from utils import build_lrfn
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler
from tensorflow.keras.metrics import AUC,BinaryAccuracy 
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import load_model

In [2]:
# --- Data locations -------------------------------------------------------
TRAIN_DF = './Dataset/train.csv'
TEST_DF = './Dataset/test.csv'
TEST_TFRECORDS = tf.io.gfile.glob('./Dataset/tfrecords/test*.tfrec')

# --- Dataset sizes (hard-coded row counts; keep in sync with the CSV files) -
TRAINING_DATA_SIZE = 33126
TEST_DATA_SIZE = 10982

# --- Input pipeline ---------------------------------------------------------
BATCH_SIZE = 8
IMAGE_SIZE = [1024, 1024]
RESIZE_SHAPE = [256, 256]
SHUFFLE = 256
VALIDATION_SPLIT = 0.2

# --- Training / cross-validation -------------------------------------------
EPOCHS = 5
N_SPLITS = 5
SEED = 48

# --- Learning-rate schedule -------------------------------------------------
LEARNING_RATE = 1e-5   # also used as the schedule's starting value (lr_start)
LR_MAX = 0.0004
LR_MIN = 1e-6
LR_RAMPUP_EPOCHS = 5
LR_SUSTAIN_EPOCHS = 0
LR_EXP_DECAY = 0.8

# NOTE(review): EPOCHS equals LR_RAMPUP_EPOCHS, so if build_lrfn ramps up for
# the first LR_RAMPUP_EPOCHS epochs the decay phase is never reached — confirm
# this is intended.
scheduler = build_lrfn(
    lr_start=LEARNING_RATE,
    lr_max=LR_MAX,
    lr_min=LR_MIN,
    lr_rampup_epochs=LR_RAMPUP_EPOCHS,
    # Use the declared constant rather than a literal so that tuning
    # LR_SUSTAIN_EPOCHS above actually changes the schedule.
    lr_sustain_epochs=LR_SUSTAIN_EPOCHS,
    lr_exp_decay=LR_EXP_DECAY,
)

In [3]:
# Fold splitter consumed by the cross-validation loop. It is asked for the
# 'StratifiedGroupKFold' strategy with 'patient_id' as the grouping column.
# NOTE(review): presumably this keeps every image of a patient inside a single
# fold — confirm against Fold_Creator.
Fold = Fold_Creator(
    train_df_path=TRAIN_DF,
    test_df_path=TEST_DF,
    tfrecord_path=None,
    fold_type='StratifiedGroupKFold',
    n_splits=N_SPLITS,
    shuffle=True,
    random_state=SEED,
    group_col='patient_id',
)

In [None]:
# Cross-validated training and inference.
#
# For every fold: train a fresh model, reload the best checkpoint, store its
# predictions for the held-out rows (out-of-fold predictions) and store its
# predictions for the test set in that fold's own column of `test_preds`.
oof_val_predictions = np.zeros((TRAINING_DATA_SIZE,))
test_preds = np.zeros((TEST_DATA_SIZE, N_SPLITS))
# The ids are cast to unicode strings below, so the buffer must be able to
# hold strings; a float array would raise on assignment for non-numeric ids.
test_idnums = np.empty((TEST_DATA_SIZE,), dtype=object)

# `fold_number` is 1-based and used for display and file names only; array
# columns are addressed with the 0-based `fold_col`.
for fold_number, fold in enumerate(Fold.create_folds_generator(), start=1):
    trn_idx, val_idx, train_path, train_label, valid_path, valid_label = fold
    fold_col = fold_number - 1

    print("="* 20,"Fold:" + " " + str(fold_number),"="*20)

    # Drop the previous fold's Keras state so models do not pile up in memory.
    tf.keras.backend.clear_session()

    train = [train_path,train_label]
    validation = [valid_path,valid_label]

    save_path = f'./model/saved_models/EfficientNetB5_fold_{fold_number}.h5'

    print(f"Save model in path: {save_path}")

    callbacks = [
        # AUC is a higher-is-better metric: state mode='max' explicitly instead
        # of relying on the version-dependent 'auto' heuristic.
        ModelCheckpoint(save_path,
                        monitor='val_auc',
                        mode='max',
                        verbose=1,
                        save_best_only=True),
        LearningRateScheduler(scheduler)
    ]

    Data = Dataset(
        train_files = train,
        test_files = TEST_TFRECORDS,
        validation_files = validation,
        validation_split = VALIDATION_SPLIT,
        image_size = IMAGE_SIZE,
        shuffle = SHUFFLE,
        dataset_size = TRAINING_DATA_SIZE,
        batch_size = BATCH_SIZE,
        resize_shape = RESIZE_SHAPE
    )

    Model = ModelCreation(
        architecture = 'efficientnet',
        learning_rate = LEARNING_RATE,
        input_shape = (*RESIZE_SHAPE,3),
        output_shape = 1,
        optimizer = Adam,
        # Pin the metric name: a new AUC() per fold would otherwise be
        # auto-named 'auc_1', 'auc_2', ... and the 'val_auc' monitor above
        # would stop matching after the first fold.
        metric = [BinaryAccuracy(),AUC(name='auc')],
        loss = BinaryCrossentropy,
        linear = True,
        verbose = True
    )

    training = Data.get_train_from_tensor_slices()
    validation_data = Data.get_val_from_tensor_slices()

    history = Model.model.fit(training,
                            steps_per_epoch = Data.get_train_steps_per_epoch(),
                            epochs = EPOCHS,
                            validation_data = validation_data,
                            validation_steps = Data.get_validation_steps_per_epoch(),
                            verbose = 1,
                            callbacks = Model.inject_callbacks(callbacks))

    print(f"Loading model from path: {save_path}")

    Load_model = load_model(save_path)

    # Out-of-fold predictions for this fold's validation rows.
    # NOTE(review): assumes validation_data is finite and yields the rows in
    # the same order as val_idx — confirm against Dataset.
    validation_images = validation_data.map(lambda image,label: image)
    probabilities = Load_model.predict(validation_images)
    oof_val_predictions[val_idx] = np.ravel(probabilities)

    # Test-set predictions for this fold.
    test_data = Data.get_test_dataset()
    test_images = test_data.map(lambda image,idnum : image)
    test_probabilities = Load_model.predict(test_images)

    # predict() returns one column per output unit; flatten it to fit the
    # 1-D column slice of test_preds.
    test_preds[:,fold_col] = np.ravel(test_probabilities)

    # The test files are the same for every fold, so the ids only need to be
    # read once.
    # NOTE(review): relies on get_test_dataset() yielding a stable order across
    # calls, as the prediction/id alignment already does — confirm.
    if fold_number == 1:
        test_ids = test_data.map(lambda image,idnum: idnum).unbatch()
        test_ids_images = next(iter(test_ids.batch(TEST_DATA_SIZE))).numpy().astype('U')
        test_idnums[:] = test_ids_images