In [1]:
import tensorflow as tf
import pandas as pd 
import numpy as np 
from glob import glob
import matplotlib.pyplot as plt 
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping
from tensorflow.keras.metrics import AUC,BinaryAccuracy 
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import load_model
from Folds_creator.Creator import Fold_Creator
from model.models import ModelCreation
from TFData.Dataset import Dataset,Dataset_TTA
from callbacks.CyclicLR import CyclicLR
from utils import build_lrfn

In [None]:
# ---- Input locations ----
TRAIN_DF = './Dataset/train.csv'
TEST_DF = './Dataset/test.csv'
TEST_TFRECORDS = tf.io.gfile.glob('./Dataset/tfrecords/test*.tfrec')

# ---- Dataset sizes and image geometry (passed to Dataset / ModelCreation below) ----
TRAINING_DATA_SIZE = 33126
TEST_DATA_SIZE = 10982
IMAGE_SIZE = [1024, 1024]
RESIZE_SHAPE = [256, 256]

# ---- Training-loop settings ----
BATCH_SIZE = 8
EPOCHS = 5
SHUFFLE = 256
VALIDATION_SPLIT = 0.2
N_SPLITS = 5
SEED = 48

# ---- Learning-rate schedule parameters ----
LEARNING_RATE = 3e-5
LR_MAX = 3e-4
LR_MIN = 3e-5
LR_RAMPUP_EPOCHS = 2
LR_SUSTAIN_EPOCHS = 1
LR_EXP_DECAY = 0.7

# Schedule function handed to LearningRateScheduler in the training cells.
scheduler = build_lrfn(
    lr_start=LEARNING_RATE,
    lr_max=LR_MAX,
    lr_min=LR_MIN,
    lr_rampup_epochs=LR_RAMPUP_EPOCHS,
    lr_sustain_epochs=LR_SUSTAIN_EPOCHS,
    lr_exp_decay=LR_EXP_DECAY,
)

In [None]:
# Fold splitter for the CSV-based pipeline: StratifiedGroupKFold with
# `patient_id` as the grouping column and no TFRecord paths.
# NOTE(review): presumably this keeps every image of one patient inside a
# single fold - confirm against Fold_Creator.
Fold = Fold_Creator(
    train_df_path=TRAIN_DF,
    test_df_path=TEST_DF,
    tfrecord_path=None,
    fold_type='StratifiedGroupKFold',
    n_splits=N_SPLITS,
    shuffle=True,
    random_state=SEED,
    group_col='patient_id',
)

In [None]:
# Cross-validated training on the CSV-based folds.
#
# For every fold yielded by Fold.create_folds_generator() this cell:
#   1. trains a fresh model, checkpointing the best `val_auc` epoch to disk,
#   2. reloads that checkpoint,
#   3. writes its validation predictions into `oof_val_predictions[val_idx]`,
#   4. writes its test predictions into column `i - 1` of `test_preds`.
oof_val_predictions = np.zeros((TRAINING_DATA_SIZE,))
test_preds = np.zeros((TEST_DATA_SIZE, N_SPLITS))

# enumerate(start=1) keeps the 1-based fold number `i` (used in the checkpoint
# file name and as the test_preds column) tied to the generator, so it cannot
# drift if the loop body is edited.
for i, (trn_idx, val_idx, train_path, train_label, valid_path, valid_label) in enumerate(
        Fold.create_folds_generator(), start=1):

    tf.keras.backend.clear_session() # Clear session so the other model will be trained on new one.

    print("="* 20,f'Fold_{i}',"=" * 20)

    train = [train_path, train_label]
    validation = [valid_path, valid_label]

    save_path = f'./model/saved_models/EfficientNetB3_Non_Linear_LS_0.05_fold_{i}.h5'

    print(f"Save model in path: {save_path}")

    callbacks = [
        # Only the epoch with the highest validation AUC is kept on disk.
        ModelCheckpoint(filepath=save_path, monitor='val_auc', verbose=1,
                        save_best_only=True, mode='max'),
        LearningRateScheduler(scheduler, verbose=1),
    ]

    Data = Dataset(
        train_files=train,
        test_files=TEST_TFRECORDS,
        validation_files=validation,
        # Use the config-cell constant instead of a second hard-coded 0.2.
        validation_split=VALIDATION_SPLIT,
        image_size=IMAGE_SIZE,
        shuffle=SHUFFLE,
        dataset_size=TRAINING_DATA_SIZE,
        batch_size=BATCH_SIZE,
        resize_shape=RESIZE_SHAPE,
    )

    Model = ModelCreation(
        architecture='efficientnet',
        learning_rate=LEARNING_RATE,
        input_shape=(*RESIZE_SHAPE, 3),
        output_shape=1,
        optimizer=Adam,
        metric=[BinaryAccuracy(), AUC()],
        loss=BinaryCrossentropy(label_smoothing=0.05),
        mode='non_linear',
        verbose=True,
    )

    training = Data.get_train_from_tensor_slices()
    validation_data = Data.get_val_from_tensor_slices()

    history = Model.model.fit(
        training,
        steps_per_epoch=Data.get_train_steps_per_epoch(),
        epochs=EPOCHS,
        validation_data=validation_data,
        validation_steps=Data.get_validation_steps_per_epoch(),
        verbose=1,
        callbacks=Model.inject_callbacks(callbacks),
    )

    print(f"Loading model from path: {save_path}")

    # Predict with the best checkpoint, not with the last-epoch weights.
    Load_model = load_model(save_path)

    # Drop the labels so predict() only receives the images.
    validation_images = validation_data.map(lambda image, label: image)

    probabilities = Load_model.predict(validation_images)

    # NOTE(review): assumes the validation dataset yields exactly len(val_idx)
    # examples in the same order as val_idx (no shuffle/repeat) - confirm in
    # Dataset.get_val_from_tensor_slices().
    # np.ravel flattens the model output to 1-D; unlike np.concatenate it does
    # not raise when there are no predictions.
    oof_val_predictions[val_idx] = np.ravel(probabilities)

    # Get test set predictions in fold

    test_data = Data.get_test_dataset()

    # Drop the id element so predict() only receives the images.
    test_images = test_data.map(lambda image, idnum: image)
    test_probabilities = Load_model.predict(test_images)

    test_preds[:, i - 1] = np.ravel(test_probabilities)

In [2]:
# ---- Input locations ----
TRAIN = './Dataset/train.csv'
TEST = './Dataset/test.csv'
# NOTE: TRAIN_DF held the CSV path in the earlier config cell; from here on it
# is rebound to a list of TFRecord paths.
TRAIN_DF = tf.io.gfile.glob('./Dataset/CDeotte/Melanoma2020_128x128/train*.tfrec')
TRAIN_DF_2 = tf.io.gfile.glob('./Dataset/CDeotte/Melanoma2019_128x128/train*.tfrec')
TEST_TFRECORDS = tf.io.gfile.glob('./Dataset/tfrecords/test*.tfrec')
# Both TFRecord lists combined, in glob order.
TRAINING_DATA = TRAIN_DF + TRAIN_DF_2

# ---- Dataset sizes and image geometry ----
TRAINING_DATA_SIZE = 58457
TEST_DATA_SIZE = 10982
IMAGE_SIZE = [128, 128]
RESIZE_SHAPE = [128, 128]
# One shape per model input: (height, width, 3) plus a length-4 vector.
# NOTE(review): the second input is presumably the metadata enabled by
# include_meta=True in the training cell - confirm.
INPUT_SHAPES = (tuple(RESIZE_SHAPE) + (3,), (4,))

# ---- Training-loop settings ----
BATCH_SIZE = 8
EPOCHS = 5
SHUFFLE = 256
VALIDATION_SPLIT = 0.2
N_SPLITS = 5
SEED = 48

# ---- Learning-rate schedule parameters ----
LEARNING_RATE = 3e-5
LR_MAX = 3e-4
LR_MIN = 3e-5
LR_RAMPUP_EPOCHS = 2
LR_SUSTAIN_EPOCHS = 1
LR_EXP_DECAY = 0.7

# Schedule function handed to LearningRateScheduler in the training cell.
scheduler = build_lrfn(
    lr_start=LEARNING_RATE,
    lr_max=LR_MAX,
    lr_min=LR_MIN,
    lr_rampup_epochs=LR_RAMPUP_EPOCHS,
    lr_sustain_epochs=LR_SUSTAIN_EPOCHS,
    lr_exp_decay=LR_EXP_DECAY,
)

In [3]:
# Fold splitter for the TFRecord-based pipeline: plain KFold over the combined
# TFRecord list, with no grouping column.
Fold = Fold_Creator(
    train_df_path=TRAIN,
    test_df_path=TEST,
    tfrecord_path=TRAINING_DATA,
    fold_type='KFold',
    n_splits=N_SPLITS,
    shuffle=True,
    random_state=SEED,
    group_col=None,
)

In [4]:
# Cross-validated training on the TFRecord folds (image + metadata model).
#
# For every fold yielded by Fold.create_tfrecord_fold_generator() this cell
# trains a fresh model, reloads its best `val_auc` checkpoint and appends the
# validation image names, targets and predictions to the oof_* lists.
#
# NOTE(review): the recorded output of this cell ends with
#   TypeError: `filenames` must be a `tf.data.Dataset` of `tf.string` elements.
# That is raised inside one of the Data.* dataset builders (not visible here);
# check what create_tfrecord_fold_generator() yields as training_path /
# validation_path and how Dataset passes it on.
models = []
oof_image_name = []
oof_target = []
oof_prediction = []

# enumerate(start=1) supplies the 1-based fold number. The previous manual
# counter was never incremented, so every fold was labelled Fold_1 and
# overwrote the same `..._fold_1.h5` checkpoint.
for i, (train_idx, valid_idx, training_path, validation_path) in enumerate(
        Fold.create_tfrecord_fold_generator(), start=1):

    tf.keras.backend.clear_session() # Clear session so the other model will be trained on new one.

    print("="* 20,f'Fold_{i}',"=" * 20)

    save_path = f'./model/saved_models/EfficientNetB3_Non_Linear_LS_0.05_fold_{i}.h5'

    Data = Dataset(
        train_files=training_path,
        test_files=TEST_TFRECORDS,
        validation_files=validation_path,
        validation_split=VALIDATION_SPLIT,
        image_size=IMAGE_SIZE,
        shuffle=SHUFFLE,
        dataset_size=TRAINING_DATA_SIZE,
        batch_size=BATCH_SIZE,
        resize_shape=RESIZE_SHAPE,
        include_meta=True,
    )

    Model = ModelCreation(
        architecture='efficientnet',
        learning_rate=LEARNING_RATE,
        input_shape=INPUT_SHAPES,
        output_shape=1,
        optimizer=Adam,
        metric=[BinaryAccuracy(), AUC()],
        loss=BinaryCrossentropy(label_smoothing=0.05),
        mode='meta',
        verbose=True,
    )

    training = Data.get_training_dataset()
    validation = Data.get_validation_dataset()

    callbacks = [
        # Only the epoch with the highest validation AUC is kept on disk.
        ModelCheckpoint(filepath=save_path, monitor='val_auc', verbose=1,
                        save_best_only=True, mode='max'),
        LearningRateScheduler(scheduler, verbose=1),
    ]

    history = Model.model.fit(
        training,
        steps_per_epoch=Data.get_train_steps_per_epoch(),
        epochs=EPOCHS,
        callbacks=Model.inject_callbacks(callbacks),
        validation_data=validation,
        verbose=1,
    )

    models.append(Model.model)

    dataset_full = Data.get_full_dataset(validation_path)

    # Pull names and targets out of the validation dataset as single arrays.
    # Uses this loop's own `valid_idx`; the old `val_idx` only existed as
    # leftover state from the earlier training cell.
    # NOTE(review): assumes valid_idx has one entry per validation example. If
    # it indexes TFRecord files instead, batch(len(valid_idx)) keeps only the
    # first few examples and the oof_* lists will not line up - confirm
    # against Fold_Creator.
    image_name = dataset_full.map(lambda image, name, target: name).unbatch()
    image_name = next(iter(image_name.batch(len(valid_idx)))).numpy().astype('U')

    target = dataset_full.map(lambda image, name, target: target).unbatch()
    target = next(iter(target.batch(len(valid_idx)))).numpy()

    # Keep only the model inputs for predict().
    image = dataset_full.map(lambda image, name, target: image)

    # Predict with the best checkpoint, not with the last-epoch weights.
    Load_model = load_model(save_path)

    # Was misspelled `probabilites`, so the line below read a stale or
    # undefined `probabilities`.
    probabilities = Load_model.predict(image)

    oof_image_name.extend(list(image_name))

    oof_target.extend(list(target))

    # np.ravel flattens the model output to 1-D.
    oof_prediction.extend(list(np.ravel(probabilities)))

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inp1 (InputLayer)               [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
efficientnet-b3 (Model)         (None, 4, 4, 1536)   10783528    inp1[0][0]                       
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1536)         0           efficientnet-b3[1][0]            
__________________________________________________________________________________________________
dense (Dense)                   (None, 2048)         3147776     global_average_pooling2d[0][0]   
______________________________________________________________________________________________

TypeError: `filenames` must be a `tf.data.Dataset` of `tf.string` elements.