In [2]:
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import numpy as np
import os.path
from datetime import datetime

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Root folder of the training images; class labels are inferred from its sub-directories.
train_path = '/home/user/data/data/ML/train'
# NOTE: despite the name, this folder is loaded as the held-out test set (`test_ds`).
validation_path = '/home/user/data/test/'

In [3]:
# Check the native resolution of one training image before fixing the model
# input size. The path is derived from `train_path` so the data location is
# configured in one place only, and the file handle is closed after reading.
sample_image_path = os.path.join(train_path, 'target_1', 'tumor_0_9_9178.jpg')
with Image.open(sample_image_path) as img:
    print(img.size)

# Target size passed to the dataset loaders; images with a different native
# resolution are resized to it when loaded.
image_size = (180,180)
batch_size = 32
epochs = 4
# Accumulators filled by later cells: evaluation rows, trained models and
# augmentation pipelines.
eval_all, mdl_lst, aug_lst = [], [], []

(224, 224)


In [4]:
class CustomAugment(object):
    """Callable augmentation: random horizontal flip, then random color drop.

    Each transform is applied or skipped based on ONE random draw per call, so
    the whole tensor passed in receives the same decision.

    NOTE(review): nothing here checks a training flag, so when this object is
    wrapped in ``layers.Lambda`` the random transforms presumably also run
    during ``evaluate``/``predict`` -- confirm this is intended.
    """

    def __call__(self, image):
        """Return `image` after the stochastic flip and color-drop steps."""
        # Random flips and grayscale with some stochasticity
        img = self._random_apply(tf.image.flip_left_right, image, p=0.6)
        img = self._random_apply(self._color_drop, img, p=0.9)
        return img

    def _color_drop(self, x):
        """Convert `x` to grayscale while keeping three channels.

        The tile multiples assume a rank-4 tensor whose last axis holds the
        channels.
        """
        gray = tf.image.rgb_to_grayscale(x)
        # rgb_to_grayscale leaves a single channel; repeat it three times so
        # both tf.cond branches in _random_apply yield the same shape and the
        # downstream layers still receive 3-channel input.
        return tf.tile(gray, [1, 1, 1, 3])

    def _random_apply(self, func, x, p):
        """Apply `func` to `x` with probability `p`, otherwise return `x`."""
        return tf.cond(
          tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32),
                  tf.cast(p, tf.float32)),
          lambda: func(x),
          lambda: x)

In [5]:
# Settings shared by both loaders: labels come from the sub-directory names and
# are encoded as a single 0/1 value per image.
_loader_kwargs = dict(
    labels="inferred",
    label_mode="binary",
    seed=0,
    image_size=image_size,
    batch_size=batch_size,
)

# Training data: 80% of the files under `train_path`. The remaining 20%
# ("validation" subset) is not loaded anywhere in this notebook.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=train_path,
    validation_split=0.2,
    subset="training",
    **_loader_kwargs,
)

# Held-out data: read in a fixed order (no shuffling).
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=validation_path,
    shuffle=False,
    **_loader_kwargs,
)

Found 5000 files belonging to 2 classes.
Using 4000 files for training.


2021-11-10 15:29:45.440848: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


Found 6000 files belonging to 2 classes.


2021-11-10 15:29:45.455331: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-10 15:29:45.455697: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-10 15:29:45.457040: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-10 15:29:45.458234: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [6]:
# Prefetch upcoming elements so input loading can overlap with training.
# NOTE(review): the loader already yields batches, so buffer_size=batch_size
# presumably buffers up to `batch_size` whole batches -- confirm memory use.
train_ds = train_ds.prefetch(buffer_size=batch_size)

In [7]:
def make_model(input_shape, num_classes, augmentation=None):
    """Build an uncompiled residual CNN based on separable convolutions.

    Args:
        input_shape: shape of one input image, e.g. ``image_size + (3,)``.
        num_classes: number of target classes. ``2`` yields a single sigmoid
            unit; any other value yields a softmax over ``num_classes`` units.
        augmentation: callable applied to the input tensor before rescaling.
            When omitted, the module-level ``data_augmentation`` is used, which
            is how existing callers configure this function; that name must
            then be bound before the call.

    Returns:
        A ``keras.Model`` mapping images to class probabilities.
    """
    if augmentation is None:
        # Backward-compatible fallback to the notebook-level global.
        augmentation = data_augmentation

    inputs = keras.Input(shape=input_shape)
    # Image augmentation block
    x = augmentation(inputs)

    # Entry block: scale pixel values by 1/255, then two conv/BN/ReLU stages.
    x = layers.Rescaling(1.0 / 255)(x)
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual: the strided 1x1 conv matches the channel count and
        # the spatial down-sampling of the main branch so both can be added.
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        # Binary problem: one probability output.
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)

In [8]:
def execute_model(train_ds, test_ds, group, seq):
    """Build, train and score one binary classifier.

    The model input size and the number of training epochs are read from the
    module-level ``image_size`` and ``epochs``.

    Args:
        train_ds: dataset used for fitting and for the training-set scores.
        test_ds: dataset used for the held-out scores.
        group: label stored as the last element of the result row.
        seq: identifier stored as the first element of the result row.

    Returns:
        ``(model, row)`` where ``row`` is
        ``[seq, *train_scores, *test_scores, group]`` and each score list is
        what ``model.evaluate`` returns for the compiled loss and metrics.
    """
    net = make_model(input_shape=image_size + (3,), num_classes=2)

    optimizer = keras.optimizers.Adam(1e-3)
    tracked_metrics = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()]
    net.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=tracked_metrics,
    )
    net.fit(train_ds, epochs=epochs)

    # Score on the training data first, then on the held-out data.
    train_scores = net.evaluate(train_ds)
    test_scores = net.evaluate(test_ds)
    summary_row = [seq, *train_scores, *test_scores, group]

    return net, summary_row

In [None]:
# Names of the augmentation experiments; position i labels aug_lst[i].
group_lst = ["basic_flip_rotation", "flip_color", "flip_color_zoom"]

# The list is (re)assigned rather than appended to, so re-running this cell
# cannot accumulate duplicate pipelines. The former fourth entry was an exact
# copy of "flip_color_zoom" with no matching name in group_lst and was removed.
aug_lst = [
    # basic_flip_rotation: built-in random flip + small random rotation
    keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1)]),
    # flip_color: custom flip / color drop + small random rotation
    keras.Sequential([layers.Lambda(CustomAugment()),
                      layers.RandomRotation(0.1)]),
    # flip_color_zoom: as above plus a small random zoom
    keras.Sequential([layers.Lambda(CustomAugment()),
                      layers.RandomRotation(0.1),
                      layers.RandomZoom(0.1)]),
]

# Every pipeline needs exactly one label.
assert len(group_lst) == len(aug_lst), "group_lst and aug_lst must have the same length"

In [None]:
# Playground

In [None]:
# Start from empty result containers so that re-running this cell does not
# append a second set of rows/models; the `id` stored in each row is used
# later as an index into mdl_lst and must stay aligned with it.
mdl_lst.clear()
eval_all.clear()

# Train and evaluate one model per augmentation pipeline.
# `data_augmentation` is intentionally bound at module level here: make_model()
# reads it as a global when it builds the network.
for i, data_augmentation in enumerate(aug_lst):
    # Fall back to a generated label when a pipeline has no entry in group_lst
    # instead of failing with IndexError.
    group = group_lst[i] if i < len(group_lst) else "unnamed_aug_{}".format(i)
    mdl, eval_lst = execute_model(train_ds, test_ds, group, i)
    mdl_lst.append(mdl)
    eval_all.append(eval_lst)

In [None]:
# Column layout of the rows collected in eval_all:
# run id, three training scores, three test scores, augmentation group.
cols = [
    "id",
    "train_loss", "train_bin_acc", "train_auc",
    "test_loss", "test_bin_acc", "test_auc",
    "group",
]

# One row per trained model, best test AUC first, with a fresh 0..n-1 index.
df_best_model = (
    pd.DataFrame(eval_all, columns=cols)
    .sort_values("test_auc", ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Show the top-ranked runs (the frame is sorted by test AUC, descending).
df_best_model.head()

In [None]:
# Row 0 holds the run with the highest test AUC; its `id` is the position of
# the corresponding model in mdl_lst.
# NOTE(review): the model is selected on the same data it is later scored on.
mdl = mdl_lst[df_best_model.loc[0, "id"]]

In [None]:
# Paths of the test images.
# NOTE(review): presumably aligned with iteration order only because test_ds
# was created with shuffle=False -- confirm before changing the loader.
file_paths = test_ds.file_paths

def generate_sub_best_model(model, file_paths, save_sub, dataset=None):
    """Score a dataset batch by batch and build the submission table.

    Args:
        model: object exposing ``predict(batch)``; each result is flattened.
        file_paths: one path per sample, in the order the dataset yields them.
        save_sub: when truthy, also write the table to a timestamped CSV file
            in the current working directory.
        dataset: iterable of ``(inputs, labels)`` batches to score. Defaults to
            the module-level ``test_ds``, which is what existing callers rely
            on. The labels are not used.

    Returns:
        DataFrame with columns ``file_paths`` (local test prefix rewritten to
        the challenge prefix) and ``predictions``.

    Raises:
        ValueError: from pandas if ``file_paths`` and the predictions differ
            in length.
    """
    if dataset is None:
        # Backward-compatible fallback to the notebook-level global.
        dataset = test_ds

    # Collect per-batch outputs and join them once, instead of re-allocating a
    # growing array on every batch.
    batch_scores = [np.asarray(model.predict(x)).ravel() for x, _ in dataset]
    if batch_scores:
        # float64 keeps the values formatted as before, when batches were
        # concatenated onto an initially empty float64 array.
        predictions = np.concatenate(batch_scores).astype(np.float64)
    else:
        predictions = np.array([])

    # Replace the local data root with the one expected in the submission.
    submission_paths = [
        path.replace("/home/user/data/test", "/data/challenges_data/test")
        for path in file_paths
    ]
    df_submission = pd.DataFrame(data={'file_paths': submission_paths, 'predictions': predictions})

    if save_sub:
        dtn = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        df_submission.to_csv('df_submission_'+str(dtn)+'.csv', index=False)
    return df_submission

In [None]:
# Set to True to also write the submission table to a timestamped CSV file.
save_submissions = False

In [None]:
# Build the submission table from the selected model's predictions.
df_submission = generate_sub_best_model(mdl, file_paths, save_submissions)

In [None]:
# Preview the first rows of the submission table.
df_submission.head()