In [30]:
import os
import re
import numpy as np
import tensorflow as tf
from functools import partial
import matplotlib.pyplot as plt
import efficientnet.tfkeras as efn

# --- Training configuration -------------------------------------------------
VERBOSE = 2        # verbosity level handed to model.fit
EPOCHS = 40        # number of training epochs
BATCH_SIZE = 32    # per-replica batch size

# --- Input geometry ---------------------------------------------------------
IMAGE_SIZE = (1024, 1024)
IMAGE_SHAPE = IMAGE_SIZE + (3,)   # height, width, channels

# --- Runtime / distribution -------------------------------------------------
# get_strategy() returns whichever strategy is currently active.
strategy = tf.distribute.get_strategy()
REPLICAS = strategy.num_replicas_in_sync
AUTOTUNE = tf.data.experimental.AUTOTUNE

gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print("Using default strategy for CPU and single GPU")
print("REPLICAS:", REPLICAS)


Num GPUs Available:  0
Using default strategy for CPU and single GPU
REPLICAS: 1


In [31]:
# Report the TensorFlow build this notebook is running against.
print("Tensorflow version " + tf.__version__)

# NOTE: despite its name, `cwd` holds the PARENT of the current working
# directory; the data paths below are resolved relative to it.
parent_of_workdir = os.path.join(os.getcwd(), os.pardir)
cwd = os.path.abspath(parent_of_workdir)

Tensorflow version 2.4.0


In [32]:
# Glob the training shards ONCE and sort them, so the train/validation split
# is reproducible and the two slices are guaranteed to be disjoint (the glob
# order is not something to rely on across runs or file systems).
train_record_files = sorted(tf.io.gfile.glob(cwd + "/data/train*.tfrec"))

TRAINING_FILENAMES = train_record_files[:10]       # first 10 shards -> training
VALIDATION_FILENAMES = train_record_files[10:16]   # following shards (up to 6) -> validation
TEST_FILENAMES = sorted(tf.io.gfile.glob(cwd + "/data/test*.tfrec"))

print("Train TFRecord Files:", len(TRAINING_FILENAMES))
print("Validation TFRecord Files:", len(VALIDATION_FILENAMES))
print("Test TFRecord Files:", len(TEST_FILENAMES))

Train TFRecord Files: 10
Validation TFRecord Files: 5
Test TFRecord Files: 16


In [33]:
def decode(serialized_example):
  """Parse serialized tf.Example protos into (image, target) tensors.

  Works on a single serialized example (scalar string) as well as on a
  batch of them (1-D string tensor).

  Args:
    serialized_example: string tensor holding serialized tf.Example protos
      with the required keys 'image' (bytes) and 'target' (int64).

  Returns:
    A tuple (image, target):
    * image: uint8 tensor whose trailing dimensions are IMAGE_SHAPE; any
      leading batch dimension of the input is preserved.
    * target: int32 tensor with the same shape as the input.
  """
  features = tf.io.parse_example(
      serialized_example,
      # Defaults are not specified since both keys are required.
      features={
          'image': tf.io.FixedLenFeature([], tf.string),
          'target': tf.io.FixedLenFeature([], tf.int64),
      })

  # Reinterpret the byte string as a flat vector of uint8 values.
  # NOTE(review): this assumes the records store RAW pixel bytes in
  # height-width-channel order. If they are JPEG/PNG encoded, this call
  # must become tf.io.decode_jpeg / tf.io.decode_png instead - confirm.
  image = tf.io.decode_raw(features['image'], tf.uint8)

  # decode_raw yields a flat vector per example, but the model consumes
  # IMAGE_SHAPE tensors, so restore height/width/channels while keeping
  # whatever leading (batch) dimensions the input had.
  leading_dims = tf.shape(features['image'])
  full_shape = tf.concat(
      [leading_dims, tf.constant(IMAGE_SHAPE, dtype=tf.int32)], axis=0)
  image = tf.reshape(image, full_shape)

  # Labels are stored as int64; downstream code works with int32.
  target = tf.cast(features['target'], tf.int32)

  return image, target


In [34]:
def augment(image, label):
  """Augmentation hook of the input pipeline; currently a pass-through.

  No distortion is applied: both arguments are handed back untouched.
  Image augmentations (flips, crops, ...) would be added here.

  Args:
    image: image tensor (or batch of images).
    label: matching label tensor.

  Returns:
    The unchanged (image, label) pair.
  """
  passthrough = (image, label)
  return passthrough

In [35]:
def normalize(image, label):
    """Cast the image to float32 and multiply it by 1/255.

    Args:
        image: image tensor to rescale.
        label: label tensor, returned unchanged.

    Returns:
        A (scaled_image, label) tuple.
    """
    scale = 1. / 255
    as_float = tf.cast(image, tf.float32)
    return as_float * scale, label

In [36]:
def count_data_items(filenames):
    """Sum the per-file example counts encoded in TFRecord file names.

    Each file name is expected to carry its item count between the last
    dash-prefixed digit run and a dot, e.g. ``train00-2071.tfrec`` holds
    2071 items.

    Args:
        filenames: iterable of file paths (may be empty).

    Returns:
        Total number of items as a numpy int64 (0 for an empty iterable).

    Raises:
        ValueError: if a file name does not contain a ``-<digits>.`` count.
    """
    count_pattern = re.compile(r"-([0-9]+)\.")

    counts = []
    for filename in filenames:
        # Match against the base name only, so that a directory component
        # such as ".../run-7.x/" cannot be mistaken for the item count.
        match = count_pattern.search(os.path.basename(filename))
        if match is None:
            raise ValueError(
                "Cannot read item count from file name: %r" % (filename,))
        counts.append(int(match.group(1)))

    # An explicit integer dtype keeps the empty-list result at 0, not 0.0.
    return np.sum(counts, dtype=np.int64)

In [37]:
def get_dataset(is_train, batch_size = 32, num_epochs = 40):
    """Build the batched tf.data input pipeline for one data split.

    The pipeline reads the TFRecord shards, caches the serialized records,
    batches them and then applies decode -> augment -> normalize to every
    batch. The training split is additionally shuffled and repeated; the
    validation split is a single ordered pass, so that one Keras validation
    run covers the validation data exactly once.

    Args:
        is_train: Selects between the training (True) and validation (False)
            file lists.
        batch_size: Number of examples per returned batch.
        num_epochs: Number of passes over the training data. Ignored for
            the validation split.

    Returns:
        A tf.data.Dataset yielding (images, labels) batches as produced by
        decode, augment and normalize.
    """

    tf_records = TRAINING_FILENAMES if is_train else VALIDATION_FILENAMES
    print("Reading file containing", count_data_items(tf_records), "items.")

    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False #disabling order, increasing speed

    with tf.name_scope('input'):
        # Read all shards in parallel; tf_records is a list of file names.
        dataset = tf.data.TFRecordDataset(tf_records, num_parallel_reads=AUTOTUNE)
        # Cache the still-serialized records so later passes skip file I/O.
        dataset = dataset.cache()
        dataset = dataset.with_options(ignore_order)

        if is_train:
            # Shuffle BEFORE repeating so every epoch sees each record once
            # (repeat-then-shuffle would blend records of adjacent epochs).
            # The argument is the size of the shuffle buffer.
            dataset = dataset.shuffle(1000 + 3 * batch_size)
            dataset = dataset.repeat(num_epochs)

        dataset = dataset.batch(batch_size)

        # The map functions operate on whole batches (batching happens first).
        dataset = dataset.map(decode, num_parallel_calls=AUTOTUNE)
        dataset = dataset.map(augment, num_parallel_calls=AUTOTUNE)
        dataset = dataset.map(normalize, num_parallel_calls=AUTOTUNE)

        dataset = dataset.prefetch(AUTOTUNE)

    return dataset

In [38]:
def get_lr_callback(batch_size=BATCH_SIZE):
    """Create the per-epoch learning-rate scheduler callback.

    The schedule has three phases: a linear ramp from the start rate to the
    peak rate, an optional plateau at the peak, and an exponential decay
    towards the minimum rate. The peak rate scales with the number of
    replicas and the batch size.

    Args:
        batch_size: batch size used to scale the peak learning rate.

    Returns:
        A tf.keras.callbacks.LearningRateScheduler wrapping the schedule.
    """
    initial_lr = 0.000005
    peak_lr = 0.00000125 * REPLICAS * batch_size
    floor_lr = 0.000001
    warmup_epochs = 5
    plateau_epochs = 0
    decay_rate = 0.8

    def schedule(epoch):
        # Phase 1: linear warm-up from initial_lr to peak_lr.
        if epoch < warmup_epochs:
            return (peak_lr - initial_lr) / warmup_epochs * epoch + initial_lr

        # Phase 2: hold the peak rate (empty while plateau_epochs is 0).
        if epoch < warmup_epochs + plateau_epochs:
            return peak_lr

        # Phase 3: exponential decay towards floor_lr.
        return (peak_lr - floor_lr) * decay_rate**(epoch - warmup_epochs - plateau_epochs) + floor_lr

    return tf.keras.callbacks.LearningRateScheduler(schedule, verbose=False)

In [50]:
# Everything here runs in the default (eager) TF2 context. Creating the
# datasets and the model inside `tf.Graph().as_default()` would disable eager
# execution and tie them to a throwaway graph that training cannot use.

# Global batch = per-replica batch * number of replicas, matching the
# steps_per_epoch computation below.
train_ds = get_dataset(True, batch_size=BATCH_SIZE * REPLICAS, num_epochs=EPOCHS)
val_ds = get_dataset(False, batch_size=BATCH_SIZE * REPLICAS)

files_train = count_data_items(TRAINING_FILENAMES)
# Integer number of full batches per epoch (Keras expects an integer here).
steps_per_epoch = int(files_train // (BATCH_SIZE * REPLICAS))

# Variables (model weights, optimizer state) must be created under the
# distribution strategy's scope; with the default strategy this is a no-op.
with strategy.scope():
    inp = tf.keras.layers.Input(shape=IMAGE_SHAPE)
    base = efn.EfficientNetB6(input_shape=IMAGE_SHAPE,weights='imagenet',include_top=False)

    # Backbone -> global average pooling -> single sigmoid unit (binary output).
    x = base(inp)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1,activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inp,outputs=x)
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.05)

    model.compile(optimizer=opt,loss=loss,metrics=['AUC'])

model.summary()


    

Reading file containing 21788 items.
Reading file containing 10904 items.
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1024, 1024, 3)]   0         
_________________________________________________________________
efficientnet-b6 (Functional) (None, 32, 32, 2304)      40960136  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 2305      
Total params: 40,962,441
Trainable params: 40,738,009
Non-trainable params: 224,432
_________________________________________________________________


In [None]:
# Train in the default (eager) TF2 context. Wrapping fit in a fresh
# `tf.Graph().as_default()` would switch to legacy graph mode on an empty
# graph that contains neither the model nor the datasets.
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[get_lr_callback()],   # per-epoch learning-rate schedule
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    verbose=VERBOSE,
)