In [1]:
!pip install efficientnet

import efficientnet.tfkeras as efn
import tensorflow as tf
from tensorflow.keras.models import load_model
import re
import numpy as np
from kaggle_datasets import KaggleDatasets

# Saved Keras checkpoints (HDF5) of the three ensemble members. They are
# index-aligned with IMAGE_SIZE below: model1 is run on IMAGE_SIZE[0],
# model2 on IMAGE_SIZE[1] and model3 on IMAGE_SIZE[2].
model1_path = '../input/train-efficientnet/Effnet_30ep.h5'
model2_path = '../input/train-densenet-xception/Xception_save.h5'
model3_path = '../input/train-densenet-xception/Densenet_save.h5'

# Shorthand used for every tf.data parallelism / prefetch setting.
AUTO = tf.data.experimental.AUTOTUNE
# TPU bring-up. The order matters: resolve the cluster, connect to it,
# initialise the TPU system, and only then build the distribution strategy.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Global batch size: 32 images per replica.
BATCH_SIZE = 32 * strategy.num_replicas_in_sync
# One square input resolution per ensemble member (see the note on the
# checkpoint paths above for the model <-> resolution pairing).
IMAGE_SIZE = [[512, 512] ,[331, 331], [224, 224]]
# Populated by get_testfiles_path(); stays None until that has been called.
TEST_FILENAMES = None

Collecting efficientnet
  Downloading efficientnet-1.1.1-py3-none-any.whl (18 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 434 kB/s 
Installing collected packages: keras-applications, efficientnet
Successfully installed efficientnet-1.1.1 keras-applications-1.0.8
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
def decode_image(image_data, img_size):
    """Decode JPEG bytes into a float32 RGB tensor with a static shape.

    Args:
        image_data: scalar string tensor holding the encoded JPEG.
        img_size: two-element sequence giving the spatial size the stored
            image is expected to have.

    Returns:
        A float32 tensor of shape ``[*img_size, 3]`` with values in [0, 1].
    """
    rgb_uint8 = tf.image.decode_jpeg(image_data, channels=3)
    # Rescale the 8-bit pixel values into the [0, 1] range.
    rgb_unit = tf.cast(rgb_uint8, tf.float32) / 255.0
    # The TPU needs a fully defined shape, so pin it explicitly.
    static_shape = list(img_size) + [3]
    return tf.reshape(rgb_unit, static_shape)

def load_dataset(filenames, labeled=True, ordered=False, IMG_SIZE=None):
    """Stream TFRecord files and parse every record into tensors.

    Args:
        filenames: TFRecord paths to read; several files are read in parallel.
        labeled: selects the record parser. ``True`` uses
            ``read_labeled_tfrecord``, ``False`` uses
            ``read_unlabeled_tfrecord``.
        ordered: when ``False`` the reader may yield records as soon as they
            arrive instead of in file order, which is faster.
        IMG_SIZE: spatial size forwarded to the record parser.

    Returns:
        An unbatched ``tf.data.Dataset`` of whatever the chosen parser
        returns (``(image, id)`` pairs for the unlabeled parser).
    """
    # Only the parser that is actually needed gets looked up.
    parse_record = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord

    read_options = tf.data.Options()
    if not ordered:
        # Trade record order for throughput.
        read_options.experimental_deterministic = False

    # num_parallel_reads interleaves reads from multiple files.
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    records = records.with_options(read_options)
    return records.map(lambda example: parse_record(example, IMG_SIZE),
                       num_parallel_calls=AUTO)

def read_unlabeled_tfrecord(example, img_size):
    """Parse one serialized test record into an ``(image, id)`` pair.

    Args:
        example: scalar string tensor containing a serialized record.
        img_size: spatial size passed on to ``decode_image``.

    Returns:
        A tuple of the decoded image tensor and the record's ``id`` string
        tensor. Test records carry no class label.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),  # encoded JPEG bytes
        "id": tf.io.FixedLenFeature([], tf.string),     # single string element
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image'], img_size), parsed['id']

def get_test_dataset(ordered=False, IMG_SIZE=None):
    """Build the batched test pipeline from the files in ``TEST_FILENAMES``.

    Args:
        ordered: forwarded to ``load_dataset``; pass ``True`` when the
            output must line up with the record order.
        IMG_SIZE: spatial size of the images stored in ``TEST_FILENAMES``.

    Returns:
        A dataset of ``(image, id)`` batches of ``BATCH_SIZE`` elements.
    """
    unlabeled = load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered, IMG_SIZE=IMG_SIZE)
    # Prefetch so the next batch is prepared while the current one is consumed.
    return unlabeled.batch(BATCH_SIZE).prefetch(AUTO)

def predict(model, img_size, n_iter):
    """Run ``model`` over the ordered test set ``n_iter`` times.

    Args:
        model: object exposing a Keras-style ``predict(dataset, verbose=...)``.
        img_size: spatial size of the images in the currently selected test
            files (see ``get_test_dataset``).
        n_iter: number of prediction passes to run.

    Returns:
        A list with ``n_iter`` entries, one ``model.predict`` result per pass.

    Note:
        The passes are meant for test-time augmentation, but no augmentation
        is applied here, so every pass sees the same inputs. Insert the
        augmentation where the image-only dataset is built to make the
        passes differ.
    """
    batches = get_test_dataset(ordered=True, IMG_SIZE=img_size)
    # Drop the ids. This view is identical for every pass, so build it once
    # instead of re-creating it inside the loop.
    test_images_ds = batches.map(lambda image, idnum: image)
    return [model.predict(test_images_ds, verbose=0) for _ in range(n_iter)]

def count_data_items(filenames):
    """Sum the record counts encoded in TFRecord file names.

    Each file name carries its item count between the last ``-`` group and
    the extension, e.g. ``flowers00-230.tfrec`` holds 230 items.

    Args:
        filenames: iterable of TFRecord paths (local or ``gs://``).

    Returns:
        The total number of items as a NumPy integer (0 for no files).

    Raises:
        ValueError: if a file name has no ``-<count>.`` part.
    """
    # Anchor the match to the final path component so digits in a bucket or
    # directory name (e.g. "gs://kds-123.x/...") cannot be read as the count.
    # "+" instead of "*" rejects an empty count rather than failing in int().
    count_pattern = re.compile(r"-([0-9]+)\.[^/\\]*$")
    counts = []
    for filename in filenames:
        match = count_pattern.search(filename)
        if match is None:
            raise ValueError(
                "cannot read item count from file name: {!r}".format(filename))
        counts.append(int(match.group(1)))
    # An explicit integer dtype keeps the empty case from summing to 0.0.
    return np.sum(counts, dtype=np.int64)

def get_testfiles_path(img_size=IMAGE_SIZE[0][0]):
    """Point the global ``TEST_FILENAMES`` at the test TFRecords of one resolution.

    Args:
        img_size: edge length of the wanted images; one of 192, 224, 331
            or 512. Any other value raises ``KeyError``.

    Side effects:
        Rebinds the module-level ``TEST_FILENAMES`` to the list of matching
        ``.tfrec`` paths. Nothing is returned.
    """
    global TEST_FILENAMES
    bucket_root = KaggleDatasets().get_gcs_path('tpu-getting-started')
    # The dataset ships one sub-directory per image resolution.
    subdir_by_size = {
        192: '/tfrecords-jpeg-192x192',
        224: '/tfrecords-jpeg-224x224',
        331: '/tfrecords-jpeg-331x331',
        512: '/tfrecords-jpeg-512x512',
    }
    resolution_dir = bucket_root + subdir_by_size[img_size]
    # These are the files whose predictions are submitted to the competition.
    TEST_FILENAMES = tf.io.gfile.glob(resolution_dir + '/test/*.tfrec')

In [3]:
# Load the checkpoints inside the TPU strategy scope so their variables are
# created on the TPU replicas. A model loaded outside the scope is not
# distributed, and predict() would not use the strategy set up earlier.
with strategy.scope():
    model1 = load_model(model1_path)
    model2 = load_model(model2_path)
    model3 = load_model(model3_path)

# The test set holds the same images at every resolution, so counting the
# items of one resolution is enough.
get_testfiles_path(IMAGE_SIZE[0][0])
NUM_TEST_IMAGES = count_data_items(TEST_FILENAMES)

In [4]:
print('Calculating predictions...')
# Select the test files for each model's resolution right before predicting.
# The first selection is made here rather than inherited from an earlier
# cell: after one run TEST_FILENAMES points at the 224px files, so re-running
# this cell would otherwise feed 224px records into the 512px reshape.
get_testfiles_path(IMAGE_SIZE[0][0])
probs1 = np.mean(predict(model1, IMAGE_SIZE[0], 3), axis=0)

get_testfiles_path(IMAGE_SIZE[1][0])
probs2 = np.mean(predict(model2, IMAGE_SIZE[1], 3), axis=0)

get_testfiles_path(IMAGE_SIZE[2][0])
probs3 = np.mean(predict(model3, IMAGE_SIZE[2], 3), axis=0)

# Unweighted ensemble: sum the per-model class probabilities and take the
# highest-scoring class.
probabilities = probs1 + probs2 + probs3
predictions = np.argmax(probabilities, axis=-1)

print('Generating submission file...')
# TEST_FILENAMES still points at the IMAGE_SIZE[2] files selected above, so
# the ids are read at that resolution, in the same order as the predictions.
test_ds = get_test_dataset(ordered=True, IMG_SIZE=IMAGE_SIZE[2])
test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()
test_ids = next(iter(test_ids_ds.batch(NUM_TEST_IMAGES))).numpy().astype('U') # all in one batch
np.savetxt('submission.csv', np.rec.fromarrays([test_ids, predictions]), fmt=['%s', '%d'], delimiter=',', header='id,label', comments='')


Calculating predictions...
Generating submission file...
