In [1]:
!pip install --quiet /kaggle/input/kerasapplications
!pip install --quiet /kaggle/input/efficientnet-keras-source-code

In [2]:
import math, os, random, re
import tensorflow as tf
import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
from tensorflow import keras
import efficientnet.tfkeras as efn
from functools import partial

In [3]:
# Sentinel that lets tf.data choose parallelism / prefetch sizes on its own.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Presumably intended for RNG seeding; not referenced in the cells visible here.
SEED = 42

# The five class labels, as strings.
CLASSES = [str(class_index) for class_index in range(5)]

# Size passed to tf.image.resize, i.e. [new_height, new_width].
IMAGE_SIZE = [512, 512]

# Number of images per batch fed to the model.
BATCH_SIZE = 20

In [4]:
def count_data_items(filenames):
    """Sum the item counts embedded in a list of TFRecord file names.

    Each name must contain a ``-<digits>.`` fragment (for example
    ``ld_test00-1.tfrec`` -> 1); the first such fragment found in the string
    is used. This notebook treats that number as the count of records stored
    in the file.

    Args:
        filenames: iterable of file name / path strings.

    Returns:
        int: the sum of the embedded counts, ``0`` for an empty iterable.

    Raises:
        ValueError: if a name has no ``-<digits>.`` fragment.
    """
    # Compile once instead of once per file; `+` rejects an empty digit run,
    # which would otherwise blow up inside int('').
    count_pattern = re.compile(r"-([0-9]+)\.")
    total = 0
    for filename in filenames:
        match = count_pattern.search(filename)
        if match is None:
            raise ValueError(
                f"Cannot read an item count from file name {filename!r}: "
                "expected a '-<digits>.' fragment"
            )
        total += int(match.group(1))
    return total

# Glob pattern for the test-set TFRecord files.
TEST_FILES_PATTERN = '../input/cassava-leaf-disease-classification/test_tfrecords/ld_test*.tfrec'
TEST_FILENAMES = tf.io.gfile.glob(TEST_FILES_PATTERN)
if not TEST_FILENAMES:
    # Fail fast: with no files the pipeline would otherwise only break later,
    # far away from the actual cause.
    raise FileNotFoundError(f'No test TFRecord files match {TEST_FILES_PATTERN!r}')

# Total number of test images, taken from the counts embedded in the file names.
NUM_TEST_IMAGES = count_data_items(TEST_FILENAMES)

In [5]:
def decode_image(image):
    """Turn encoded JPEG bytes into a float32 tensor shaped [*IMAGE_SIZE, 3].

    The image is decoded as 3-channel, resized to IMAGE_SIZE, cast to float32
    and divided by 255.0.
    """
    target_shape = [*IMAGE_SIZE, 3]
    pixels = tf.image.resize(tf.image.decode_jpeg(image, channels=3), IMAGE_SIZE)
    # Rescale from the 0..255 byte range of the decoded JPEG.
    scaled = tf.cast(pixels, tf.float32) / 255.0
    # Pin the static shape explicitly.
    return tf.reshape(scaled, target_shape)

def read_tfrecord(example, labeled):
    """Parse one serialized TFRecord example.

    Args:
        example: a serialized example, as accepted by tf.io.parse_single_example.
        labeled: when truthy the record must carry an int64 "target" feature;
            otherwise it must carry a string "image_name" feature.

    Returns:
        (image, label) with the label cast to int32 when `labeled`,
        else (image, image_name). The image comes from decode_image.
    """
    # Every record has the encoded image; the second feature depends on the mode.
    feature_spec = {"image": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        feature_spec["target"] = tf.io.FixedLenFeature([], tf.int64)
    else:
        feature_spec["image_name"] = tf.io.FixedLenFeature([], tf.string)

    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed['image'])
    if not labeled:
        return image, parsed['image_name']
    return image, tf.cast(parsed['target'], tf.int32)

def load_dataset(filenames, labeled=True, ordered=False):
    """Build a tf.data pipeline of parsed examples from TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: forwarded to read_tfrecord to pick the record schema.
        ordered: when False, deterministic ordering is switched off so that
            elements are used as soon as they stream in (faster); when True
            the default options are left untouched.

    Returns:
        An unbatched dataset of whatever read_tfrecord yields per record.
    """
    options = tf.data.Options()
    if not ordered:
        # Trade reproducible order for throughput.
        options.experimental_deterministic = False

    parse_record = partial(read_tfrecord, labeled=labeled)
    # Reads from multiple files are interleaved automatically.
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    return records.with_options(options).map(parse_record, num_parallel_calls=AUTOTUNE)

def get_test_dataset(ordered=True):
    """Return the unlabeled test pipeline, batched by BATCH_SIZE and prefetched.

    Elements are (image batch, image_name batch) pairs built from
    TEST_FILENAMES; `ordered` is passed through to load_dataset.
    """
    unlabeled = load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
    return unlabeled.batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [6]:
# Saved Keras model (HDF5) used for inference.
# NOTE(review): deserialization presumably relies on the `efficientnet.tfkeras`
# import at the top of the notebook — confirm before removing that import.
MODEL_PATH = '../input/cassava-f42/f42_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

In [7]:
# get_test_dataset defaults to ordered=True, so a later pass over `test_ds`
# yields the records in the same order as this one.
test_ds = get_test_dataset()

# Keep only the images; the model does not take the file names.
test_img_ds = test_ds.map(lambda img, _image_name: img)

# Predicted class = index of the largest model output along the last axis.
model_outputs = model.predict(test_img_ds)
predictions = np.argmax(model_outputs, axis=-1)

In [8]:
# How many test images were assigned to each predicted class.
predicted_labels, label_counts = np.unique(predictions, return_counts=True)
dict(zip(predicted_labels, label_counts))

{4: 1}

In [9]:
# Second pass over the test pipeline, this time keeping only the file names.
test_ids_ds = test_ds.map(lambda _img, image_name: image_name).unbatch()

# Collect every id at once by re-batching with the total image count.
all_ids_batch = next(iter(test_ids_ds.batch(NUM_TEST_IMAGES)))
test_ids = all_ids_batch.numpy().astype('U')  # byte strings -> unicode

# Pair each id with its predicted label and write the submission CSV.
submission_rows = np.rec.fromarrays([test_ids, predictions])
np.savetxt(
    'submission.csv',
    submission_rows,
    fmt=['%s', '%d'],
    delimiter=',',
    header='image_id,label',
    comments='',
)

In [10]:
# Sanity check: show the first lines of the submission file that was just written.
!head submission.csv

image_id,label
2216849948.jpg,4
