# Flower Classification using CNN Models

We reused Phil Cullington's TPU notebook as the starting point for this flower classification work.  
Initial accuracy: ~0.35

# Getting libraries

In [None]:
import tensorflow as tf
from kaggle_datasets import KaggleDatasets
import numpy as np
import math
import matplotlib.pyplot as plt
import re
print("Tensorflow version " + tf.__version__)
!pip install efficientnet
import efficientnet.tfkeras as efficientnet

# Detect my accelerator

In [None]:
# Detect hardware and pick the matching distribution strategy.
try:
    # TPU detection. No parameters necessary if the TPU_NAME environment
    # variable is set. On Kaggle this is always the case.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("Running on TPU ", tpu.master())
except ValueError:
    # No TPU could be resolved: fall back to the default strategy below.
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Stable API name; tf.distribute.experimental.TPUStrategy is deprecated.
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

# Data path
### List the working folders

In [None]:
# Resolve the GCS location of the "tpu-getting-started" Kaggle dataset; the
# validation/test/train TFRecord globs further down are built from this path.
GCS_DS_PATH = KaggleDatasets().get_gcs_path(
    "tpu-getting-started"
)  # you can list the bucket with "!gsutil ls $GCS_DS_PATH"
# GCS_DS_PATH
# IPython shell escape: print the bucket contents for a quick sanity check.
!gsutil ls $GCS_DS_PATH

# Set parameters

In [None]:
# Image side length in pixels; also interpolated into the
# "tfrecords-jpeg-{size}x{size}" folder names used when globbing TFRecords.
size = 512
IMAGE_SIZE = [size, size]  # at 512x512, a GPU will run out of memory. Use the TPU
EPOCHS = 15  # epoch count passed to every fit() call
# Global batch size: per-replica batch multiplied by the number of replicas.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync # Use this line for TPU
#BATCH_SIZE = 8 * strategy.num_replicas_in_sync  # Use this line for GPU
AUTO = tf.data.AUTOTUNE  # shorthand used by the tf.data pipeline helpers

In [None]:
# GCS location of the additional "tf-flower-photo-tfrec" Kaggle dataset.
GCS_PATH_EXT = KaggleDatasets().get_gcs_path('tf-flower-photo-tfrec')

# Sub-folder shared by every external source; `size` is interpolated by the
# f-string itself, so no extra str.format() call is needed.
_EXT_TFREC_DIR = f'tfrecords-jpeg-{size}x{size}'

# One list of .tfrec shards per external source.
IMAGENET_FILES = tf.io.gfile.glob(f'{GCS_PATH_EXT}/imagenet_no_test/{_EXT_TFREC_DIR}/*.tfrec')
INATURELIST_FILES = tf.io.gfile.glob(f'{GCS_PATH_EXT}/inaturalist_no_test/{_EXT_TFREC_DIR}/*.tfrec')
OPENIMAGE_FILES = tf.io.gfile.glob(f'{GCS_PATH_EXT}/openimage_no_test/{_EXT_TFREC_DIR}/*.tfrec')
OXFORD_FILES = tf.io.gfile.glob(f'{GCS_PATH_EXT}/oxford_102_no_test/{_EXT_TFREC_DIR}/*.tfrec')
TENSORFLOW_FILES = tf.io.gfile.glob(f'{GCS_PATH_EXT}/tf_flowers_no_test/{_EXT_TFREC_DIR}/*.tfrec')

# Define class names

In [None]:
# Human-readable flower names, indexed by the integer class id stored in the
# TFRecords (104 classes). Used only to build plot titles.
# Names carry no leading/trailing whitespace so titles render cleanly.
CLASSES = [
    "pink primrose",
    "hard-leaved pocket orchid",
    "canterbury bells",
    "sweet pea",
    "wild geranium",
    "tiger lily",
    "moon orchid",
    "bird of paradise",
    "monkshood",
    "globe thistle",  # 00 - 09
    "snapdragon",
    "colt's foot",
    "king protea",
    "spear thistle",
    "yellow iris",
    "globe-flower",
    "purple coneflower",
    "peruvian lily",
    "balloon flower",
    "giant white arum lily",  # 10 - 19
    "fire lily",
    "pincushion flower",
    "fritillary",
    "red ginger",
    "grape hyacinth",
    "corn poppy",
    "prince of wales feathers",
    "stemless gentian",
    "artichoke",
    "sweet william",  # 20 - 29
    "carnation",
    "garden phlox",
    "love in the mist",
    "cosmos",
    "alpine sea holly",
    "ruby-lipped cattleya",
    "cape flower",
    "great masterwort",
    "siam tulip",
    "lenten rose",  # 30 - 39
    "barberton daisy",
    "daffodil",
    "sword lily",
    "poinsettia",
    "bolero deep blue",
    "wallflower",
    "marigold",
    "buttercup",
    "daisy",
    "common dandelion",  # 40 - 49
    "petunia",
    "wild pansy",
    "primula",
    "sunflower",
    "lilac hibiscus",
    "bishop of llandaff",
    "gaura",
    "geranium",
    "orange dahlia",
    "pink-yellow dahlia",  # 50 - 59
    "cautleya spicata",
    "japanese anemone",
    "black-eyed susan",
    "silverbush",
    "californian poppy",
    "osteospermum",
    "spring crocus",
    "iris",
    "windflower",
    "tree poppy",  # 60 - 69
    "gazania",
    "azalea",
    "water lily",
    "rose",
    "thorn apple",
    "morning glory",
    "passion flower",
    "lotus",
    "toad lily",
    "anthurium",  # 70 - 79
    "frangipani",
    "clematis",
    "hibiscus",
    "columbine",
    "desert-rose",
    "tree mallow",
    "magnolia",
    "cyclamen",
    "watercress",
    "canna lily",  # 80 - 89
    "hippeastrum",
    "bee balm",
    "pink quill",
    "foxglove",
    "bougainvillea",
    "camellia",
    "mallow",
    "mexican petunia",
    "bromelia",
    "blanket flower",  # 90 - 99
    "trumpet creeper",
    "blackberry lily",
    "common tulip",
    "wild rose",
]

In [None]:
# Validation and test TFRecord shards of the competition dataset at the
# configured image size.
VAL_FILENAMES = tf.io.gfile.glob(GCS_DS_PATH + f"/tfrecords-jpeg-{size}x{size}/val/*.tfrec")
TEST_FILESNAMES = tf.io.gfile.glob(GCS_DS_PATH + f"/tfrecords-jpeg-{size}x{size}/test/*.tfrec")

In [None]:
# Training shards. Active choice: competition train split + tf_flowers + Oxford 102.
# The commented lines are alternative mixes (all external sources, or adding
# the validation split to the training data).
#TRAINING_FILENAMES = tf.io.gfile.glob(GCS_DS_PATH + f"/tfrecords-jpeg-{size}x{size}/train/*.tfrec") + TENSORFLOW_FILES + OXFORD_FILES + OPENIMAGE_FILES + INATURELIST_FILES + IMAGENET_FILES 
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_DS_PATH + f"/tfrecords-jpeg-{size}x{size}/train/*.tfrec") + TENSORFLOW_FILES + OXFORD_FILES
#TRAINING_FILENAMES = tf.io.gfile.glob(GCS_DS_PATH + f"/tfrecords-jpeg-{size}x{size}/train/*.tfrec") + VAL_FILENAMES+ TENSORFLOW_FILES + OXFORD_FILES

# Helper functions

In [None]:
### Decoding image
def decode_image(image_data):
    """Decode JPEG bytes into a float32 RGB image tensor.

    Pixel values are divided by 255 so they land in the [0, 1] range, and the
    tensor is reshaped to ``[*IMAGE_SIZE, 3]`` because the TPU needs an
    explicit size.

    Args:
        image_data: encoded JPEG content of a single image.

    Returns:
        The decoded, rescaled and reshaped image tensor.
    """
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    return tf.reshape(scaled, [*IMAGE_SIZE, 3])


def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into an ``(image, label)`` pair.

    Args:
        example: a single serialized record holding an "image" bytestring and
            an int64 "class" feature.

    Returns:
        Tuple of the decoded image (see ``decode_image``) and the class id
        cast to int32.
    """
    feature_spec = {
        # scalar bytestring with the encoded JPEG
        "image": tf.io.FixedLenFeature([], tf.string),
        # scalar integer class id
        "class": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed["image"]), tf.cast(parsed["class"], tf.int32)


def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled example into an ``(image, id)`` pair.

    There is no "class" feature here: predicting it for the test dataset is
    the point of the competition.

    Args:
        example: a single serialized record holding "image" and "id"
            bytestrings.

    Returns:
        Tuple of the decoded image (see ``decode_image``) and the raw id
        string tensor.
    """
    feature_spec = {
        # scalar bytestring with the encoded JPEG
        "image": tf.io.FixedLenFeature([], tf.string),
        # scalar bytestring with the image identifier
        "id": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed["image"]), parsed["id"]


### Data augmentation function
#### Horizontal and vertical mirroring
#### Random cropping
#### Random rotation
#### Saturation, brightness and contrast can be modified randomly too


def data_augment(x, y):
    """Augmentation hook mapped over every dataset element; currently a no-op.

    All candidate transforms below (flips, crop, 90-degree rotations,
    saturation/brightness/contrast jitter) are commented out, so the inputs
    are returned unchanged.

    Args:
        x: first element of the dataset pair (the image).
        y: second element of the dataset pair (class label or image id).

    Returns:
        The tuple ``(x, y)``.
    """
    #x = tf.image.random_flip_left_right(x)
    #x = tf.image.random_flip_up_down(x)
    #x = tf.image.random_crop(x, size = [size,size,3])
    #p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    # if p_rotate > .8:
    #    x = tf.image.rot90(x, k=3)
    # elif p_rotate > .6:
    #    x = tf.image.rot90(x, k=2)
    # elif p_rotate > .4:
    #    x = tf.image.rot90(x, k=1)
    #x = tf.image.random_saturation(x,0.5,2)
    #x = tf.image.random_brightness(x, 2)
    #x = tf.image.random_contrast(x,0.8,1.2)
    return x, y


### Loading data
def load_dataset(filenames, labeled=True, ordered=False):
    """Build a dataset of decoded examples from a list of TFRecord files.

    Files are read in parallel (``num_parallel_reads=AUTO``) and records are
    parsed in parallel (``num_parallel_calls=AUTO``).

    Args:
        filenames: list of TFRecord file paths.
        labeled: when True, records are parsed with ``read_labeled_tfrecord``
            and yield ``(image, label)``; otherwise with
            ``read_unlabeled_tfrecord`` and yield ``(image, id)``.
        ordered: when False, deterministic ordering is switched off so
            elements are used as soon as they stream in. When True the
            default deterministic behaviour is kept, so repeated passes over
            the dataset see elements in the same order; records from
            different files may still be interleaved by the parallel reader.

    Returns:
        An unbatched ``tf.data.Dataset``.

    Note:
        ``data_augment`` is mapped over every dataset built here, whatever
        the split or the value of ``labeled``.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False  # disable order, increase speed

    # num_parallel_reads makes the reader actually pull from several files
    # at once instead of one file after another.
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    dataset = dataset.with_options(options)

    parse_fn = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    # JPEG decoding is the expensive step, so parallelise it as well.
    dataset = dataset.map(parse_fn, num_parallel_calls=AUTO)
    dataset = dataset.map(data_augment, num_parallel_calls=AUTO)
    return dataset


### Getting training, validation and test dataset
def get_training_dataset():
    """Return the repeating, shuffled, batched training dataset.

    Built from ``TRAINING_FILENAMES`` with ordering disabled. The dataset
    repeats indefinitely, so callers must bound each epoch with
    ``steps_per_epoch``.

    Returns:
        A ``tf.data.Dataset`` of ``(images, labels)`` batches of size
        ``BATCH_SIZE``.
    """
    dataset = load_dataset(
        TRAINING_FILENAMES,
        labeled=True,
    )
    dataset = dataset.repeat()  # the training dataset must repeat for several epochs
    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(BATCH_SIZE)
    # Prepare the next batch while the current one is being trained on.
    dataset = dataset.prefetch(AUTO)
    return dataset


def get_validation_dataset(ordered = False):
    """Return the batched, cached validation dataset.

    Built from ``VAL_FILENAMES``, the same way the training dataset is built
    from ``TRAINING_FILENAMES``.

    Args:
        ordered: forwarded to ``load_dataset``; pass True when images and
            labels are going to be read in separate passes.

    Returns:
        A ``tf.data.Dataset`` of ``(images, labels)`` batches of size
        ``BATCH_SIZE``.
    """
    dataset = load_dataset(
        VAL_FILENAMES,
        labeled=True,
        ordered=ordered,
    )
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.cache()  # batches are cached after the first full pass
    # Overlap input preparation with model execution.
    dataset = dataset.prefetch(AUTO)
    return dataset


def get_test_dataset(ordered=False):
    """Return the batched, unlabeled test dataset.

    Built from ``TEST_FILESNAMES``.

    Args:
        ordered: forwarded to ``load_dataset``; pass True when images and ids
            are going to be read in separate passes.

    Returns:
        A ``tf.data.Dataset`` of ``(images, ids)`` batches of size
        ``BATCH_SIZE``.
    """
    dataset = load_dataset(
        TEST_FILESNAMES,
        labeled=False,
        ordered=ordered,
    )
    dataset = dataset.batch(BATCH_SIZE)
    # Overlap input preparation with model execution.
    dataset = dataset.prefetch(AUTO)
    return dataset


# Build the training and validation pipelines once; the model cells below
# all pass these two objects to fit().
training_dataset = get_training_dataset()
validation_dataset = get_validation_dataset()


def batch_to_numpy_images_and_labels(data):
    """Convert an ``(images, labels)`` batch to NumPy values.

    Args:
        data: pair of objects that each expose a ``.numpy()`` method.

    Returns:
        Tuple ``(numpy_images, numpy_labels)``. When the second element
        converts to an object-dtype array (binary strings, i.e. image ids as
        found in the test data), the labels are replaced by a list holding
        one ``None`` per image.
    """
    image_tensor, label_tensor = data
    images_np = image_tensor.numpy()
    labels_np = label_tensor.numpy()
    if labels_np.dtype != object:
        return images_np, labels_np
    # Object dtype means these are id strings, not class labels.
    return images_np, [None] * len(images_np)


def title_from_label_and_target(label, correct_label):
    """Build a plot title for a prediction and report whether it is correct.

    Args:
        label: predicted class index into ``CLASSES``.
        correct_label: true class index, or ``None`` when unknown.

    Returns:
        Tuple ``(title, correct)``. With no true label the title is just the
        predicted class name and ``correct`` is True. Otherwise the title is
        the predicted name followed by ``[OK]``, or by ``[NO→<true name>]``
        on a mismatch.
    """
    predicted_name = CLASSES[label]
    if correct_label is None:
        return predicted_name, True

    correct = label == correct_label
    if correct:
        verdict, arrow, expected_name = "OK", "", ""
    else:
        verdict, arrow, expected_name = "NO", "\u2192", CLASSES[correct_label]
    title = "{} [{}{}{}]".format(predicted_name, verdict, arrow, expected_name)
    return title, correct


def display_one_flower(image, title, subplot, red=False, titlesize=16):
    """Draw one image into the given subplot slot and return the next slot.

    Args:
        image: image data accepted by ``plt.imshow``.
        title: title text; nothing is drawn when it is empty.
        subplot: ``(rows, cols, index)`` triple identifying the slot.
        red: when True the title is red and its font is shrunk by 1.2.
        titlesize: base font size of the title.

    Returns:
        ``(rows, cols, index + 1)``, i.e. the slot for the next image.
    """
    plt.subplot(*subplot)
    plt.axis("off")
    plt.imshow(image)

    if len(title) > 0:
        if red:
            font_size, colour = int(titlesize / 1.2), "red"
        else:
            font_size, colour = int(titlesize), "black"
        plt.title(
            title,
            fontsize=font_size,
            color=colour,
            fontdict={"verticalalignment": "center"},
            pad=int(titlesize / 1.5),
        )

    n_rows, n_cols, position = subplot[0], subplot[1], subplot[2]
    return (n_rows, n_cols, position + 1)


def display_batch_of_images(databatch, predictions=None):
    """Plot a batch of images on a square-ish grid with class-name titles.

    This will work with:
    display_batch_of_images((images, labels))
    display_batch_of_images((images, labels), predictions)
    display_batch_of_images((images, ids))
    display_batch_of_images((images, ids), predictions)

    Args:
        databatch: a two-element batch as accepted by
            ``batch_to_numpy_images_and_labels``. The second element holds
            either class labels or image ids; for ids, no ground truth is
            shown.
        predictions: optional predicted class indices, one per image. When
            given, titles come from ``title_from_label_and_target`` and wrong
            predictions are shown in red.

    Returns:
        None. An empty batch is silently ignored.
    """
    # data
    images, labels = batch_to_numpy_images_and_labels(databatch)
    if labels is None:
        labels = [None] * len(images)

    # An empty batch would give rows == 0 and a division by zero below.
    if len(images) == 0:
        return

    # auto-squaring: this will drop data that does not fit into square
    # or square-ish rectangle
    rows = int(math.sqrt(len(images)))
    cols = len(images) // rows
    shown = rows * cols

    # size and spacing
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot = (rows, cols, 1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE, FIGSIZE / cols * rows))
    else:
        plt.figure(figsize=(FIGSIZE / rows * cols, FIGSIZE))

    # magic formula tested to work from 1x1 to 10x10 images
    dynamic_titlesize = FIGSIZE * SPACING / max(rows, cols) * 40 + 3

    # display
    has_titles = False
    for i, (image, label) in enumerate(zip(images[:shown], labels[:shown])):
        title = "" if label is None else CLASSES[label]
        correct = True
        if predictions is not None:
            title, correct = title_from_label_and_target(predictions[i], label)
        has_titles = has_titles or len(title) > 0
        subplot = display_one_flower(
            image, title, subplot, not correct, titlesize=dynamic_titlesize
        )

    # layout: pack images edge to edge only when no title was drawn
    plt.tight_layout()
    if has_titles:
        plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    else:
        plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()

### Defining the learning-rate schedule
def lr_function(epoch):
    """Return the learning rate to use for the given epoch.

    Three phases: a linear ramp from ``start_lr`` to ``max_lr`` over
    ``rampup_epochs``, an optional plateau at ``max_lr`` for
    ``sustain_epochs``, then an exponential decay towards ``min_lr``.
    With the constants below ``start_lr`` equals ``max_lr`` and there is no
    plateau, so the rate is flat for the first 5 epochs and decays after.

    Args:
        epoch: zero-based epoch index.

    Returns:
        The learning rate as a float.
    """
    start_lr, max_lr, min_lr = 1e-3, 1e-3, 5e-5
    rampup_epochs, sustain_epochs = 5, 0
    exp_decay = 0.8

    if epoch < rampup_epochs:
        slope = (max_lr - start_lr) / rampup_epochs
        return slope * epoch + start_lr
    if epoch < rampup_epochs + sustain_epochs:
        return max_lr
    epochs_decaying = epoch - rampup_epochs - sustain_epochs
    return (max_lr - min_lr) * exp_decay ** epochs_decaying + min_lr


# Callbacks shared by every fit() call in this notebook.
callbacks = [
    # Stop when val_loss has not improved for 5 epochs and restore the
    # weights of the best epoch.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                    patience=5,
                                    restore_best_weights=True),
    # Set the learning rate from lr_function at each epoch; verbose=True
    # prints the value that is applied.
    tf.keras.callbacks.LearningRateScheduler(
        lambda epoch: lr_function(epoch), verbose=True
    )
]

### Define function to display results
def display_training_curves(training, validation, title, subplot):
    """Plot a training and a validation curve on one subplot.

    Args:
        training: per-epoch values of the metric on the training data.
        validation: per-epoch values of the metric on the validation data.
        title: metric name, used for the y-axis label and the plot title.
        subplot: three-digit matplotlib subplot code (e.g. 211). A new
            10x10-inch figure is created when its last digit is 1.
    """
    starts_new_figure = subplot % 10 == 1
    if starts_new_figure:
        plt.subplots(figsize=(10, 10), facecolor="#F0F0F0")
        plt.tight_layout()

    axes = plt.subplot(subplot)
    axes.set_facecolor("#F8F8F8")
    for series in (training, validation):
        axes.plot(series)
    axes.set_title("model " + title)
    axes.set_ylabel(title)
    axes.set_xlabel("epoch")
    axes.legend(["train ", "valid"])
        
def count_data_items(filenames):
    """Sum the item counts encoded in TFRecord file names.

    The number of data items is written in the name of the .tfrec files,
    i.e. flowers00-230.tfrec = 230 data items.

    Args:
        filenames: iterable of file names or paths.

    Returns:
        Total number of items as a NumPy int64 (0 for an empty iterable).

    Raises:
        ValueError: if a file name carries no ``-<digits>.`` count.
    """
    # Compile once instead of once per file name.
    count_pattern = re.compile(r"-([0-9]*)\.")
    counts = []
    for filename in filenames:
        match = count_pattern.search(filename)
        if match is None or not match.group(1):
            raise ValueError(
                "cannot read the item count from file name: {!r}".format(filename)
            )
        counts.append(int(match.group(1)))
    # An explicit integer dtype keeps the result an int for an empty list.
    return np.sum(counts, dtype=np.int64)

In [None]:
# Training-set size is derived from the shard file names.
NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
# Hard-coded test-set size; used to pull all test ids as a single batch.
NUM_TEST_IMAGES = 7382
# The training dataset repeats forever, so fit() needs an explicit epoch length.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE

# Visualize Data

In [None]:
# Peek at the data: re-batch the training stream into groups of 20 images
# and display the first group.
ds_train = get_training_dataset()
ds_iter = iter(ds_train.unbatch().batch(20))
one_batch = next(ds_iter)
display_batch_of_images(one_batch)

# Models

# DenseNet201

In [None]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

%%time
# DenseNet201 backbone (ImageNet weights, no classification top) followed by
# global average pooling and a 104-way softmax, built inside the strategy scope.
with strategy.scope():
    pretrained_model = tf.keras.applications.DenseNet201(
        weights="imagenet", include_top=False, input_shape=(size, size, 3)
    )
    pretrained_model.trainable = True  # transfer learning
    model3= tf.keras.Sequential(
        [
            pretrained_model,
            # tf.keras.layers.BatchNormalization(),
            tf.keras.layers.GlobalAveragePooling2D(),
            # tf.keras.layers.Dropout(0.1),
            tf.keras.layers.Dense(104, activation="softmax"),
        ]
    )

# Labels are integer class ids, hence the sparse loss and metric.
model3.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# The training dataset repeats, so steps_per_epoch defines the epoch length.
historical = model3.fit(
    training_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

# Loss curves in the top subplot (211 also opens a new figure) ...
display_training_curves(
    historical.history["loss"],
    historical.history["val_loss"],
    "loss",
    211,
)

# ... accuracy curves in the bottom subplot.
display_training_curves(
    historical.history["sparse_categorical_accuracy"],
    historical.history["val_sparse_categorical_accuracy"],
    "accuracy",
    212,
)

# RESNET

# ResNet152V2 backbone (ImageNet weights, no classification top) followed by
# global average pooling and a 104-way softmax, built inside the strategy scope.
with strategy.scope():
    # The input shape follows `size` so it always matches what decode_image
    # produces.
    pretrained_model = tf.keras.applications.ResNet152V2(
        weights="imagenet", include_top=False, input_shape=(size, size, 3)
    )
    pretrained_model.trainable = True  # transfer learning

    model = tf.keras.Sequential(
        [
            pretrained_model,
            # tf.keras.layers.BatchNormalization(),
            tf.keras.layers.GlobalAveragePooling2D(),
            # tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(104, activation="softmax"),
        ]
    )

# Labels are integer class ids, hence the sparse loss and metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# The training dataset repeats, so steps_per_epoch defines the epoch length.
historical = model.fit(
    training_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

# Loss curves in the top subplot (211 also opens a new figure) ...
display_training_curves(
    historical.history["loss"],
    historical.history["val_loss"],
    "loss",
    211,
)

# ... accuracy curves in the bottom subplot.
display_training_curves(
    historical.history["sparse_categorical_accuracy"],
    historical.history["val_sparse_categorical_accuracy"],
    "accuracy",
    212,
)

# Xception

# Xception backbone (ImageNet weights, no classification top) followed by
# global average pooling and a 104-way softmax, built inside the strategy scope.
with strategy.scope():
    # The input shape follows `size` so it always matches what decode_image
    # produces.
    pretrained_model = tf.keras.applications.Xception(
        weights="imagenet", include_top=False, input_shape=(size, size, 3)
    )
    pretrained_model.trainable = True  # transfer learning

    model = tf.keras.Sequential(
        [
            pretrained_model,
            # tf.keras.layers.BatchNormalization(),
            tf.keras.layers.GlobalAveragePooling2D(),
            # tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(104, activation="softmax"),
        ]
    )

# Labels are integer class ids, hence the sparse loss and metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# The training dataset repeats, so steps_per_epoch defines the epoch length.
historical = model.fit(
    training_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

# Loss curves in the top subplot (211 also opens a new figure) ...
display_training_curves(
    historical.history["loss"],
    historical.history["val_loss"],
    "loss",
    211,
)

# ... accuracy curves in the bottom subplot.
display_training_curves(
    historical.history["sparse_categorical_accuracy"],
    historical.history["val_sparse_categorical_accuracy"],
    "accuracy",
    212,
)

# Instantiate EfficientNetB7 with the "noisy-student" weights.
# NOTE(review): nothing in this cell wraps the backbone in a model, and the
# EfficientNetB7 section further down builds it again - confirm whether this
# cell is still needed.
with strategy.scope():    
    pretrained_model = efficientnet.EfficientNetB7(weights='noisy-student', 
                                                         include_top=False ,
                                                         input_shape=(512,512,3))
    pretrained_model.trainable = True # transfer learning

# EfficientNetV2XL

This model was downloaded from https://github.com/leondgarse/keras_cv_attention_models
The TensorFlow version available on Kaggle is not recent enough to provide this model through keras.applications, so we fetch the saved model file and load it with the load_model function.

In [None]:
# Load the saved EfficientNetV2-XL model, freeze it, and stack a small
# trainable head (Dense 3500 -> Dropout -> 104-way softmax) on top.
with strategy.scope():
    modelv2xl = tf.keras.models.load_model(
        "/kaggle/input/effnetv2xl/efficientnetv2-xl-21k-ft1k.h5", compile=True
    )
    modelv2xl.trainable = False  # only the layers added below are trained
    # NOTE(review): no pooling layer is added here (it is commented out), so
    # the loaded model presumably already outputs a flat vector - possibly its
    # own class scores, judging by the file name. Confirm that this is the
    # intended input for the new head.
    model1 = tf.keras.Sequential(
        [
            modelv2xl,
            # tf.keras.layers.BatchNormalization(),
            #tf.keras.layers.Dropout(0.2),
            # tf.keras.layers.Reshape((10,10,10)),
            # tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(3500, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(104, activation="softmax"), 
            #tf.keras.layers.Dense(104, activation="relu"),
        ]
    )
model1.summary()

# Labels are integer class ids, hence the sparse loss and metric.
model1.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# The training dataset repeats, so steps_per_epoch defines the epoch length.
historical = model1.fit(
    training_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

# Loss curves in the top subplot (211 also opens a new figure) ...
display_training_curves(
    historical.history["loss"],
    historical.history["val_loss"],
    "loss",
    211,
)

# ... accuracy curves in the bottom subplot.
display_training_curves(
    historical.history["sparse_categorical_accuracy"],
    historical.history["val_sparse_categorical_accuracy"],
    "accuracy",
    212,
)

In [None]:
# Persist the trained EfficientNetV2-XL based model in HDF5 format.
model1.save("/kaggle/working/model_effnetv2xl.h5", save_format="h5")

# EfficientNetB7

In [None]:
# EfficientNetB7 backbone ("noisy-student" weights, no classification top),
# fully trainable, with a Dense/Dropout/pooling/softmax head.
with strategy.scope():
    pretrained_model = efficientnet.EfficientNetB7(
        weights="noisy-student", include_top=False, input_shape=(size, size, 3)
    )
    pretrained_model.trainable = True  # transfer learning

    # NOTE(review): Dense(3500) and Dropout sit before GlobalAveragePooling2D,
    # so they run on the backbone output before it is pooled. The DenseNet,
    # ResNet and Xception heads in this notebook pool first - confirm this
    # ordering is intentional.
    model2 = tf.keras.Sequential(
        [
            pretrained_model,
            # tf.keras.layers.BatchNormalization(),
            # tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(3500, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(104, activation="softmax"),
        ]
    )
model2.summary()

# Labels are integer class ids, hence the sparse loss and metric.
model2.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# The training dataset repeats, so steps_per_epoch defines the epoch length.
historical = model2.fit(
    training_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

# Loss curves in the top subplot (211 also opens a new figure) ...
display_training_curves(
    historical.history["loss"],
    historical.history["val_loss"],
    "loss",
    211,
)

# ... accuracy curves in the bottom subplot.
display_training_curves(
    historical.history["sparse_categorical_accuracy"],
    historical.history["val_sparse_categorical_accuracy"],
    "accuracy",
    212,
)

# Persist the trained EfficientNetB7 based model in HDF5 format.
model2.save("/kaggle/working/model_effnetb7.h5", save_format="h5")

# EfficientNetV2L

# Load the saved EfficientNetV2-L model, freeze it, and add a single
# trainable 104-way softmax layer on top.
with strategy.scope():
    modelv2l = tf.keras.models.load_model(
        "/kaggle/input/effnetv2l/efficientnetv2-l-imagenet.h5", compile=True
    )
    modelv2l.trainable = False  # only the Dense layer added below is trained
    # NOTE(review): no pooling layer is added (it is commented out), so the
    # loaded model presumably already outputs a flat vector - confirm.
    model = tf.keras.Sequential(
        [
            modelv2l,
            # tf.keras.layers.BatchNormalization(),
            # tf.keras.layers.Dropout(0.2),
            # tf.keras.layers.Reshape((10,10,10)),
            # tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(104, activation="softmax"),
        ]
    )
model.summary()

# Labels are integer class ids, hence the sparse loss and metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# The training dataset repeats, so steps_per_epoch defines the epoch length.
historical = model.fit(
    training_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

# Loss curves in the top subplot (211 also opens a new figure) ...
display_training_curves(
    historical.history["loss"],
    historical.history["val_loss"],
    "loss",
    211,
)

# ... accuracy curves in the bottom subplot.
display_training_curves(
    historical.history["sparse_categorical_accuracy"],
    historical.history["val_sparse_categorical_accuracy"],
    "accuracy",
    212,
)

In [None]:
from sklearn.metrics import f1_score

# Search the blending weight between model1 and model3 that maximises the
# macro F1 score on the validation set.
NUM_VALIDATION_IMAGES = int(count_data_items(VAL_FILENAMES))
# since we are splitting the dataset and iterating separately on images and labels, order matters.
cmdataset = get_validation_dataset(ordered=True)
images_ds = cmdataset.map(lambda image, label: image)
labels_ds = cmdataset.map(lambda image, label: label).unbatch()
# get everything as one batch
cm_correct_labels = next(iter(labels_ds.batch(NUM_VALIDATION_IMAGES))).numpy()
m1 = model1.predict(images_ds)
m2 = model3.predict(images_ds)

# Candidate weights for model1; model3 gets the complement (1 - alpha).
alphas = np.linspace(0, 1, 100)
scores = []
for alpha in alphas:
    cm_probabilities = alpha*m1+(1-alpha)*m2
    cm_predictions = np.argmax(cm_probabilities, axis=-1)
    scores.append(f1_score(cm_correct_labels, cm_predictions, labels=range(104), average='macro'))

# Read the winner back from the grid that was evaluated: linspace(0, 1, 100)
# has a step of 1/99, so dividing the index by 100 would give a different
# alpha than the one that was scored.
best_alpha = float(alphas[np.argmax(scores)])
print('Best alpha: ' + str(best_alpha))

# Predictions

This will create a file that can be submitted to the competition.

In [None]:
# Predict the test set with the model1/model3 blend and write submission.csv.
test_ds = get_test_dataset(
    ordered=True
)  # since we are splitting the dataset and iterating separately on images and ids, order matters.

print("Computing predictions...")
test_images_ds = test_ds.map(lambda image, idnum: image)
probs1 = model1.predict(test_images_ds)
probs2 = model3.predict(test_images_ds)
# Weighted average of the two models' class probabilities, using the weight
# selected on the validation set.
probabilities = best_alpha*probs1 + (1-best_alpha)*probs2
predictions = np.argmax(probabilities, axis=-1)
print(predictions)

print("Generating submission.csv file...")
test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()
# Pull every id in a single batch (relies on the hard-coded NUM_TEST_IMAGES)
# and convert the byte strings to unicode.
test_ids = (
    next(iter(test_ids_ds.batch(NUM_TEST_IMAGES))).numpy().astype("U")
)  # all in one batch
# One "id,label" row per test image, preceded by a header line.
np.savetxt(
    "submission.csv",
    np.rec.fromarrays([test_ids, predictions]),
    fmt=["%s", "%d"],
    delimiter=",",
    header="id,label",
    comments="",
)