Most of the training code is based on the work of Jason Wong; the original code is available at the link below:
https://www.kaggle.com/code/jasonhcwong/dog-breed-classification-using-efficientnet/notebook  

In [None]:
import os
from PIL import Image
import xml.etree.ElementTree as ET
import numpy as np
import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.efficientnet_v2 import EfficientNetV2B0
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

Run the code below if you are training on Google Colab and the training dataset is stored in Google Drive.

In [None]:
# Mount Google Drive into the Colab filesystem so the dataset is reachable under /content/drive.
# `drive` comes from the google.colab package, which is only available inside a Colab runtime;
# it has to be imported here because nothing else in the notebook brings it into scope.
from google.colab import drive

drive.mount('/content/drive')

Variables and initialization

In [None]:
# --- Training hyper-parameters ---
epochs = 10
batch_size_per_replica = 128
# Initial value only; it is recomputed from the replica count once the
# distribution strategy has been created.
batch_size_global = 256

# Fraction of the dataset images reserved for evaluation (20%).
testsplit = 0.2

# Width and height of the images fed to the network; both must match the
# input size of the classification network.
targetx, targety = 224, 224

learning_rate = 1e-4
# Number of output classes; change it to match the classes you are going to train.
classes = 8
# Seed passed to the dataset builder for shuffling and the train/validation split.
seed = 777

# --- Locations on Google Drive ---
# Change these paths (and create the directories) to suit your own setup.
data_dir = "/content/drive/MyDrive/Stanford_Dogs/Images/"
annotations_dir = "/content/drive/MyDrive/Stanford_Dogs/Annotations/"
cropped_dir = "/content/drive/MyDrive/Stanford_Dogs/Cropped/"
checkpoint_path = "/content/drive/MyDrive/Stanford_Dogs/Checkpoints/"

# Enable memory growth on every detected GPU; this must happen before the
# strategy below initializes the devices.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# A mirrored strategy lets training make use of multiple GPUs.
mirrored_strategy = tf.distribute.MirroredStrategy()
replica_count = mirrored_strategy.num_replicas_in_sync
print('Number of devices: {}'.format(replica_count))

# Scale the global batch size by the number of replicas (GPUs) detected.
batch_size_global = batch_size_per_replica * replica_count
print('Global batch size: {}'.format(batch_size_global))

Prepare images for training

Note：

    － The Stanford Dogs Dataset contains large images of dogs together with class labels and bounding boxes. To achieve better results, the bounding boxes are used to crop a close-up of the dog out of each image, and the cropped close-ups are then used for training.

In [None]:
# Start from an empty output directory: remove any crops left by a previous run,
# then recreate the directory.
# The standard library is used instead of `%system rm -rf` / `%system mkdir` so the
# cell also runs outside IPython, handles paths containing spaces safely, and
# creates missing parent directories.
import shutil

shutil.rmtree(cropped_dir, ignore_errors=True)
os.makedirs(cropped_dir, exist_ok=True)

#this function adapted from https://www.kaggle.com/hengzheng/dog-breeds-classifier
def save_cropped_img(path, annotation, newpath):
    """Crop an image to its first annotated bounding box and save it as RGB.

    Args:
        path: Path of the source image.
        annotation: Path of the annotation file *without* the ".xml"
            extension. "<annotation>.xml" is used when it exists; otherwise
            ``annotation`` itself is parsed, which covers datasets (such as
            the original Stanford Dogs download) whose annotation files have
            no extension.
        newpath: Path the cropped image is written to.

    Raises:
        FileNotFoundError: If neither annotation file exists.
        IndexError: If the annotation has no xmin/ymin/xmax/ymax element.
    """
    annotation_file = annotation + ".xml"
    if not os.path.exists(annotation_file):
        annotation_file = annotation

    # Parse once and read the first occurrence of each bounding-box coordinate.
    root = ET.parse(annotation_file).getroot()
    xmin, ymin, xmax, ymax = (
        int(root.findall('.//' + tag)[0].text)
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )

    # The context manager closes the source file handle, which matters when
    # thousands of images are processed in one run.
    with Image.open(path) as image:
        cropped = image.crop((xmin, ymin, xmax, ymax)).convert('RGB')
        cropped.save(newpath)

def crop_images():
    """Crop every dataset image to its bounding box, one output folder per breed.

    Reads the module-level ``data_dir`` and ``annotations_dir`` and writes the
    crops to ``cropped_dir`` using the same ``<breed>/<file name>`` layout.
    The annotation for an image is looked up under
    ``annotations_dir/<breed>/<image name without extension>``.
    """
    breeds = os.listdir(data_dir)
    annotations = os.listdir(annotations_dir)

    print('breeds: ', len(breeds), 'annotations: ', len(annotations))

    total_images = 0

    for breed in breeds:
        breed_dir = os.path.join(data_dir, breed)
        # Skip stray files in the dataset root; only folders hold breed images.
        if not os.path.isdir(breed_dir):
            continue

        # exist_ok allows a re-run without wiping cropped_dir first.
        os.makedirs(os.path.join(cropped_dir, breed), exist_ok=True)

        for filename in os.listdir(breed_dir):
            # splitext rather than [:-4] so extensions of any length
            # (e.g. ".jpeg") still map to the right annotation name.
            stem = os.path.splitext(filename)[0]
            annotation_path = os.path.join(annotations_dir, breed, stem)
            newpath = os.path.join(cropped_dir, breed, filename)
            save_cropped_img(os.path.join(breed_dir, filename), annotation_path, newpath)
            total_images += 1

    print("total images cropped", total_images)


crop_images()

Prepare dataset for training and evaluation

Note：

    －A tf.data.Dataset is created from the image files in the directory created in the previous step.
    －The dataset is split into a training set with 80% of the images and an evaluation set with the remaining 20%.

In [None]:
# Build the training and validation datasets from the cropped images.
# With validation_split set and subset='both', the call returns the
# (training, validation) pair that is unpacked here; the same seed drives the
# shuffle and the split. Images are resized to (targetx, targety) and batched
# with the global batch size.
trainset, valset = tf.keras.utils.image_dataset_from_directory(
    cropped_dir,
    labels='inferred',
    label_mode='categorical',
    class_names=None,
    color_mode='rgb',
    batch_size=batch_size_global,
    image_size=(targetx, targety),
    shuffle=True,
    seed=seed,
    validation_split=testsplit,
    subset='both',
    interpolation='bilinear',
    follow_links=False,
    crop_to_aspect_ratio=False,
)

# Apply the AUTO shard policy to both datasets (relevant when the input is
# distributed across replicas); only the training set gets prefetching.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.AUTO
trainset = trainset.with_options(options).prefetch(tf.data.AUTOTUNE)
valset = valset.with_options(options)

Create Keras callbacks for training

Note：

    －Callbacks provide useful information and fine-tuning during model training.
        －ModelCheckpoint callback: saves the model at a specified frequency.
        －TensorBoard callback: saves logs that TensorBoard can visualize for inspection after the training.
        －EarlyStopping callback: stops the training when a monitored metric (evaluation accuracy in this case) has stopped improving.
        －ReduceLROnPlateau callback: decreases the learning rate when a metric (evaluation accuracy in this case) has stopped improving.

In [None]:
# Save the full model (not just the weights) to checkpoint_path at the end of
# every epoch, regardless of whether val_accuracy improved.
checkpoint = ModelCheckpoint(checkpoint_path,
                             monitor='val_accuracy',
                             save_best_only=False,
                             verbose=1,
                             mode='auto',
                             save_weights_only=False,
                             save_freq='epoch')

#https://github.com/keras-team/keras/issues/3358
# Write per-epoch TensorBoard logs to ./logs. The former `batch_size` argument
# is intentionally not passed: it is obsolete in the TF2 callback (ignored with
# a warning) and is not accepted by newer Keras releases.
tensorboard = TensorBoard(log_dir="./logs",
                          histogram_freq=0,
                          write_graph=False,
                          update_freq='epoch')

# Stop training once val_accuracy has not improved by at least min_delta for
# `patience` epochs, and restore the best weights seen.
earlystop = EarlyStopping(monitor='val_accuracy',
                          min_delta=.0001,
                          patience=10,
                          verbose=1,
                          mode='auto',
                          baseline=None,
                          restore_best_weights=True)

# Multiply the learning rate by sqrt(0.1) when val_accuracy has stalled for
# `patience` epochs, never going below min_lr.
reducelr = ReduceLROnPlateau(monitor='val_accuracy',
                             factor=np.sqrt(.1),
                             patience=5,
                             verbose=1,
                             mode='auto',
                             min_delta=.0001,
                             cooldown=0,
                             min_lr=0.0000001)

Build the model

Note：

    － A pre-trained EfficientNetV2B0 model from the Keras library is used for transfer learning: the network weights of the pre-trained model are reused, except for the top layers.
    - The model can be trained much faster by using transfer learning.

In [None]:
# Build and compile the model inside the strategy scope so that it is created
# under the mirrored (multi-GPU) strategy.
with mirrored_strategy.scope():
    # Backbone: EfficientNetV2B0 with ImageNet weights and without its top (classification) layers.
    base_model = EfficientNetV2B0(include_top=False, weights='imagenet', input_shape=(targetx, targety, 3))

    # New head: global average pooling -> batch norm -> 1280-unit ReLU layer
    # -> batch norm -> softmax over `classes` outputs.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dense(1280, activation='relu', bias_initializer='zeros')(x)
    x = BatchNormalization()(x)
    predictions = Dense(classes, activation='softmax', kernel_initializer='random_uniform', bias_initializer='zeros')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    # Use the `learning_rate` keyword: the old `lr` alias is deprecated and is
    # ignored or rejected by newer Keras optimizers, in which case the
    # configured rate would not be applied.
    optimizer = Adam(learning_rate=learning_rate)

    loss = "categorical_crossentropy"

    # Every layer, including the pre-trained backbone, is left trainable.
    for layer in model.layers:
        layer.trainable = True
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=["accuracy"])

#model.summary()
#for i, layer in enumerate(model.layers):
#    print(i, layer.name, layer.trainable)

Train the model

In [None]:
%%time

# Train on the training set and evaluate on the validation set after each
# epoch; `params` keeps whatever `fit` returns for later inspection.
params = model.fit(
    trainset,
    epochs=epochs,
    validation_data=valset,
    callbacks=[reducelr, earlystop, tensorboard, checkpoint],
)

Save the model

In [None]:
# Save the trained model in TensorFlow SavedModel format to Google Drive;
# the TensorFlow Lite conversion step loads it back from this same directory.
model.save('/content/drive/MyDrive/Stanford_Dogs/Model/')


Convert saved Keras model into TensorFlowLite format

In [None]:
# Reload the saved Keras model from Google Drive.
keras_model = tf.keras.models.load_model('/content/drive/MyDrive/Stanford_Dogs/Model/')

# Convert it to TensorFlow Lite with the new converter enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.experimental_new_converter = True
tflite_model = converter.convert()

# Write the converted model to model.tflite in the current working directory.
with open('model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)