## Setup and data loading

In [None]:
import numpy as np
import tensorflow as tf  # For tf.data
import matplotlib.pyplot as plt
import keras
from keras import layers
from keras.applications import EfficientNetB3

# IMG_SIZE is determined by EfficientNet model choice
# (this notebook imports EfficientNetB3 and feeds it IMG_SIZE x IMG_SIZE inputs).
IMG_SIZE = 300
# Mini-batch size constant for the input pipeline.
BATCH_SIZE = 32


In [None]:
import zipfile

from google.colab import drive

# Make Google Drive visible inside this Colab runtime.
drive.mount('/content/drive')

# Unpack the image archive stored on Drive onto the runtime's local disk.
with zipfile.ZipFile('/content/drive/MyDrive/plant.zip', 'r') as archive:
    archive.extractall('/content/images')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Load the observations export into a DataFrame.
observations_csv = 'observations-497982.csv'
obv = pd.read_csv(observations_csv)

In [None]:
# Keep only the observations whose iconic taxon is 'Plantae'.
is_plant = obv['iconic_taxon_name'] == 'Plantae'
df = obv[is_plant]

# Show how many rows survived the filter.
len(df)

41704

In [None]:
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from collections import Counter

# Number of most frequent `scientific_name` values to keep as classes.
TOP_N_LABELS = 20

# Count how often each scientific_name occurs among the plant observations.
label_counts = Counter(df['scientific_name'])

# The TOP_N_LABELS most frequent labels, most common first.
top_labels = [label for label, _ in label_counts.most_common(TOP_N_LABELS)]
# Kept under its original (misleading) name for any cell that still refers to
# it; only TOP_N_LABELS labels are selected, not 100.
top_100_labels = top_labels
# Set copy so the per-row membership test below is a hash lookup.
top_label_set = set(top_labels)

# Decoded images and their labels, collected in DataFrame row order.
images = []
labels = []

# Iterate over just the two columns that are needed instead of building a
# full Series per row with DataFrame.iterrows().
for image_id, label in zip(df['id'], df['scientific_name']):
    # Test the label first so rows outside the selected classes do not cost
    # a filesystem lookup.
    if label not in top_label_set:
        continue

    img_path = f'/content/images/data/plant_img/{image_id}.jpg'
    if not os.path.exists(img_path):
        continue

    # Decode at 224x224; a later cell resizes to the model's input size.
    img = load_img(img_path, target_size=(224, 224))
    images.append(img_to_array(img))
    labels.append(label)

# Fail here with a clear message rather than with an unrelated error in the
# split/encoding cells when nothing was loaded (e.g. wrong extraction path).
if not images:
    raise FileNotFoundError(
        "No image files found under /content/images/data/plant_img/ "
        f"for the top {TOP_N_LABELS} labels"
    )

# Convert lists to arrays
images = np.array(images)
labels = np.array(labels)

# Pair every image with its label so both can be split together.
data = list(zip(images, labels))

In [None]:
# Hold out 20% of the (image, label) pairs; the fixed seed keeps the split
# reproducible across runs.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=10,
)



In [None]:
# Turn each list of (image, label) pairs back into two parallel arrays.
train_images = np.array([image for image, _ in train_data])
train_labels = np.array([label for _, label in train_data])
test_images = np.array([image for image, _ in test_data])
test_labels = np.array([label for _, label in test_data])

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit a single encoder on the union of train and test labels so that both
# splits share the same label -> integer mapping.
all_labels = np.concatenate([train_labels, test_labels])
label_encoder = LabelEncoder().fit(all_labels)

# Width of the one-hot vectors: one column per distinct label.
n_label_classes = np.unique(all_labels).size

# Integer-encode, then one-hot encode, each split.
# NOTE: this overwrites train_labels/test_labels in place, so the cell is
# not meant to be re-run without re-running the split cell first.
train_labels = tf.keras.utils.to_categorical(
    label_encoder.transform(train_labels),
    num_classes=n_label_classes,
)
test_labels = tf.keras.utils.to_categorical(
    label_encoder.transform(test_labels),
    num_classes=n_label_classes,
)

# Wrap the in-memory arrays as tf.data datasets of (image, one_hot_label).
ds_train = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
ds_test = tf.data.Dataset.from_tensor_slices((test_images, test_labels))

In [None]:
# Number of distinct values in `labels`; passed to build_model as num_classes.
NUM_CLASSES = len(np.unique(labels))

When the dataset includes images of various sizes, we need to resize them to a
shared size. The plant images in this notebook were loaded at 224x224 pixels
above. Here we resize the images to the input size needed for EfficientNet.

In [None]:
size = (IMG_SIZE, IMG_SIZE)


def _resize_example(image, label):
    """Resize one image to `size` and pass its label through unchanged."""
    return tf.image.resize(image, size), label


# Apply the same resize to both splits.
ds_train = ds_train.map(_resize_example)
ds_test = ds_test.map(_resize_example)

### Prepare inputs

Once we verify the input data and augmentation are working correctly,
we prepare the dataset for training. The input data are resized to a uniform
`IMG_SIZE`. The labels are put into one-hot
(a.k.a. categorical) encoding. The dataset is batched.

Note: `prefetch` and `AUTOTUNE` may in some situations improve
performance, but this depends on the environment and the specific dataset used.
See this [guide](https://www.tensorflow.org/guide/data_performance)
for more information on data pipeline performance.

In [None]:
# NOTE: the augmentation pipeline below is disabled (the whole cell is
# commented out), so ds_train is used without augmentation. Uncomment the
# cell to apply the listed random transforms to the training images.

# # Define augmentation layers
# img_augmentation_layers = [
#     layers.RandomRotation(factor=0.15),
#     layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
#     layers.RandomFlip(),
#     layers.RandomContrast(factor=0.1),
# ]

# def img_augmentation(images):
#     for layer in img_augmentation_layers:
#         images = layer(images)
#     return images

# # Apply augmentation to the train dataset
# ds_train = ds_train.map(lambda image, label: (img_augmentation(image), label))  # Apply augmentation


In [None]:
# Shuffle only the training split, then batch both splits with the shared
# BATCH_SIZE constant (previously a duplicated literal 32) and prefetch.
# tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias.
ds_train = ds_train.shuffle(buffer_size=100).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

## Transfer learning from pre-trained weights

Here we initialize the model with pre-trained ImageNet weights,
and we fine-tune it on our own dataset.

In [None]:

def build_model(num_classes):
    """Build and compile an EfficientNetB3 classifier with a frozen backbone.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled `keras.Model` named "EfficientNet" that takes
        (IMG_SIZE, IMG_SIZE, 3) inputs. The ImageNet-initialised backbone is
        marked non-trainable; the pooling / batch-norm / dropout / dense head
        added on top is left at its default trainable state.
    """
    image_input = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # Pretrained feature extractor without its classification head, frozen.
    backbone = EfficientNetB3(include_top=False, input_tensor=image_input, weights="imagenet")
    backbone.trainable = False

    # New classification head on top of the backbone's feature map.
    features = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    features = layers.BatchNormalization()(features)
    features = layers.Dropout(0.2, name="top_dropout")(features)
    predictions = layers.Dense(num_classes, activation="softmax", name="pred")(features)

    classifier = keras.Model(image_input, predictions, name="EfficientNet")
    classifier.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-2),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier


The first step to transfer learning is to freeze all layers and train only the top
layers. For this step, a relatively large learning rate (1e-2) can be used.
Note that validation accuracy and loss will usually be better than training
accuracy and loss. This is because the regularization is strong, which only
suppresses training-time metrics.

Note that the convergence may take up to 50 epochs depending on choice of learning rate.
If image augmentation layers were not
applied, the validation accuracy may only reach ~60%.

In [15]:
# Stage 1: train with the backbone frozen (see build_model).
model = build_model(num_classes=NUM_CLASSES)

epochs = 15

# Stop once val_loss has not improved for 3 epochs and roll back to the
# weights of the best epoch.
# NOTE(review): ds_test is used here for validation/early stopping and is
# also the dataset passed to model.evaluate later, so the reported test
# accuracy is not measured on data unseen during model selection.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

model.fit(
    ds_train,
    epochs=epochs,
    validation_data=ds_test,
    callbacks=[early_stopping],
)

Epoch 1/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 462ms/step - accuracy: 0.3984 - loss: 3.1017 - val_accuracy: 0.5401 - val_loss: 1.4647
Epoch 2/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 164ms/step - accuracy: 0.6659 - loss: 1.5499 - val_accuracy: 0.5508 - val_loss: 1.5635
Epoch 3/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 172ms/step - accuracy: 0.7043 - loss: 1.1915 - val_accuracy: 0.6168 - val_loss: 1.4134
Epoch 4/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 161ms/step - accuracy: 0.7471 - loss: 1.0076 - val_accuracy: 0.6328 - val_loss: 1.6894
Epoch 5/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 165ms/step - accuracy: 0.7646 - loss: 0.9414 - val_accuracy: 0.6346 - val_loss: 1.8589
Epoch 6/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 170ms/step - accuracy: 0.7851 - loss: 0.8391 - val_accuracy: 0.6399 - val_loss: 1.8919


<keras.src.callbacks.history.History at 0x7cc8142914e0>

The second step is to unfreeze a number of layers and fit the model using a smaller
learning rate. In this example we show unfreezing all layers, but depending on the
specific dataset it may be desirable to only unfreeze a fraction of all layers.

When the feature extraction with the
pretrained model works well enough, this step would give a very limited gain on
validation accuracy. In our case we only see a small improvement,
as ImageNet pretraining already exposed the model to a good amount of dogs.

On the other hand, when we use pretrained weights on a dataset that is more different
from ImageNet, this fine-tuning step can be crucial as the feature extractor also
needs to be adjusted by a considerable amount. Such a situation can be demonstrated
if choosing CIFAR-100 dataset instead, where fine-tuning boosts validation accuracy
by about 10% to pass 80% on `EfficientNetB0`.

A side note on freezing/unfreezing models: setting `trainable` of a `Model` will
simultaneously set all layers belonging to the `Model` to the same `trainable`
attribute. Each layer is trainable only if both the layer itself and the model
containing it are trainable. Hence when we need to partially freeze/unfreeze
a model, we need to make sure the `trainable` attribute of the model is set
to `True`.

In [None]:

def unfreeze_model(model, num_layers=20, learning_rate=1e-5):
    """Unfreeze the last `num_layers` layers of `model` and recompile it.

    BatchNormalization layers among those layers are skipped, i.e. their
    `trainable` flag is left as it was. The model is modified and recompiled
    in place.

    Args:
        model: The Keras model to fine-tune.
        num_layers: How many layers, counted from the end of `model.layers`,
            to consider for unfreezing. Defaults to 20.
        learning_rate: Learning rate of the Adam optimizer used when
            recompiling. Defaults to 1e-5.

    Raises:
        ValueError: If `num_layers` is negative.
    """
    if num_layers < 0:
        raise ValueError("num_layers must be non-negative")

    # `model.layers[-0:]` would select every layer, so handle 0 explicitly.
    top_layers = model.layers[-num_layers:] if num_layers else []
    for layer in top_layers:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

    # Changes to `trainable` only take effect after recompiling.
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )


# Stage 2: unfreeze the top of the network and continue training with the
# small learning rate set inside unfreeze_model.
unfreeze_model(model)

epochs = 5 # @param {type: "slider", min:4, max:10}
model.fit(
    ds_train,
    epochs=epochs,
    validation_data=ds_test,
)
# NOTE: the History object returned by fit() is not captured here, and no
# plot_hist helper is defined in this notebook.

In [16]:
# Score the model on ds_test; with the compiled metrics, evaluate() returns
# the loss followed by the accuracy.
test_metrics = model.evaluate(ds_test)
test_loss, test_accuracy = test_metrics

print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 118ms/step - accuracy: 0.6221 - loss: 1.4150
Test Accuracy: 61.68%


In [17]:
import pickle

from google.colab import files

# Write the trained model to an .h5 file and send it to the browser.
model.save('model.h5')
files.download("model.h5")

# Serialize the fitted label encoder so predictions can be mapped back to
# label names. Only model.h5 is passed to files.download; this pickle is
# just written to the runtime's working directory.
with open('Plant_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(label_encoder, encoder_file)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Tips for fine tuning EfficientNet

On unfreezing layers:

- The `BatchNormalization` layers need to be kept frozen
([more details](https://keras.io/guides/transfer_learning/)).
If they are also turned to trainable, the
first epoch after unfreezing will significantly reduce accuracy.
- In some cases it may be beneficial to open up only a portion of layers instead of
unfreezing all. This will make fine tuning much faster when going to larger models like
B7.
- Each block needs to be all turned on or off. This is because the architecture includes
a shortcut from the first layer to the last layer for each block. Not respecting blocks
also significantly harms the final performance.

Some other tips for utilizing EfficientNet:

- Larger variants of EfficientNet do not guarantee improved performance, especially for
tasks with less data or fewer classes. In such a case, the larger the variant of EfficientNet
chosen, the harder it is to tune hyperparameters.
- EMA (Exponential Moving Average) is very helpful in training EfficientNet from scratch,
but not so much for transfer learning.
- Do not use the RMSprop setup as in the original paper for transfer learning. The
momentum and learning rate are too high for transfer learning. It will easily corrupt the
pretrained weight and blow up the loss. A quick check is to see if loss (as categorical
cross entropy) is getting significantly larger than log(NUM_CLASSES) after the same
epoch. If so, the initial learning rate/momentum is too high.
- Smaller batch sizes benefit validation accuracy, possibly due to effectively providing
regularization.