In [4]:
# --- Environment setup for all models ---
!pip install --quiet --upgrade tensorflow scikit-learn pyswarms timm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m620.7/620.7 MB[0m [31m750.1 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m108.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m98.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.20.0 which is incompatible.
tf-keras 2.19.0 requires tensorflow<2.20,>=2.19, but you have tensorflow 2.20.0 which is incompatible.
tensorflow-text 2.19.0 requires tensorflow<2.20,>=2.19.0, but you have tensorflow 2.20.0 which is incompatible.[0m

In [5]:

import os, time, itertools, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.utils import shuffle
from scipy.stats import mode
from statsmodels.stats.contingency_tables import mcnemar
import tensorflow as tf
from tensorflow.keras import layers, models, applications
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow_hub as hub
import pyswarms as ps

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:

!mkdir -p ~/.kaggle
!cp '/content/drive/MyDrive/projectResearchPaper/plant/kaggle.json' ~/.kaggle/
# !cp '/content/drive/MyDrive/project_iiit/kaggle.json' ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [8]:
#!/bin/bash
!kaggle datasets download warcoder/indian-medicinal-plant-image-dataset

Dataset URL: https://www.kaggle.com/datasets/warcoder/indian-medicinal-plant-image-dataset
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading indian-medicinal-plant-image-dataset.zip to /content
 53% 134M/253M [00:00<00:00, 1.40GB/s]
100% 253M/253M [00:03<00:00, 71.5MB/s]


In [9]:
import zipfile
with zipfile.ZipFile('/content/indian-medicinal-plant-image-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [10]:
import pathlib, os, random, shutil
data_dir = '/content/Medicinal plant dataset'
data_dir_path = pathlib.Path(data_dir)
classes = [d.name for d in data_dir_path.iterdir() if d.is_dir()]

In [11]:
# Create train/val/test folders
for split in ['train', 'val', 'test']:
    for cls in classes:
        os.makedirs(f'/content/Medicinal plant dataset/{split}/{cls}', exist_ok=True)

In [12]:
# Split ratios
train_ratio, val_ratio, test_ratio = 0.7, 0.2, 0.1

for cls in classes:
    imgs = list(data_dir_path.glob(f'{cls}/*'))
    random.shuffle(imgs)
    n = len(imgs)
    n_train = int(train_ratio * n)
    n_val = int(val_ratio * n)
    n_test = n - n_train - n_val
    for i, img_path in enumerate(imgs):
        if i < n_train:
            shutil.copy(img_path, f'/content/Medicinal plant dataset/train/{cls}/')
        elif i < n_train + n_val:
            shutil.copy(img_path, f'/content/Medicinal plant dataset/val/{cls}/')
        else:
            shutil.copy(img_path, f'/content/Medicinal plant dataset/test/{cls}/')

dataset_dir = '/content/Medicinal plant dataset'

In [13]:
# =============================
# 1. Load Datasets
# =============================
import tensorflow as tf
IMG_SIZE = (224,224)
BATCH_SIZE = 32

train_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(dataset_dir,'train'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(dataset_dir,'val'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(dataset_dir,'test'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

class_names = train_ds.class_names
num_classes = len(class_names)
print(f"Classes: {class_names}")

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

Found 4151 files belonging to 40 classes.
Found 1180 files belonging to 40 classes.
Found 614 files belonging to 40 classes.
Classes: ['Aloevera', 'Amla', 'Amruta_Balli', 'Arali', 'Ashoka', 'Ashwagandha', 'Avacado', 'Bamboo', 'Basale', 'Betel', 'Betel_Nut', 'Brahmi', 'Castor', 'Curry_Leaf', 'Doddapatre', 'Ekka', 'Ganike', 'Gauva', 'Geranium', 'Henna', 'Hibiscus', 'Honge', 'Insulin', 'Jasmine', 'Lemon', 'Lemon_grass', 'Mango', 'Mint', 'Nagadali', 'Neem', 'Nithyapushpa', 'Nooni', 'Pappaya', 'Pepper', 'Pomegranate', 'Raktachandini', 'Rose', 'Sapota', 'Tulasi', 'Wood_sorel']


In [14]:
# =========== Common training helpers ===========
early_stop = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True, verbose=1)
reduce_lr  = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
EPOCHS = 20
def compile_and_fit(model, name, epochs=EPOCHS):
    ckpt = ModelCheckpoint(os.path.join(data_dir, f"{name}_best.h5"), save_best_only=True, monitor='val_loss')
    hist = model.fit(train_ds, validation_data=val_ds, epochs=epochs, callbacks=[early_stop, reduce_lr, ckpt], verbose=2)
    model.load_weights(os.path.join(data_dir, f"{name}_best.h5"))
    return hist

In [15]:
def save_plot_history(hist, outpath):
    plt.figure(figsize=(6,4))
    plt.plot(hist.history['accuracy'], label='train_acc')
    plt.plot(hist.history['val_accuracy'], label='val_acc')
    plt.xlabel('Epoch'); plt.ylabel('Accuracy'); plt.legend(); plt.title('Accuracy Curve')
    plt.savefig(outpath)
    plt.close()

def plot_confusion(cm, labels, outpath, title="Confusion matrix"):
    plt.figure(figsize=(7,6))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels, cmap='Blues')
    plt.xlabel('Predicted'); plt.ylabel('True'); plt.title(title)
    plt.savefig(outpath); plt.close()

In [16]:
# =========== Build models ===========
def build_mobilenetv2(input_shape=(*IMG_SIZE,3)):
    base = applications.MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    base.trainable=False
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(num_classes, activation='softmax')(x)
    model = Model(base.input, out)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model


def build_resnet152(input_shape=(*IMG_SIZE,3)):
    base = applications.ResNet152(weights='imagenet', include_top=False, input_shape=input_shape)
    base.trainable=False
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(num_classes, activation='softmax')(x)
    model = Model(base.input, out)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

def build_xception(input_shape=(*IMG_SIZE,3)):
    # Xception expects 299x299; if IMG_SIZE is smaller, it still works but less ideal.
    base = applications.Xception(weights='imagenet', include_top=False, input_shape=input_shape)
    base.trainable=False
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.4)(x)
    out = layers.Dense(num_classes, activation='softmax')(x)
    model = Model(base.input, out)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

def build_mobilenetv3(input_shape=(*IMG_SIZE,3), model_type='large'):
    try:
        if model_type=='large':
            base = applications.MobileNetV3Large(weights='imagenet', include_top=False, input_shape=input_shape)
        else:
            base = applications.MobileNetV3Small(weights='imagenet', include_top=False, input_shape=input_shape)
    except Exception as e:
        print("MobileNetV3 not found in this TF. Falling back to MobileNetV2. Error:", e)
        return build_mobilenetv2(input_shape)
    base.trainable=False
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(num_classes, activation='softmax')(x)
    model = Model(base.input, out)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

def build_resnet50_for_feats(input_shape=(*IMG_SIZE,3)):
    base = applications.ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    base.trainable=False
    x = layers.GlobalAveragePooling2D()(base.output)
    model = Model(base.input, x)
    model.summary()
    return model

# === BiT model (attempt via TF-Hub) / fallback to ResNet50 ===
def build_bit_feature_extractor(input_shape=(*IMG_SIZE,3)):
    # NOTE: change this URL to the official BiT TF-Hub link if you want exact BiT weights.
    try:
        BIT_URL = "https://tfhub.dev/google/bit/s-r50x1/1"  # example: replace with actual working version
        hub_layer = hub.KerasLayer(BIT_URL, trainable=False, arguments=dict(input_shape=input_shape))
        inp = layers.Input(shape=input_shape)
        out = hub_layer(inp)
        model = Model(inp, out)
        model.summary()
        return model
    except Exception as e:
        print("TF-Hub BiT not available; fallback to ResNet50 features. Error:", e)
        return build_resnet50_for_feats(input_shape)


In [None]:
# =========== Train all models ===========
models_to_train = {}

# 1. MobileNetV2
models_to_train['MobileNetV2'] = build_mobilenetv2()

# 2. ResNet152
models_to_train['ResNet152'] = build_resnet152()

# 3. Xception
models_to_train['Xception'] = build_xception()

# 4. MobileNetV3 (large)
models_to_train['MobileNetV3'] = build_mobilenetv3(model_type='large')


# 6. BiT (feature extractor) - fallback to ResNet50 features if TF-Hub not available
bit_feat_extractor = build_bit_feature_extractor()

# 5. ResNet50 (for ensemble and PSO-SVM features)
models_to_train['ResNet50'] = build_resnet50_for_feats()  # feature extractor used later; we'll also create classifier variant if needed


# Train & evaluate classifier variants (skip pure feature-only models from training loop)
trained_classifiers = {}
histories = {}
reports = {}
cms = {}

for name, model in list(models_to_train.items()):
    if name == 'ResNet50':
        # don't train the feature-only extractor as classifier; instead build a small classifier head for it
        inputs = model.input
        x = model.output
        x = layers.Dense(256, activation='relu')(x)
        x = layers.Dropout(0.3)(x)
        out = layers.Dense(num_classes, activation='softmax')(x)
        clf = Model(inputs, out)
        clf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        print(f"Training classifier version of {name}")
        hist = compile_and_fit(clf, name)
        histories[name] = hist
        trained_classifiers[name] = clf
    else:
        print(f"Training {name}")
        hist = compile_and_fit(model, name)
        histories[name] = hist
        trained_classifiers[name] = model

    # Evaluate on test set
    model_e = trained_classifiers[name]
    preds_prob = model_e.predict(test_ds, verbose=0)
    preds = np.argmax(preds_prob, axis=1)
    y_true = np.concatenate([y for x,y in test_ds], axis=0)
    rep = classification_report(y_true, preds, output_dict=True, zero_division=0)
    cm = confusion_matrix(y_true, preds)
    reports[name] = rep
    cms[name] = cm
    # save artifacts
    pd.DataFrame(rep).transpose().to_csv(os.path.join(data_dir, f"{name}_classification_report.csv"))
    np.save(os.path.join(data_dir, f"{name}_cm.npy"), cm)
    plot_confusion(cm, class_names, os.path.join(data_dir, f"{name}_confusion.png"), title=f"{name} Confusion Matrix")
    save_plot_history(histories[name], os.path.join(data_dir, f"{name}_train_curve.png"))

# =========== BiT classifier (if bit_feat_extractor available) ===========
try:
    print("Trying BiT features/classifier (if TF-Hub BiT loaded)...")
    # If hub layer produced vector outputs, build a small classifier on top and train
    inp = bit_feat_extractor.input
    out = bit_feat_extractor.output
    x = layers.Dense(256, activation='relu')(out)
    x = layers.Dropout(0.3)(x)
    outc = layers.Dense(num_classes, activation='softmax')(x)
    bit_clf = Model(inp, outc)
    bit_clf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    hist = compile_and_fit(bit_clf, 'BiT_s-r50x1')
    histories['BiT_s-r50x1'] = hist
    trained_classifiers['BiT_s-r50x1'] = bit_clf
    preds_prob = bit_clf.predict(test_ds); preds = np.argmax(preds_prob, axis=1)
    y_true = np.concatenate([y for x,y in test_ds], axis=0)
    rep_bit = classification_report(y_true, preds, output_dict=True, zero_division=0)
    cms['BiT_s-r50x1'] = confusion_matrix(y_true, preds)
    pd.DataFrame(rep_bit).transpose().to_csv(os.path.join(data_dir, "BiT_s-r50x1_classification_report.csv"))
    plot_confusion(cms['BiT_s-r50x1'], class_names, os.path.join(data_dir, "BiT_s-r50x1_confusion.png"), title="BiT Confusion")
    reports['BiT_s-r50x1'] = rep_bit
except Exception as e:
    print("BiT classifier skipped (TF-Hub issue). Error:", e)

# =========== Ensemble: MobileNetV2 + ResNet50 + InceptionV3 (majority vote) ===========
print("Building Ensemble (MobileNetV2, ResNet50, InceptionV3)...")
ensemble_models = []
for nm in ['MobileNetV2', 'ResNet50']:
    ensemble_models.append(trained_classifiers[nm])

# Add InceptionV3 (build & train a classifier quickly if not already)
if 'InceptionV3' not in trained_classifiers:
    try:
        inc = applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE,3))
        x = layers.GlobalAveragePooling2D()(inc.output); x = layers.Dense(128, activation='relu')(x); x = layers.Dropout(0.3)(x)
        out = layers.Dense(num_classes, activation='softmax')(x)
        inc_clf = Model(inc.input, out); inc_clf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        hist = compile_and_fit(inc_clf, 'InceptionV3')
        trained_classifiers['InceptionV3'] = inc_clf
        ensemble_models.append(inc_clf)
    except Exception as e:
        print("InceptionV3 training skipped or failed, error:", e)


# # =========== ResNet50 -> PSO -> SVM pipeline ===========
# # Extract ResNet50 pooled features for train/val/test, then optimize SVM hyperparams (C, gamma) with PSO.
# print("Building features for ResNet50 -> PSO -> SVM...")
# feature_model = build_resnet50_for_feats()
# def extract_features(dataset, model):
#     feats, labels = [], []
#     for x_batch, y_batch in dataset:
#         preds = model.predict(x_batch, verbose=0)
#         feats.append(preds)
#         labels.append(y_batch.numpy())
#     feats = np.vstack(feats); labels = np.concatenate(labels)
#     return feats, labels

# X_train_feats, y_train_feats = extract_features(train_ds, feature_model)
# X_val_feats, y_val_feats = extract_features(val_ds, feature_model)
# X_test_feats, y_test_feats = extract_features(test_ds, feature_model)

# # scale
# scaler = StandardScaler().fit(X_train_feats)
# X_train_s = scaler.transform(X_train_feats)
# X_test_s  = scaler.transform(X_test_feats)
# y_test = y_test_feats

# # PSO objective: cross-val on val set to minimize 1-accuracy
# def pso_obj(params):
#     # params shape: (n_particles, 2)  -> [log10(C), log10(gamma)]
#     n = params.shape[0]
#     scores = np.zeros(n)
#     for i in range(n):
#         logC, logG = params[i]
#         C = 10**logC
#         gamma = 10**logG
#         clf = SVC(C=C, gamma=gamma, kernel='rbf')
#         clf.fit(X_train_s, y_train_feats)
#         score = clf.score(X_val_feats if False else X_val_feats, y_val_feats)  # using raw features is okay
#         scores[i] = 1.0 - score
#     return scores

# # PSO bounds: search log10(C) in [-3,3], log10(gamma) in [-4,1]
# options = {'c1':0.5, 'c2':0.3, 'w':0.9}
# bounds = (np.array([-3,-4]), np.array([3,1]))
# optimizer = ps.single.GlobalBestPSO(n_particles=20, dimensions=2, options=options, bounds=bounds)
# best_cost, best_pos = optimizer.optimize(pso_obj, iters=30, verbose=False)
# bestC = 10**best_pos[0]; bestGamma = 10**best_pos[1]
# print("PSO found best C, gamma:", bestC, bestGamma)

# svm = SVC(C=bestC, gamma=bestGamma, kernel='rbf', probability=True)
# svm.fit(X_train_s, y_train_feats)
# y_pred_svm = svm.predict(scaler.transform(X_test_feats))
# rep_svm = classification_report(y_test, y_pred_svm, output_dict=True, zero_division=0)
# cm_svm = confusion_matrix(y_test, y_pred_svm)
# pd.DataFrame(rep_svm).transpose().to_csv(os.path.join(data_dir, "ResNet50_PSO_SVM_classification_report.csv"))
# np.save(os.path.join(data_dir, "ResNet50_PSO_SVM_cm.npy"), cm_svm)
# plot_confusion(cm_svm, class_names, os.path.join(data_dir, "ResNet50_PSO_SVM_confusion.png"), title="ResNet50-PSO-SVM Confusion Matrix")
# reports['ResNet50-PSO-SVM'] = rep_svm
# cms['ResNet50-PSO-SVM'] = cm_svm


# Predict with ensemble
y_true = np.concatenate([y for x,y in test_ds], axis=0)
probs = []
for m in ensemble_models:
    probs.append(m.predict(test_ds, verbose=0))
probs = np.stack(probs, axis=0)  # shape (n_models, n_samples, n_classes)
preds_ensemble = np.argmax(np.mean(probs, axis=0), axis=1)  # average probabilities
rep_ens = classification_report(y_true, preds_ensemble, output_dict=True, zero_division=0)
cm_ens = confusion_matrix(y_true, preds_ensemble)
pd.DataFrame(rep_ens).transpose().to_csv(os.path.join(data_dir, "Ensemble_classification_report.csv"))
np.save(os.path.join(data_dir, "Ensemble_cm.npy"), cm_ens)
plot_confusion(cm_ens, class_names, os.path.join(data_dir, "Ensemble_confusion.png"), title="Ensemble Confusion Matrix")
reports['Ensemble'] = rep_ens
cms['Ensemble'] = cm_ens


# 2) Model sizes & inference times (single sample)
def model_size_and_time(model):
    # save weights to temp and measure size
    tmp_path = os.path.join(data_dir, "tmp_model.weights.h5")
    model.save_weights(tmp_path)
    size_mb = os.path.getsize(tmp_path)/1024/1024
    os.remove(tmp_path)
    # time inference of 100 runs
    sample = np.random.randn(1, IMG_SIZE[0], IMG_SIZE[1], 3).astype('float32')
    start = time.time()
    for _ in range(50):
        _ = model.predict(sample)
    t = (time.time()-start)/50
    return round(size_mb,3), round(t*1000,3)  # MB, ms
rows=[]
for name, model in trained_classifiers.items():
    s, tms = model_size_and_time(model)
    rows.append([name, s, tms])
pd.DataFrame(rows, columns=['Model','Size(MB)','AvgInference(ms)']).to_csv(os.path.join(data_dir,"model_size_inference.csv"), index=False)

# 3) Ablation idea: train small MobileNetV2 without augmentation vs with augmentation (code left as description)
# (You can run two experiments and include results in paper: augmentation improves per-class recall for visually variable species.)

print("All done. Artifacts saved to:", data_dir)
print("Key files: models_summary_table.csv, per-model classification reports, confusion matrices (*.png), gradcam images, tsne_embeddings.png")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m234698864/234698864[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_large_224_1.0_float_no_top_v2.h5
[1m12683000/12683000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


2025-10-09 06:25:17,209 - absl - INFO - Using /tmp/tfhub_modules to cache modules.
2025-10-09 06:25:17,211 - absl - INFO - Downloading TF-Hub Module 'https://tfhub.dev/google/bit/s-r50x1/1'.
2025-10-09 06:25:18,859 - absl - INFO - Downloaded https://tfhub.dev/google/bit/s-r50x1/1, Total size: 99.69MB
2025-10-09 06:25:18,861 - absl - INFO - Downloaded TF-Hub Module 'https://tfhub.dev/google/bit/s-r50x1/1'.
2025-10-09 06:25:31,272 - absl - INFO - Fingerprint not found. Saved model loading will continue.
2025-10-09 06:25:31,274 - absl - INFO - path_and_singleprint metric could not be logged. Saved model loading will continue.


TF-Hub BiT not available; fallback to ResNet50 features. Error: Exception encountered when calling layer 'keras_layer' (type KerasLayer).

Binding inputs to tf.function failed due to `got an unexpected keyword argument 'input_shape'`. Received args: (<KerasTensor shape=(None, 224, 224, 3), dtype=float32, sparse=False, ragged=False, name=keras_tensor_1020>,) and kwargs: {'input_shape': (224, 224, 3), 'training': False} for signature: (x, training=<captured_default_value>).

Call arguments received by layer 'keras_layer' (type KerasLayer):
  • inputs=<KerasTensor shape=(None, 224, 224, 3), dtype=float32, sparse=False, ragged=False, name=keras_tensor_1020>
  • training=None
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Training MobileNetV2
Epoch 1/20




130/130 - 60s - 458ms/step - accuracy: 0.0891 - loss: 3.4624 - val_accuracy: 0.2144 - val_loss: 3.0448 - learning_rate: 1.0000e-03
Epoch 2/20




130/130 - 4s - 34ms/step - accuracy: 0.2057 - loss: 2.9275 - val_accuracy: 0.2941 - val_loss: 2.6571 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 5s - 35ms/step - accuracy: 0.2891 - loss: 2.5865 - val_accuracy: 0.3661 - val_loss: 2.3846 - learning_rate: 1.0000e-03
Epoch 4/20




130/130 - 5s - 35ms/step - accuracy: 0.3421 - loss: 2.3703 - val_accuracy: 0.4136 - val_loss: 2.2094 - learning_rate: 1.0000e-03
Epoch 5/20




130/130 - 4s - 34ms/step - accuracy: 0.3693 - loss: 2.2122 - val_accuracy: 0.4763 - val_loss: 2.0684 - learning_rate: 1.0000e-03
Epoch 6/20




130/130 - 6s - 48ms/step - accuracy: 0.4112 - loss: 2.1057 - val_accuracy: 0.4873 - val_loss: 1.9569 - learning_rate: 1.0000e-03
Epoch 7/20




130/130 - 8s - 65ms/step - accuracy: 0.4257 - loss: 2.0177 - val_accuracy: 0.5178 - val_loss: 1.8581 - learning_rate: 1.0000e-03
Epoch 8/20




130/130 - 5s - 36ms/step - accuracy: 0.4476 - loss: 1.9074 - val_accuracy: 0.5102 - val_loss: 1.8470 - learning_rate: 1.0000e-03
Epoch 9/20




130/130 - 4s - 34ms/step - accuracy: 0.4787 - loss: 1.8094 - val_accuracy: 0.5381 - val_loss: 1.7579 - learning_rate: 1.0000e-03
Epoch 10/20




130/130 - 5s - 35ms/step - accuracy: 0.4695 - loss: 1.7929 - val_accuracy: 0.5703 - val_loss: 1.6840 - learning_rate: 1.0000e-03
Epoch 11/20




130/130 - 5s - 36ms/step - accuracy: 0.5177 - loss: 1.6803 - val_accuracy: 0.5475 - val_loss: 1.6582 - learning_rate: 1.0000e-03
Epoch 12/20




130/130 - 4s - 35ms/step - accuracy: 0.5235 - loss: 1.6131 - val_accuracy: 0.5780 - val_loss: 1.5764 - learning_rate: 1.0000e-03
Epoch 13/20




130/130 - 5s - 36ms/step - accuracy: 0.5355 - loss: 1.5845 - val_accuracy: 0.5898 - val_loss: 1.5378 - learning_rate: 1.0000e-03
Epoch 14/20




130/130 - 5s - 36ms/step - accuracy: 0.5411 - loss: 1.5235 - val_accuracy: 0.5831 - val_loss: 1.5232 - learning_rate: 1.0000e-03
Epoch 15/20




130/130 - 4s - 34ms/step - accuracy: 0.5461 - loss: 1.5177 - val_accuracy: 0.6059 - val_loss: 1.5114 - learning_rate: 1.0000e-03
Epoch 16/20




130/130 - 5s - 36ms/step - accuracy: 0.5589 - loss: 1.4708 - val_accuracy: 0.5992 - val_loss: 1.4758 - learning_rate: 1.0000e-03
Epoch 17/20
130/130 - 4s - 31ms/step - accuracy: 0.5729 - loss: 1.4384 - val_accuracy: 0.5992 - val_loss: 1.4764 - learning_rate: 1.0000e-03
Epoch 18/20




130/130 - 4s - 34ms/step - accuracy: 0.5832 - loss: 1.4106 - val_accuracy: 0.6237 - val_loss: 1.4264 - learning_rate: 1.0000e-03
Epoch 19/20
130/130 - 5s - 37ms/step - accuracy: 0.5965 - loss: 1.3525 - val_accuracy: 0.6203 - val_loss: 1.4282 - learning_rate: 1.0000e-03
Epoch 20/20




130/130 - 4s - 34ms/step - accuracy: 0.5938 - loss: 1.3317 - val_accuracy: 0.6136 - val_loss: 1.4048 - learning_rate: 1.0000e-03
Restoring model weights from the end of the best epoch: 20.
Training ResNet152
Epoch 1/20




130/130 - 95s - 730ms/step - accuracy: 0.4127 - loss: 2.2542 - val_accuracy: 0.7136 - val_loss: 1.1071 - learning_rate: 1.0000e-03
Epoch 2/20




130/130 - 39s - 301ms/step - accuracy: 0.7230 - loss: 0.9685 - val_accuracy: 0.8381 - val_loss: 0.6324 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 38s - 292ms/step - accuracy: 0.8244 - loss: 0.5994 - val_accuracy: 0.8754 - val_loss: 0.4675 - learning_rate: 1.0000e-03
Epoch 4/20




130/130 - 39s - 296ms/step - accuracy: 0.8750 - loss: 0.4450 - val_accuracy: 0.8958 - val_loss: 0.3889 - learning_rate: 1.0000e-03
Epoch 5/20




130/130 - 42s - 320ms/step - accuracy: 0.9036 - loss: 0.3322 - val_accuracy: 0.9000 - val_loss: 0.3557 - learning_rate: 1.0000e-03
Epoch 6/20




130/130 - 42s - 320ms/step - accuracy: 0.9299 - loss: 0.2541 - val_accuracy: 0.9144 - val_loss: 0.3033 - learning_rate: 1.0000e-03
Epoch 7/20




130/130 - 43s - 329ms/step - accuracy: 0.9371 - loss: 0.2182 - val_accuracy: 0.9136 - val_loss: 0.3002 - learning_rate: 1.0000e-03
Epoch 8/20




130/130 - 43s - 333ms/step - accuracy: 0.9504 - loss: 0.1825 - val_accuracy: 0.9229 - val_loss: 0.2702 - learning_rate: 1.0000e-03
Epoch 9/20
130/130 - 36s - 279ms/step - accuracy: 0.9487 - loss: 0.1740 - val_accuracy: 0.9254 - val_loss: 0.2883 - learning_rate: 1.0000e-03
Epoch 10/20




130/130 - 44s - 336ms/step - accuracy: 0.9586 - loss: 0.1427 - val_accuracy: 0.9237 - val_loss: 0.2651 - learning_rate: 1.0000e-03
Epoch 11/20




130/130 - 43s - 331ms/step - accuracy: 0.9687 - loss: 0.1122 - val_accuracy: 0.9288 - val_loss: 0.2387 - learning_rate: 1.0000e-03
Epoch 12/20
130/130 - 37s - 281ms/step - accuracy: 0.9704 - loss: 0.1036 - val_accuracy: 0.9246 - val_loss: 0.2600 - learning_rate: 1.0000e-03
Epoch 13/20
130/130 - 36s - 276ms/step - accuracy: 0.9735 - loss: 0.0934 - val_accuracy: 0.9288 - val_loss: 0.2515 - learning_rate: 1.0000e-03
Epoch 14/20

Epoch 14: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
130/130 - 36s - 276ms/step - accuracy: 0.9713 - loss: 0.0931 - val_accuracy: 0.9144 - val_loss: 0.3046 - learning_rate: 1.0000e-03
Epoch 15/20




130/130 - 43s - 333ms/step - accuracy: 0.9800 - loss: 0.0685 - val_accuracy: 0.9381 - val_loss: 0.2264 - learning_rate: 5.0000e-04
Epoch 16/20




130/130 - 45s - 347ms/step - accuracy: 0.9848 - loss: 0.0562 - val_accuracy: 0.9356 - val_loss: 0.2211 - learning_rate: 5.0000e-04
Epoch 17/20




130/130 - 45s - 343ms/step - accuracy: 0.9899 - loss: 0.0475 - val_accuracy: 0.9373 - val_loss: 0.2132 - learning_rate: 5.0000e-04
Epoch 18/20




130/130 - 45s - 348ms/step - accuracy: 0.9882 - loss: 0.0475 - val_accuracy: 0.9407 - val_loss: 0.2087 - learning_rate: 5.0000e-04
Epoch 19/20




130/130 - 43s - 333ms/step - accuracy: 0.9882 - loss: 0.0462 - val_accuracy: 0.9398 - val_loss: 0.2074 - learning_rate: 5.0000e-04
Epoch 20/20
130/130 - 37s - 281ms/step - accuracy: 0.9889 - loss: 0.0484 - val_accuracy: 0.9297 - val_loss: 0.2327 - learning_rate: 5.0000e-04
Restoring model weights from the end of the best epoch: 19.
Training Xception
Epoch 1/20




130/130 - 79s - 606ms/step - accuracy: 0.0472 - loss: 5.5459 - val_accuracy: 0.0653 - val_loss: 3.6202 - learning_rate: 1.0000e-03
Epoch 2/20
130/130 - 20s - 151ms/step - accuracy: 0.0559 - loss: 3.6071 - val_accuracy: 0.0619 - val_loss: 3.6345 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 20s - 155ms/step - accuracy: 0.0720 - loss: 3.5637 - val_accuracy: 0.0915 - val_loss: 3.5018 - learning_rate: 1.0000e-03
Epoch 4/20




130/130 - 22s - 166ms/step - accuracy: 0.0730 - loss: 3.5429 - val_accuracy: 0.0992 - val_loss: 3.4706 - learning_rate: 1.0000e-03
Epoch 5/20




130/130 - 20s - 152ms/step - accuracy: 0.0901 - loss: 3.4676 - val_accuracy: 0.1110 - val_loss: 3.3991 - learning_rate: 1.0000e-03
Epoch 6/20
130/130 - 19s - 150ms/step - accuracy: 0.0942 - loss: 3.4141 - val_accuracy: 0.1076 - val_loss: 3.4149 - learning_rate: 1.0000e-03
Epoch 7/20




130/130 - 20s - 155ms/step - accuracy: 0.1005 - loss: 3.3800 - val_accuracy: 0.1381 - val_loss: 3.2642 - learning_rate: 1.0000e-03
Epoch 8/20




130/130 - 20s - 157ms/step - accuracy: 0.1130 - loss: 3.3080 - val_accuracy: 0.1347 - val_loss: 3.2363 - learning_rate: 1.0000e-03
Epoch 9/20
130/130 - 19s - 148ms/step - accuracy: 0.1176 - loss: 3.2902 - val_accuracy: 0.1508 - val_loss: 3.2421 - learning_rate: 1.0000e-03
Epoch 10/20




130/130 - 20s - 153ms/step - accuracy: 0.1335 - loss: 3.2350 - val_accuracy: 0.1703 - val_loss: 3.1548 - learning_rate: 1.0000e-03
Epoch 11/20




130/130 - 20s - 153ms/step - accuracy: 0.1313 - loss: 3.2293 - val_accuracy: 0.1881 - val_loss: 3.0995 - learning_rate: 1.0000e-03
Epoch 12/20




130/130 - 20s - 154ms/step - accuracy: 0.1412 - loss: 3.1890 - val_accuracy: 0.2034 - val_loss: 3.0873 - learning_rate: 1.0000e-03
Epoch 13/20




130/130 - 20s - 154ms/step - accuracy: 0.1525 - loss: 3.1492 - val_accuracy: 0.2127 - val_loss: 3.0140 - learning_rate: 1.0000e-03
Epoch 14/20
130/130 - 19s - 149ms/step - accuracy: 0.1561 - loss: 3.0796 - val_accuracy: 0.2051 - val_loss: 3.0490 - learning_rate: 1.0000e-03
Epoch 15/20
130/130 - 19s - 148ms/step - accuracy: 0.1559 - loss: 3.0456 - val_accuracy: 0.1966 - val_loss: 3.0177 - learning_rate: 1.0000e-03
Epoch 16/20




130/130 - 20s - 153ms/step - accuracy: 0.1655 - loss: 3.0479 - val_accuracy: 0.2288 - val_loss: 2.9324 - learning_rate: 1.0000e-03
Epoch 17/20




130/130 - 25s - 192ms/step - accuracy: 0.1788 - loss: 2.9722 - val_accuracy: 0.2161 - val_loss: 2.9059 - learning_rate: 1.0000e-03
Epoch 18/20




130/130 - 20s - 154ms/step - accuracy: 0.1816 - loss: 2.9466 - val_accuracy: 0.2195 - val_loss: 2.8760 - learning_rate: 1.0000e-03
Epoch 19/20




130/130 - 20s - 155ms/step - accuracy: 0.1727 - loss: 2.9497 - val_accuracy: 0.2186 - val_loss: 2.8471 - learning_rate: 1.0000e-03
Epoch 20/20




130/130 - 20s - 152ms/step - accuracy: 0.1903 - loss: 2.9044 - val_accuracy: 0.2551 - val_loss: 2.7452 - learning_rate: 1.0000e-03
Restoring model weights from the end of the best epoch: 20.
Training MobileNetV3
Epoch 1/20




130/130 - 50s - 381ms/step - accuracy: 0.3910 - loss: 2.3569 - val_accuracy: 0.7568 - val_loss: 1.0412 - learning_rate: 1.0000e-03
Epoch 2/20




130/130 - 41s - 319ms/step - accuracy: 0.7776 - loss: 0.8436 - val_accuracy: 0.8576 - val_loss: 0.5503 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 4s - 30ms/step - accuracy: 0.8723 - loss: 0.4900 - val_accuracy: 0.8864 - val_loss: 0.4103 - learning_rate: 1.0000e-03
Epoch 4/20




130/130 - 4s - 29ms/step - accuracy: 0.9186 - loss: 0.3178 - val_accuracy: 0.9110 - val_loss: 0.3113 - learning_rate: 1.0000e-03
Epoch 5/20




130/130 - 4s - 29ms/step - accuracy: 0.9393 - loss: 0.2351 - val_accuracy: 0.9314 - val_loss: 0.2493 - learning_rate: 1.0000e-03
Epoch 6/20




130/130 - 4s - 30ms/step - accuracy: 0.9533 - loss: 0.1843 - val_accuracy: 0.9381 - val_loss: 0.2252 - learning_rate: 1.0000e-03
Epoch 7/20




130/130 - 4s - 30ms/step - accuracy: 0.9653 - loss: 0.1405 - val_accuracy: 0.9441 - val_loss: 0.1997 - learning_rate: 1.0000e-03
Epoch 8/20




130/130 - 5s - 38ms/step - accuracy: 0.9737 - loss: 0.1163 - val_accuracy: 0.9492 - val_loss: 0.1788 - learning_rate: 1.0000e-03
Epoch 9/20
130/130 - 3s - 26ms/step - accuracy: 0.9788 - loss: 0.0935 - val_accuracy: 0.9407 - val_loss: 0.1937 - learning_rate: 1.0000e-03
Epoch 10/20




130/130 - 4s - 31ms/step - accuracy: 0.9831 - loss: 0.0769 - val_accuracy: 0.9492 - val_loss: 0.1781 - learning_rate: 1.0000e-03
Epoch 11/20




130/130 - 4s - 29ms/step - accuracy: 0.9851 - loss: 0.0716 - val_accuracy: 0.9525 - val_loss: 0.1604 - learning_rate: 1.0000e-03
Epoch 12/20
130/130 - 3s - 26ms/step - accuracy: 0.9836 - loss: 0.0638 - val_accuracy: 0.9542 - val_loss: 0.1686 - learning_rate: 1.0000e-03
Epoch 13/20




130/130 - 4s - 31ms/step - accuracy: 0.9865 - loss: 0.0551 - val_accuracy: 0.9602 - val_loss: 0.1472 - learning_rate: 1.0000e-03
Epoch 14/20




130/130 - 4s - 29ms/step - accuracy: 0.9896 - loss: 0.0451 - val_accuracy: 0.9627 - val_loss: 0.1366 - learning_rate: 1.0000e-03
Epoch 15/20
130/130 - 3s - 26ms/step - accuracy: 0.9901 - loss: 0.0445 - val_accuracy: 0.9525 - val_loss: 0.1495 - learning_rate: 1.0000e-03
Epoch 16/20
130/130 - 6s - 44ms/step - accuracy: 0.9911 - loss: 0.0395 - val_accuracy: 0.9585 - val_loss: 0.1489 - learning_rate: 1.0000e-03
Epoch 17/20

Epoch 17: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
130/130 - 3s - 26ms/step - accuracy: 0.9899 - loss: 0.0370 - val_accuracy: 0.9534 - val_loss: 0.1569 - learning_rate: 1.0000e-03
Epoch 18/20
130/130 - 3s - 26ms/step - accuracy: 0.9961 - loss: 0.0288 - val_accuracy: 0.9619 - val_loss: 0.1372 - learning_rate: 5.0000e-04
Epoch 19/20
130/130 - 3s - 25ms/step - accuracy: 0.9940 - loss: 0.0252 - val_accuracy: 0.9593 - val_loss: 0.1416 - learning_rate: 5.0000e-04
Epoch 20/20

Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628



130/130 - 39s - 304ms/step - accuracy: 0.5406 - loss: 1.7885 - val_accuracy: 0.7856 - val_loss: 0.7767 - learning_rate: 1.0000e-03
Epoch 2/20




130/130 - 20s - 152ms/step - accuracy: 0.8184 - loss: 0.6233 - val_accuracy: 0.8669 - val_loss: 0.4743 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 20s - 152ms/step - accuracy: 0.8933 - loss: 0.3645 - val_accuracy: 0.9008 - val_loss: 0.3563 - learning_rate: 1.0000e-03
Epoch 4/20
130/130 - 13s - 103ms/step - accuracy: 0.9338 - loss: 0.2329 - val_accuracy: 0.8822 - val_loss: 0.3677 - learning_rate: 1.0000e-03
Epoch 5/20




130/130 - 17s - 134ms/step - accuracy: 0.9521 - loss: 0.1739 - val_accuracy: 0.9271 - val_loss: 0.2686 - learning_rate: 1.0000e-03
Epoch 6/20
130/130 - 13s - 103ms/step - accuracy: 0.9656 - loss: 0.1310 - val_accuracy: 0.9237 - val_loss: 0.2812 - learning_rate: 1.0000e-03
Epoch 7/20




130/130 - 17s - 130ms/step - accuracy: 0.9692 - loss: 0.1117 - val_accuracy: 0.9347 - val_loss: 0.2257 - learning_rate: 1.0000e-03
Epoch 8/20




130/130 - 19s - 144ms/step - accuracy: 0.9795 - loss: 0.0877 - val_accuracy: 0.9432 - val_loss: 0.2112 - learning_rate: 1.0000e-03
Epoch 9/20




130/130 - 15s - 116ms/step - accuracy: 0.9887 - loss: 0.0540 - val_accuracy: 0.9432 - val_loss: 0.2047 - learning_rate: 1.0000e-03
Epoch 10/20




130/130 - 20s - 151ms/step - accuracy: 0.9880 - loss: 0.0483 - val_accuracy: 0.9373 - val_loss: 0.1993 - learning_rate: 1.0000e-03
Epoch 11/20
130/130 - 13s - 103ms/step - accuracy: 0.9846 - loss: 0.0542 - val_accuracy: 0.9373 - val_loss: 0.2379 - learning_rate: 1.0000e-03
Epoch 12/20
130/130 - 14s - 104ms/step - accuracy: 0.9819 - loss: 0.0644 - val_accuracy: 0.9314 - val_loss: 0.2670 - learning_rate: 1.0000e-03
Epoch 13/20

Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
130/130 - 13s - 103ms/step - accuracy: 0.9875 - loss: 0.0477 - val_accuracy: 0.9424 - val_loss: 0.2129 - learning_rate: 1.0000e-03
Epoch 14/20
130/130 - 13s - 102ms/step - accuracy: 0.9935 - loss: 0.0263 - val_accuracy: 0.9441 - val_loss: 0.2028 - learning_rate: 5.0000e-04
Epoch 15/20




130/130 - 18s - 140ms/step - accuracy: 0.9971 - loss: 0.0166 - val_accuracy: 0.9466 - val_loss: 0.1942 - learning_rate: 5.0000e-04
Epoch 16/20




130/130 - 20s - 158ms/step - accuracy: 0.9964 - loss: 0.0161 - val_accuracy: 0.9500 - val_loss: 0.1880 - learning_rate: 5.0000e-04
Epoch 17/20
130/130 - 34s - 261ms/step - accuracy: 0.9969 - loss: 0.0136 - val_accuracy: 0.9483 - val_loss: 0.1940 - learning_rate: 5.0000e-04
Epoch 18/20
130/130 - 14s - 107ms/step - accuracy: 0.9974 - loss: 0.0150 - val_accuracy: 0.9466 - val_loss: 0.1907 - learning_rate: 5.0000e-04
Epoch 19/20

Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
130/130 - 14s - 105ms/step - accuracy: 0.9981 - loss: 0.0131 - val_accuracy: 0.9508 - val_loss: 0.1940 - learning_rate: 5.0000e-04
Epoch 20/20




130/130 - 20s - 158ms/step - accuracy: 0.9981 - loss: 0.0122 - val_accuracy: 0.9517 - val_loss: 0.1797 - learning_rate: 2.5000e-04
Restoring model weights from the end of the best epoch: 20.
Trying BiT features/classifier (if TF-Hub BiT loaded)...
Epoch 1/20




130/130 - 51s - 390ms/step - accuracy: 0.5228 - loss: 1.7945 - val_accuracy: 0.8161 - val_loss: 0.7161 - learning_rate: 1.0000e-03
Epoch 2/20




130/130 - 20s - 157ms/step - accuracy: 0.8357 - loss: 0.5853 - val_accuracy: 0.8847 - val_loss: 0.4221 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 24s - 183ms/step - accuracy: 0.8945 - loss: 0.3706 - val_accuracy: 0.8924 - val_loss: 0.3780 - learning_rate: 1.0000e-03
Epoch 4/20




130/130 - 21s - 164ms/step - accuracy: 0.9362 - loss: 0.2341 - val_accuracy: 0.9127 - val_loss: 0.2882 - learning_rate: 1.0000e-03
Epoch 5/20
130/130 - 13s - 103ms/step - accuracy: 0.9511 - loss: 0.1722 - val_accuracy: 0.9178 - val_loss: 0.3057 - learning_rate: 1.0000e-03
Epoch 6/20




130/130 - 20s - 152ms/step - accuracy: 0.9658 - loss: 0.1255 - val_accuracy: 0.9246 - val_loss: 0.2592 - learning_rate: 1.0000e-03
Epoch 7/20
130/130 - 13s - 103ms/step - accuracy: 0.9709 - loss: 0.1021 - val_accuracy: 0.9280 - val_loss: 0.2657 - learning_rate: 1.0000e-03
Epoch 8/20




130/130 - 19s - 144ms/step - accuracy: 0.9735 - loss: 0.0902 - val_accuracy: 0.9373 - val_loss: 0.2207 - learning_rate: 1.0000e-03
Epoch 9/20
130/130 - 13s - 101ms/step - accuracy: 0.9819 - loss: 0.0712 - val_accuracy: 0.9449 - val_loss: 0.2246 - learning_rate: 1.0000e-03
Epoch 10/20




130/130 - 19s - 149ms/step - accuracy: 0.9827 - loss: 0.0643 - val_accuracy: 0.9441 - val_loss: 0.2139 - learning_rate: 1.0000e-03
Epoch 11/20




130/130 - 19s - 147ms/step - accuracy: 0.9865 - loss: 0.0532 - val_accuracy: 0.9407 - val_loss: 0.2132 - learning_rate: 1.0000e-03
Epoch 12/20
130/130 - 14s - 104ms/step - accuracy: 0.9836 - loss: 0.0582 - val_accuracy: 0.9381 - val_loss: 0.2355 - learning_rate: 1.0000e-03
Epoch 13/20
130/130 - 13s - 104ms/step - accuracy: 0.9870 - loss: 0.0487 - val_accuracy: 0.9347 - val_loss: 0.2188 - learning_rate: 1.0000e-03
Epoch 14/20

Epoch 14: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
130/130 - 13s - 103ms/step - accuracy: 0.9843 - loss: 0.0509 - val_accuracy: 0.9339 - val_loss: 0.2193 - learning_rate: 1.0000e-03
Epoch 15/20




130/130 - 23s - 180ms/step - accuracy: 0.9954 - loss: 0.0217 - val_accuracy: 0.9492 - val_loss: 0.1766 - learning_rate: 5.0000e-04
Epoch 16/20




130/130 - 25s - 195ms/step - accuracy: 0.9964 - loss: 0.0178 - val_accuracy: 0.9559 - val_loss: 0.1602 - learning_rate: 5.0000e-04
Epoch 17/20
130/130 - 14s - 105ms/step - accuracy: 0.9966 - loss: 0.0139 - val_accuracy: 0.9500 - val_loss: 0.1700 - learning_rate: 5.0000e-04
Epoch 18/20
130/130 - 14s - 107ms/step - accuracy: 0.9976 - loss: 0.0148 - val_accuracy: 0.9585 - val_loss: 0.1634 - learning_rate: 5.0000e-04
Epoch 19/20

Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
130/130 - 20s - 152ms/step - accuracy: 0.9986 - loss: 0.0116 - val_accuracy: 0.9542 - val_loss: 0.1702 - learning_rate: 5.0000e-04
Epoch 20/20
130/130 - 13s - 101ms/step - accuracy: 0.9990 - loss: 0.0093 - val_accuracy: 0.9517 - val_loss: 0.1643 - learning_rate: 2.5000e-04
Restoring model weights from the end of the best epoch: 16.
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 269ms/step
Building Ensemble (MobileNetV2, ResNet50, InceptionV3)...
Downloading data from http



130/130 - 247s - 2s/step - accuracy: 0.4086 - loss: 2.3034 - val_accuracy: 0.0669 - val_loss: 14.5801 - learning_rate: 1.0000e-03
Epoch 2/20




130/130 - 114s - 873ms/step - accuracy: 0.6993 - loss: 1.1377 - val_accuracy: 0.1797 - val_loss: 7.3900 - learning_rate: 1.0000e-03
Epoch 3/20




130/130 - 121s - 927ms/step - accuracy: 0.7938 - loss: 0.7743 - val_accuracy: 0.4034 - val_loss: 3.6555 - learning_rate: 1.0000e-03
Epoch 4/20




130/130 - 70s - 537ms/step - accuracy: 0.8781 - loss: 0.4461 - val_accuracy: 0.6831 - val_loss: 1.6871 - learning_rate: 1.0000e-03
Epoch 5/20




130/130 - 125s - 960ms/step - accuracy: 0.8887 - loss: 0.4068 - val_accuracy: 0.7475 - val_loss: 1.0939 - learning_rate: 1.0000e-03
Epoch 6/20
130/130 - 33s - 257ms/step - accuracy: 0.9181 - loss: 0.2888 - val_accuracy: 0.4814 - val_loss: 3.0437 - learning_rate: 1.0000e-03
Epoch 7/20
130/130 - 33s - 252ms/step - accuracy: 0.9121 - loss: 0.3315 - val_accuracy: 0.2839 - val_loss: 6.5917 - learning_rate: 1.0000e-03
Epoch 8/20

Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
130/130 - 33s - 250ms/step - accuracy: 0.9287 - loss: 0.2617 - val_accuracy: 0.3153 - val_loss: 5.2903 - learning_rate: 1.0000e-03
Epoch 9/20




130/130 - 124s - 951ms/step - accuracy: 0.9771 - loss: 0.0833 - val_accuracy: 0.9364 - val_loss: 0.2284 - learning_rate: 5.0000e-04
Epoch 10/20




130/130 - 95s - 731ms/step - accuracy: 0.9959 - loss: 0.0187 - val_accuracy: 0.9619 - val_loss: 0.1230 - learning_rate: 5.0000e-04
Epoch 11/20
130/130 - 34s - 259ms/step - accuracy: 0.9964 - loss: 0.0136 - val_accuracy: 0.9661 - val_loss: 0.1278 - learning_rate: 5.0000e-04
Epoch 12/20
130/130 - 33s - 251ms/step - accuracy: 0.9986 - loss: 0.0071 - val_accuracy: 0.9644 - val_loss: 0.1248 - learning_rate: 5.0000e-04
Epoch 13/20




130/130 - 84s - 643ms/step - accuracy: 0.9993 - loss: 0.0054 - val_accuracy: 0.9686 - val_loss: 0.1102 - learning_rate: 5.0000e-04
Epoch 14/20
130/130 - 34s - 260ms/step - accuracy: 0.9978 - loss: 0.0071 - val_accuracy: 0.9636 - val_loss: 0.1398 - learning_rate: 5.0000e-04
Epoch 15/20
130/130 - 33s - 252ms/step - accuracy: 0.9995 - loss: 0.0048 - val_accuracy: 0.9678 - val_loss: 0.1295 - learning_rate: 5.0000e-04
Epoch 16/20

Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
130/130 - 33s - 251ms/step - accuracy: 0.9995 - loss: 0.0032 - val_accuracy: 0.9551 - val_loss: 0.1862 - learning_rate: 5.0000e-04
Epoch 17/20
130/130 - 33s - 251ms/step - accuracy: 0.9990 - loss: 0.0042 - val_accuracy: 0.9653 - val_loss: 0.1365 - learning_rate: 2.5000e-04
Epoch 18/20
130/130 - 32s - 249ms/step - accuracy: 0.9995 - loss: 0.0024 - val_accuracy: 0.9653 - val_loss: 0.1328 - learning_rate: 2.5000e-04
Epoch 19/20

Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0001250

In [None]:
# 2) Model sizes & inference times (single sample)
def model_size_and_time(model):
    # save weights to temp and measure size
    tmp_path = os.path.join(data_dir, "tmp_model.weights.h5")
    model.save_weights(tmp_path)
    size_mb = os.path.getsize(tmp_path)/1024/1024
    os.remove(tmp_path)
    # time inference of 100 runs
    sample = np.random.randn(1, IMG_SIZE[0], IMG_SIZE[1], 3).astype('float32')
    start = time.time()
    for _ in range(50):
        _ = model.predict(sample)
    t = (time.time()-start)/50
    return round(size_mb,3), round(t*1000,3)  # MB, ms
rows=[]
for name, model in trained_classifiers.items():
    s, tms = model_size_and_time(model)
    rows.append([name, s, tms])
pd.DataFrame(rows, columns=['Model','Size(MB)','AvgInference(ms)']).to_csv(os.path.join(data_dir,"model_size_inference.csv"), index=False)

# 3) Ablation idea: train small MobileNetV2 without augmentation vs with augmentation (code left as description)
# (You can run two experiments and include results in paper: augmentation improves per-class recall for visually variable species.)

print("All done. Artifacts saved to:", data_dir)
print("Key files: models_summary_table.csv, per-model classification reports, confusion matrices (*.png), gradcam images, tsne_embeddings.png")

In [None]:
# =========== Evaluation summary (table) ===========
rows=[]
for name, rep in reports.items():
    acc = rep['accuracy']*100
    macro_prec = np.mean([rep[str(i)]['precision'] for i in range(num_classes)])*100
    macro_rec  = np.mean([rep[str(i)]['recall'] for i in range(num_classes)])*100
    macro_f1   = np.mean([rep[str(i)]['f1-score'] for i in range(num_classes)])*100

    # 🟢 NEW — Get validation accuracy from training history (if available)
    if name in histories and 'val_accuracy' in histories[name].history:
        val_acc = histories[name].history['val_accuracy'][-1] * 100
    else:
        val_acc = np.nan

    rows.append([name, round(acc,2), round(val_acc, 2), round(macro_prec,2), round(macro_rec,2), round(macro_f1,2)])
# df_summary = pd.DataFrame(rows, columns=['Model',' Accuracy(%)','Validation Accuracy(%)','MacroPrecision(%)','MacroRecall(%)','MacroF1(%)']).sort_values('Accuracy(%)',ascending=False)

df_summary = pd.DataFrame(
    rows,
    columns=['Model','Accuracy(%)','Validation Accuracy(%)','MacroPrecision(%)','MacroRecall(%)','MacroF1(%)']
).sort_values('Accuracy(%)', ascending=False)
df_summary.to_csv(os.path.join(data_dir, "models_summary_table.csv"), index=False)
print(df_summary)

In [None]:
# ===== Add Ensemble model to trained_classifiers =====
import numpy as np
from tensorflow.keras.models import Model

class EnsembleWrapper:
    """
    Wrapper that behaves like a Keras model and
    provides a .predict(dataset, verbose=0) method.
    """
    def __init__(self, member_models):
        self.member_models = member_models

    def predict(self, dataset, verbose=0):
        # collect probability predictions from each member
        preds = [m.predict(dataset, verbose=verbose) for m in self.member_models]
        # average the probabilities across models
        return np.mean(preds, axis=0)

# ---- choose the models to ensemble ----
# you can change this list to whichever models you prefer
ensemble_members = [
    trained_classifiers['MobileNetV3'],
    trained_classifiers['BiT_s-r50x1'],
    trained_classifiers['ResNet50'],
    trained_classifiers['ResNet152']
]

# create and register the ensemble
trained_classifiers['Ensemble'] = EnsembleWrapper(ensemble_members)

print("Ensemble model added to trained_classifiers:", list(trained_classifiers.keys()))


In [None]:
# =========== McNemar test between top 2 models ===========
if df_summary.shape[0] >= 2:
    top1 = df_summary.iloc[0]['Model']; top2 = df_summary.iloc[1]['Model']
    print("Top models:", top1, top2)
    p1 = np.argmax(trained_classifiers[top1].predict(test_ds, verbose=0), axis=1)
    p2 = np.argmax(trained_classifiers[top2].predict(test_ds, verbose=0), axis=1)
    correct1 = (p1 == y_true); correct2 = (p2 == y_true)
    table = [[np.sum((correct1==True)&(correct2==True)), np.sum((correct1==True)&(correct2==False))],
             [np.sum((correct1==False)&(correct2==True)), np.sum((correct1==False)&(correct2==False))]]
    result = mcnemar(table, exact=False)
    with open(os.path.join(data_dir, "mcnemar.txt"), "w") as f:
        f.write(f"Contingency table: {table}\nMcNemar p-value: {result.pvalue}\n")
    print("McNemar p-value:", result.pvalue)


In [None]:
# import os
# from pathlib import Path
# import numpy as np
# import tensorflow as tf
# import matplotlib.pyplot as plt
# import cv2
# # ==== 1. Base directory and gradcam folder ====
# data_dir = "/content/Medicinal plant dataset"      # your dataset root
# gradcam_dir = Path(data_dir) / "gradcam"
# gradcam_dir.mkdir(parents=True, exist_ok=True)     # create folder if missing
# print("Grad-CAM output directory:", gradcam_dir)

In [None]:
# =========== Explainability: Grad-CAM for selected models ===========
def make_gradcam(model, img_tensor, class_index=None, layer_name=None):
    # model: Keras classification model
    if layer_name is None:
        # try to find last conv layer
        for layer in reversed(model.layers):
            if isinstance(layer, tf.keras.layers.Conv2D) or 'conv' in layer.name:
                layer_name = layer.name; break
    grad_model = tf.keras.models.Model([model.inputs], [model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_tensor)
        if class_index is None:
            class_index = tf.argmax(predictions[0])
        loss = predictions[:, class_index]
    grads = tape.gradient(loss, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0,1,2))
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    heatmap = tf.maximum(heatmap, 0) / (tf.math.reduce_max(heatmap) + 1e-8)
    return heatmap.numpy(), layer_name

In [None]:
# pick 4 test samples and save Grad-CAM overlays
import cv2
os.makedirs(os.path.join(data_dir, "gradcam"), exist_ok=True)
# get raw test images and labels
x_test_imgs = []; y_test_lbls = []
for x,y in test_ds:
    for i in range(x.shape[0]):
        x_test_imgs.append(x[i].numpy()); y_test_lbls.append(int(y[i].numpy()))
    if len(x_test_imgs) >= 50: break
x_test_imgs = np.array(x_test_imgs); y_test_lbls = np.array(y_test_lbls)
sample_idx = np.random.choice(len(x_test_imgs), size=min(6,len(x_test_imgs)), replace=False)
for model_name in ['MobileNetV2','ResNet152','Xception']:
    if model_name not in trained_classifiers: continue
    mdl = trained_classifiers[model_name]
    for i,si in enumerate(sample_idx):
        img = x_test_imgs[si]
        inp = np.expand_dims(img, axis=0)
        # heatmap, lname = make_gradcam(mdl, inp)
        inp_tf = tf.convert_to_tensor(inp, dtype=tf.float32)
        heatmap, lname = make_gradcam(mdl, inp_tf)
        heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
        heatmap = np.uint8(255*heatmap)
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        overlay = cv2.addWeighted(np.uint8(img*255), 0.6, heatmap, 0.4, 0)
        print('gradcam result')
        from pathlib import Path

        gradcam_dir = Path(data_dir) / "gradcam"
        gradcam_dir.mkdir(parents=True, exist_ok=True)

        out_path = gradcam_dir / f"{model_name}_sample{i}.png"
        plt.imsave(out_path.as_posix(), overlay)
        plt.imsave(os.path.join(data_dir, f"gradcam/{model_name}_sample{i}.png"), overlay)


In [None]:
# ===============================================================
# t-SNE of MobileNetV3 (large) embeddings grouped into 4 classes
# ===============================================================
from tensorflow.keras import Model
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

# --- 1) Get the trained MobileNetV3 model from your dictionary ---
mobilenetv3_trained = models_to_train['MobileNetV3']   # if you trained it directly
# or, if you later moved it to another dict, change to:
# mobilenetv3_trained = trained_classifiers['MobileNetV3']

# --- 2) Create a feature extractor up to the global average pooling layer ---
feature_model_mnv3 = Model(
    inputs=mobilenetv3_trained.input,
    outputs=mobilenetv3_trained.layers[-4].output  # layer before Dense(128)
)

# --- 3) Extract features from the test set ---
def extract_features(ds, model):
    feats, labels = [], []
    for xb, yb in ds:
        feats.append(model.predict(xb, verbose=0))
        labels.append(yb.numpy())
    return np.vstack(feats), np.concatenate(labels)

X_feats, y_feats = extract_features(test_ds, feature_model_mnv3)
print("Feature shape:", X_feats.shape)

# --- 4) Split all class IDs into 4 nearly equal groups ---
all_classes = np.unique(y_feats)
groups = np.array_split(all_classes, 4)

# --- 5) Run t-SNE for each group and save plots ---
for gi, gclasses in enumerate(groups, start=1):
    mask = np.isin(y_feats, gclasses)
    X_sub = X_feats[mask]
    y_sub = y_feats[mask]

    X_scaled = StandardScaler().fit_transform(X_sub)
    Z = TSNE(n_components=2, perplexity=30, random_state=42).fit_transform(X_scaled)

    plt.figure(figsize=(8,6))
    sns.scatterplot(
        x=Z[:,0], y=Z[:,1],
        hue=[class_names[i] for i in y_sub.astype(int)],
        palette="tab20", s=25, linewidth=0
    )
    plt.title(f"t-SNE of MobileNetV3 Embeddings – Group {gi}")
    plt.legend(bbox_to_anchor=(1.05,1), loc='upper left', fontsize='small')
    plt.tight_layout()
    out_path = os.path.join(data_dir, f"tsne_mobilenetv3_group_{gi}.png")
    plt.savefig(out_path, dpi=300)
    plt.show()
    print(f"Saved: {out_path}")


In [None]:
# # =========== Extra analyses for paper ===========
# # 1) t-SNE of embeddings from ResNet50
# SEED = 42
# embs, lbls = extract_features(test_ds, feature_model)
# tsne = TSNE(n_components=2, random_state=SEED, perplexity=30)
# Z = tsne.fit_transform(embs[:1000]) if len(embs)>1000 else tsne.fit_transform(embs)
# plt.figure(figsize=(8,6))
# sns.scatterplot(x=Z[:,0], y=Z[:,1], hue=[class_names[l] for l in (lbls[:len(Z)].astype(int))], palette='tab10', s=20)
# plt.title('t-SNE of ResNet50 pooled embeddings'); plt.savefig(os.path.join(data_dir,"tsne_embeddings.png")); plt.close()

# ## ============================================
# # t-SNE visualization of ResNet50 embeddings
# # ============================================
# from sklearn.manifold import TSNE
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np
# import os

# # We already have:
# #   X_test_feats : (N, feature_dim)  – from extract_features()
# #   y_test_feats : (N,) integer labels
# #   class_names  : list of class names
# #   data_dir     : output directory

# # Split class IDs into 4 nearly equal groups
# all_classes = np.unique(y_test_feats)
# groups = np.array_split(all_classes, 4)

# for gi, gclasses in enumerate(groups, start=1):
#     # Mask the test samples belonging to this group
#     mask = np.isin(y_test_feats, gclasses)
#     X_sub = X_test_feats[mask]
#     y_sub = y_test_feats[mask]

#     # Optional: scale before t-SNE for stability
#     from sklearn.preprocessing import StandardScaler
#     X_sub_scaled = StandardScaler().fit_transform(X_sub)

#     # Run t-SNE on this subset
#     tsne = TSNE(n_components=2, perplexity=30, random_state=42)
#     X_tsne = tsne.fit_transform(X_sub_scaled)

#     # Plot
#     plt.figure(figsize=(8,6))
#     sns.scatterplot(
#         x=X_tsne[:,0],
#         y=X_tsne[:,1],
#         hue=[class_names[i] for i in y_sub],
#         palette="tab20",
#         s=20,
#         linewidth=0
#     )
#     plt.title(f"t-SNE of ResNet50 Embeddings – Class Group {gi}")
#     plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
#     plt.tight_layout()

#     # Save and show
#     out_path = os.path.join(data_dir, f"tsne_group_{gi}.png")
#     plt.savefig(out_path, dpi=300)
#     plt.show()

#     print(f"Saved: {out_path}")


# 2) Model sizes & inference times (single sample)
def model_size_and_time(model):
    # save weights to temp and measure size
    tmp_path = os.path.join(data_dir, "tmp_model.weights.h5")
    model.save_weights(tmp_path)
    size_mb = os.path.getsize(tmp_path)/1024/1024
    os.remove(tmp_path)
    # time inference of 100 runs
    sample = np.random.randn(1, IMG_SIZE[0], IMG_SIZE[1], 3).astype('float32')
    start = time.time()
    for _ in range(50):
        _ = model.predict(sample)
    t = (time.time()-start)/50
    return round(size_mb,3), round(t*1000,3)  # MB, ms
rows=[]
for name, model in trained_classifiers.items():
    s, tms = model_size_and_time(model)
    rows.append([name, s, tms])
pd.DataFrame(rows, columns=['Model','Size(MB)','AvgInference(ms)']).to_csv(os.path.join(data_dir,"model_size_inference.csv"), index=False)

# 3) Ablation idea: train small MobileNetV2 without augmentation vs with augmentation (code left as description)
# (You can run two experiments and include results in paper: augmentation improves per-class recall for visually variable species.)

print("All done. Artifacts saved to:", data_dir)
print("Key files: models_summary_table.csv, per-model classification reports, confusion matrices (*.png), gradcam images, tsne_embeddings.png")