In [1]:
import os
import time
import random
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetV2S
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger, ReduceLROnPlateau
import albumentations as A

In [2]:
# Set random seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
# # Enable mixed precision for faster training
# tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [4]:
# Data paths
BASE_DIR = "D:\\Ashutosh\\Herbs\\Cleanede_Data"
TRAIN_DIR = os.path.join(BASE_DIR, "Train")
VAL_DIR = os.path.join(BASE_DIR, "Val")
OUTPUT_DIR = "D:\\Ashutosh\\Herbs\\Trained_Output_EfficientNetV2_new"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [5]:
# Image parameters
IMAGE_SIZE = (224, 224)  # EfficientNetV2S default size
BATCH_SIZE = 32
NUM_CLASSES = 109

In [6]:
# Advanced augmentation with albumentations
train_transform = A.Compose([
    A.Resize(*IMAGE_SIZE),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=20, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet stats
    A.ToFloat()
])

val_transform = A.Compose([
    A.Resize(*IMAGE_SIZE),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.ToFloat()
])

  original_init(self, **validated_kwargs)


In [7]:
# Data loading and preprocessing with tf.data
def load_and_preprocess_image(file_path, label, is_training=True):
    # Read and decode image
    img = tf.io.read_file(file_path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)  # Use decode_image for robustness
    img = tf.image.convert_image_dtype(img, tf.float32)  # [0, 1] range

    # Define augmentation function
    def apply_augmentation(image):
        if is_training:
            return train_transform(image=image.numpy())['image']
        return val_transform(image=image.numpy())['image']

    # Apply augmentation using tf.py_function
    img = tf.py_function(apply_augmentation, [img], tf.float32)
    img.set_shape([*IMAGE_SIZE, 3])  # Explicitly set shape
    return img, label

def create_dataset(directory, is_training=True):
    file_paths = []
    labels = []
    class_names = sorted(os.listdir(directory))
    if len(class_names) != NUM_CLASSES:
        raise ValueError(f"Expected {NUM_CLASSES} classes, found {len(class_names)} in {directory}")
    
    class_to_idx = {name: idx for idx, name in enumerate(class_names)}
    
    for class_name in class_names:
        class_dir = os.path.join(directory, class_name)
        if not os.path.isdir(class_dir):
            continue
        for fname in os.listdir(class_dir):
            if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                file_paths.append(os.path.join(class_dir, fname))
                labels.append(class_to_idx[class_name])
    
    if not file_paths:
        raise ValueError(f"No valid images found in {directory}")
    
    dataset = tf.data.Dataset.from_tensor_slices((file_paths, tf.keras.utils.to_categorical(labels, NUM_CLASSES)))
    dataset = dataset.map(
        lambda x, y: load_and_preprocess_image(x, y, is_training),
        num_parallel_calls=tf.data.AUTOTUNE
    )
    if is_training:
        dataset = dataset.shuffle(buffer_size=1000, seed=SEED)
    dataset = dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    return dataset, len(file_paths)

# Create datasets
train_dataset, train_size = create_dataset(TRAIN_DIR, is_training=True)
val_dataset, val_size = create_dataset(VAL_DIR, is_training=False)

print(f"Training samples: {train_size}, Validation samples: {val_size}")

Training samples: 60236, Validation samples: 15059


In [None]:
# Model definition
base_model = EfficientNetV2S(
    include_top=False,
    input_shape=(*IMAGE_SIZE, 3),
    weights='imagenet'
)
base_model.trainable = True  # Fine-tune the entire model

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),  # Mixed precision requires float32 for dense layers
    layers.Dropout(0.3),
    layers.Dense(NUM_CLASSES, activation='softmax')
])

model.summary()

In [None]:
# Compile model with AdamW
model.compile(
    optimizer=AdamW(learning_rate=0.0005),
    loss=CategoricalCrossentropy(),  # Label smoothing to reduce overfitting
    metrics=['accuracy']
)

In [10]:
# Callbacks
filepath = os.path.join(OUTPUT_DIR, "HerbClassification_BEST.h5")
callbacks = [
    ModelCheckpoint(filepath, monitor='val_accuracy', save_best_only=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1),
    EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
    CSVLogger(os.path.join(OUTPUT_DIR, f"training_log_{int(time.time())}.csv"))
]

In [None]:
# Training
steps_per_epoch = (train_size + BATCH_SIZE - 1) // BATCH_SIZE  # Ceiling division
validation_steps = (val_size + BATCH_SIZE - 1) // BATCH_SIZE    # Ceiling division
epochs = 200  # Fewer epochs with early stopping

history = model.fit(
    train_dataset,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    callbacks=callbacks
)

Epoch 1/200
[1m1883/1883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 768ms/step - accuracy: 0.1697 - loss: 3.4922
Epoch 1: val_accuracy improved from -inf to 0.00910, saving model to D:\Ashutosh\Herbs\Trained_Output_EfficientNetV2_new\HerbClassification_BEST.h5




[1m1883/1883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1799s[0m 948ms/step - accuracy: 0.1697 - loss: 3.4927 - val_accuracy: 0.0091 - val_loss: 4.7330 - learning_rate: 0.0030
Epoch 2/200


In [None]:
# Save final model
model.save(os.path.join(OUTPUT_DIR, "HerbClassification_Final.h5"))

In [None]:
# Plot training history
def plot_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    ax1.plot(history.history['accuracy'], label='Train Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Val Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax2.plot(history.history['loss'], label='Train Loss')
    ax2.plot(history.history['val_loss'], label='Val Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    plt.tight_layout()
    plt.show()

plot_history(history)