In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import cv2  # OpenCV for preprocessing
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
DATASET_PATH = "D:/Sem7/Capstone 2/teaLeafBD/teaLeafBD"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
INITIAL_EPOCHS = 25
FINE_TUNE_EPOCHS = 10

In [3]:
def apply_clahe(image):
    """
    Applies CLAHE to the L channel of an image in LAB color space.
    This enhances local contrast, which is great for highlighting disease spots.
    """
    # The image is already rescaled to [0, 1] by the generator, so scale it back to [0, 255] for cv2
    image_uint8 = (image * 255).astype(np.uint8)
    # Convert image from RGB to LAB color space
    lab_image = cv2.cvtColor(image_uint8, cv2.COLOR_RGB2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab_image)

    # Apply CLAHE to the L-channel
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    cl_channel = clahe.apply(l_channel)

    # Merge the CLAHE enhanced L-channel back with A and B channels
    merged_channels = cv2.merge([cl_channel, a_channel, b_channel])

    # Convert back to RGB color space and rescale to [0, 1]
    final_image = cv2.cvtColor(merged_channels, cv2.COLOR_LAB2RGB)
    return final_image / 255.0

In [4]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    # NOTE: The preprocessing function is now handled differently in modern Keras.
    # It's better to use tf.data.Dataset.map for this.
    # However, for simplicity with ImageDataGenerator, we'll keep it but be mindful of data types.
    # A safer approach is to apply rescaling first, then the function.
    # Let's adjust the function to handle float inputs.
    validation_split=0.2  # Use 20% of training data for validation
)


In [7]:
train_datagen.preprocessing_function = apply_clahe

test_datagen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=apply_clahe # Apply CLAHE to test data as well
)

# Flow data from directories

In [6]:
# Make sure DATASET_PATH is set to your preprocessed directory
# DATASET_PATH = "D:/Sem7/Capstone 2/preprocessed_teaLeafBD"

# The ImageDataGenerator should have validation_split set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # Example split, adjust as needed
    preprocessing_function=apply_clahe
)

# Flow data directly from the root of the dataset directory
train_generator = train_datagen.flow_from_directory(
    DATASET_PATH,  # Corrected: Point to the root directory
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# Create the corresponding validation generator from the same directory
validation_generator = train_datagen.flow_from_directory(
    DATASET_PATH,  # Corrected: Point to the root directory
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# You would use your test_datagen on a separate, unseen test dataset folder
# For example:
# test_generator = test_datagen.flow_from_directory(
#     'path/to/your/test_data',
#     ...
# )

Found 4223 images belonging to 7 classes.
Found 1053 images belonging to 7 classes.


In [8]:
def build_model(num_classes):
    """
    Builds a classification model using EfficientNetB0 for transfer learning.
    """
    # Input layer
    inputs = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Load the base model
    base_model = EfficientNetB0(
        include_top=False,
        weights='imagenet',
        input_tensor=inputs
    )

    # Freeze the layers of the base model
    base_model.trainable = False

    # Add custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D(name="avg_pool")(x)

    x = Dropout(0.5)(x) # Regularization
    outputs = Dense(num_classes, activation="softmax", name="pred")(x)

    # Create the final model
    model = Model(inputs, outputs)

    return model

model = build_model(NUM_CLASSES)

# Compile the model for initial training
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss=CategoricalCrossentropy(),
    metrics=["accuracy"]
)

model.summary()

NameError: name 'NUM_CLASSES' is not defined

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import os
import time

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

LEARNING_RATE = 0.001
BATCH_SIZE = 16
NUM_EPOCHS = 10
PATIENCE = 15
RF_FACTOR = 0.5
NUM_CLASSES = 7

Using device: cpu


In [8]:
DATA_DIR = r"D:/Sem7/Capstone 2/teaLeafBD/teaLeafBD"
TRAIN_DIR = os.path.join(DATA_DIR, "train")
TEST_DIR = os.path.join(DATA_DIR, "test")


preprocess_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),

    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split
import random


DATA_DIR = r"D:\Sem7\Capstone 2\teaLeafBD"

if not os.path.exists(DATA_DIR):
    print(f"ERROR: The path was not found at: {DATA_DIR}")
    print("Please make sure this path is correct.")
else:
    print(f"Loading data from: {DATA_DIR}")


train_augmentation_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ColorJitter(brightness=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

preprocess_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])



train_dataset_with_aug = datasets.ImageFolder(DATA_DIR, transform=train_augmentation_transform)
dataset_no_aug = datasets.ImageFolder(DATA_DIR, transform=preprocess_transform)

total_size = len(dataset_no_aug)

test_size = int(total_size * 0.20)
train_val_size = total_size - test_size


indices = list(range(total_size))
random.seed(42)
random.shuffle(indices)

train_val_indices = indices[:train_val_size]
test_indices = indices[train_val_size:]

val_size = int(train_val_size * 0.10)
train_size = train_val_size - val_size

train_indices = train_val_indices[:train_size]
val_indices = train_val_indices[train_size:]


train_subset = Subset(train_dataset_with_aug, train_indices)
val_subset = Subset(dataset_no_aug, val_indices)
test_subset = Subset(dataset_no_aug, test_indices)

print(f"Total images: {total_size}")
print(f"Training images: {len(train_subset)}")
print(f"Validation images: {len(val_subset)}")
print(f"Testing images: {len(test_subset)}")



BATCH_SIZE = 16

train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_loader = DataLoader(test_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

print("\nDataLoaders created successfully.")

Loading data from: D:\Sem7\Capstone 2\teaLeafBD
Total images: 5276
Training images: 3799
Validation images: 422
Testing images: 1055

DataLoaders created successfully.


In [16]:
import torch
import torch.nn as nn
from torchvision import models

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Building model for device: {DEVICE}")

class ECA(nn.Module):

    def __init__(self, channels, kernel_size=3):
        super(ECA, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        padding = (kernel_size - 1) // 2
        self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=padding, bias=False)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):

        y = self.avg_pool(x)

        y = y.squeeze(-1).squeeze(-1).unsqueeze(1)

        y = self.conv(y)

        y = y.squeeze(1).unsqueeze(-1).unsqueeze(-1)
        y = self.sigmoid(y)

        return x * y

def create_m_efficientnet_b0_eca(num_classes=7):

    model = models.efficientnet_b0(weights=None)

    def replace_se_with_eca(module):
        for name, child in module.named_children():
            if isinstance(child, models.efficientnet.SqueezeExcitation):
                channels = child.fc1.in_channels
                setattr(module, name, ECA(channels=channels))
            else:
                replace_se_with_eca(child)

    replace_se_with_eca(model.features)

    model.features[6] = model.features[6][0]
    model.features[7] = nn.Identity()


    orig_stage9 = model.features[8]
    out_channels = orig_stage9[0].out_channels
    norm_layer = type(orig_stage9[1])
    activation_layer = type(orig_stage9[2])

    new_in_channels = 192

    model.features[8] = models.efficientnet.Conv2dNormActivation(
        new_in_channels,
        out_channels,
        kernel_size=1,
        stride=1,
        norm_layer=norm_layer,
        activation_layer=activation_layer,
        inplace=True
    )

    NUM_CLASSES = 7
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(in_features, NUM_CLASSES)

    print("Model m-EfficientNetB0+ECA created successfully.")
    return model

model = create_m_efficientnet_b0_eca().to(DEVICE)

Building model for device: cpu
Replacing SE modules with ECA...
Replacement complete.
Modifying Stage 7...
Modifying Stage 8...
Fixing Stage 9 (Conv1x1) input channels...
Model m-EfficientNetB0+ECA created successfully.


In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import time
import torch.nn.functional as F
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

LEARNING_RATE = 0.001
NUM_EPOCHS = 100
PATIENCE = 15
RF_FACTOR = 0.5
NUM_CLASSES = 7


optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

criterion = nn.CrossEntropyLoss()

scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=RF_FACTOR,
    patience=PATIENCE
)

print("Optimizer, Loss, and Scheduler are set up.")


def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

def validate_epoch(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(all_labels)
    acc = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='macro', zero_division=0
    )

    return epoch_loss, acc * 100, precision * 100, recall * 100, f1 * 100

print("Starting training...")
best_val_loss = float('inf')

for epoch in range(NUM_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc, val_prec, val_recall, val_f1 = validate_epoch(model, val_loader, criterion)

    end_time = time.time()

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | "
          f"Time: {(end_time - start_time):.2f}s")
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {val_loss:.4f}, Val Acc:   {val_acc:.2f}% | "
          f"Val F1: {val_f1:.2f}%")


    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "m_efficientnet_b0_eca_best.pth")
        print(f"  -> New best model saved with val_loss: {val_loss:.4f}")

print("Training complete.")

print("\nLoading best model for final testing...")
model.load_state_dict(torch.load("m_efficientnet_b0_eca_best.pth"))

test_loss, test_acc, test_prec, test_recall, test_f1 = validate_epoch(
    model, test_loader, criterion
)

print(f"--- FINAL TEST RESULTS ---")
print(f"  Test Loss:     {test_loss:.4f}")
print(f"  Test Accuracy: {test_acc:.2f}%")
print(f"  Test Precision (Macro): {test_prec:.2f}%")
print(f"  Test Recall (Macro):    {test_recall:.2f}%")
print(f"  Test F1-Score (Macro):  {test_f1:.2f}%")

Optimizer, Loss, and Scheduler are set up.
Starting training...


In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
import os
import copy
import time
import numpy as np
from sklearn.metrics import classification_report, accuracy_score

In [24]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cpu


In [25]:
# %%
# =============================================================================
# PART 2: ECA MODULE & M-EFFICIENTNETB0 MODEL
# =============================================================================

# [cite_start]Define the ECA Module (as per paper description [cite: 293-308] and Figure 8)
class ECA(nn.Module):
    """Efficient Channel Attention module"""
    def __init__(self, in_channels, kernel_size=3):
        super(ECA, self).__init__()
        self.gap = nn.AdaptiveAvgPool2d(1)
        # 1D convolution for local cross-channel interaction
        self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Global Average Pooling
        y = self.gap(x)

        # Squeeze, Conv1D, Sigmoid
        # B, C, 1, 1 -> B, C, 1 -> B, 1, C
        y = y.squeeze(-1).permute(0, 2, 1)
        # B, 1, C -> B, 1, C
        y = self.conv(y)
        # B, 1, C -> B, C, 1
        y = y.permute(0, 2, 1)

        # Apply sigmoid activation
        y = self.sigmoid(y)

        # B, C, 1 -> B, C, 1, 1
        y = y.unsqueeze(-1)

        # Element-wise multiplication
        return x * y.expand_as(x)

# Helper function to replace SE modules with ECA modules
def replace_se_with_eca(module):
    """
    Recursively iterates through all modules and replaces SqueezeExcitation
    with our ECA module.
    """
    for name, child in module.named_children():
        if isinstance(child, models.efficientnet.SqueezeExcitation):
            # The SqueezeExcitation module stores the input channels in its
            # first convolutional layer (fc1).
            in_channels = child.fc1.in_channels

            # Replace it with our ECA block
            setattr(module, name, ECA(in_channels=in_channels))
        elif len(list(child.children())) > 0:
            # Recurse
            replace_se_with_eca(child)

def create_model(num_classes=7):
    """
    Creates the modified EfficientNetB0 (m-EfficientNetB0) with ECA.
    """
    # 1. Load pre-trained EfficientNetB0
    model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)

    # 2. Replace all SE modules with ECA modules
    replace_se_with_eca(model)

    # 3. Apply architectural modifications for m-EfficientNetB0

    # --- THIS IS THE FINAL FIX ---

    # Reduce Stage 7 (features[6]) to one block
    # We MUST keep the first block [0] because it's the only one
    # that handles the channel transition from 112 (from Stage 5) to 192.
    model.features[6] = model.features[6][0]

    # Remove Stage 8 (features[7]) entirely
    model.features[7] = nn.Identity()

    # CRITICAL FIX: Adjust Stage 9 (features[8])
    # The original Stage 9 (features[8][0]) expects 320 channels from Stage 8.
    # Our new flow is: Stage 7 (out 192) -> Stage 8 (removed) -> Stage 9.
    # Therefore, Stage 9 will receive 192 channels, not 320.
    # We must rebuild the first Conv2d layer of Stage 9 to accept 192 channels.

    # Get the original weights and parameters from the old layer
    old_conv = model.features[8][0]
    out_channels = old_conv.out_channels
    kernel_size = old_conv.kernel_size
    stride = old_conv.stride
    padding = old_conv.padding
    bias = (old_conv.bias is not None)

    # Create the new layer with corrected in_channels
    new_conv = nn.Conv2d(in_channels=192,
                         out_channels=out_channels,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=padding,
                         bias=bias)

    # Replace the old layer with the new one
    model.features[8][0] = new_conv

    # ---------------------------------

    # 4. Replace the final classifier head
    # This remains correct. The input to the classifier is 1280 channels.
    in_features = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True), # Default dropout in EfficientNet
        nn.Linear(in_features, num_classes)
    )

    return model.to(device)

In [26]:
DATA_DIR = "D:/Sem7/Capstone 2/teaLeafBD/teaLeafBD" # <<< CHANGE THIS
BATCH_SIZE = 16 #
NUM_CLASSES = 7 # 6 diseases + 1 healthy [cite: 101, 352]
IMG_SIZE = 256 # Standardized size [cite: 345]

# --- Data Augmentation (as per Table 3 ) ---
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)), # Random crop 80-100%
    transforms.RandomVerticalFlip(p=0.5), # Vertical flip 50%
    transforms.RandomHorizontalFlip(p=0.5), # Horizontal flip 50%
    transforms.RandomRotation(20), # +/- 20 degrees
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), # 0-10% translation
    transforms.ColorJitter(brightness=0.2), # +/- 20% brightness
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Standard normalization
])

# For validation and testing, just resize and normalize
val_test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- Load and Split Dataset ---
# Assuming DATA_DIR has subfolders for each of the 7 classes
try:
    full_dataset = datasets.ImageFolder(DATA_DIR)
    print(f"Found {len(full_dataset)} images in {len(full_dataset.classes)} classes.")
    print("Classes:", full_dataset.classes)

    # Verify class count
    if len(full_dataset.classes) != NUM_CLASSES:
        print(f"Warning: Expected {NUM_CLASSES} classes, but found {len(full_dataset.classes)}.")

except FileNotFoundError:
    print(f"Error: Data directory not found at {DATA_DIR}")
    print("Please download the teaLeafBD dataset and set the DATA_DIR variable.")
    # Stop execution if data isn't found
    raise

# Splitting data 80% train_phase, 20% test_phase
total_count = len(full_dataset)
test_count = int(0.2 * total_count)
train_val_count = total_count - test_count

train_val_dataset, test_dataset = random_split(full_dataset, [train_val_count, test_count],
                                         generator=torch.Generator().manual_seed(42))


Found 5276 images in 7 classes.
Classes: ['1. Tea algal leaf spot', '2. Brown Blight', '3. Gray Blight', '4. Helopeltis', '5. Red spider', '6. Green mirid bug', '7. Healthy leaf']


In [27]:
train_count = int(0.9 * train_val_count)
val_count = train_val_count - train_count

train_dataset, val_dataset = random_split(train_val_dataset, [train_count, val_count],
                                          generator=torch.Generator().manual_seed(42))

# Apply the correct transforms to each split
train_dataset.dataset.transform = train_transform
val_dataset.dataset.transform = val_test_transform
test_dataset.dataset.transform = val_test_transform

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Training images: {len(train_dataset)}")
print(f"Validation images: {len(val_dataset)}")
print(f"Testing images: {len(test_dataset)}")


Training images: 3798
Validation images: 423
Testing images: 1055


In [28]:
# %%
# =============================================================================
# PART 4: TRAINING SETUP
# =============================================================================

model = create_model(num_classes=NUM_CLASSES)

# --- Hyperparameters from Table 1 ---
LEARNING_RATE = 0.001
EPOCHS = 100
PATIENCE = 15
REDUCTION_FACTOR = 0.5

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# --- THIS IS THE FIX ---
# The 'verbose' argument is deprecated/removed in newer PyTorch versions.
# The scheduler will still print updates by default when the LR changes.
scheduler = ReduceLROnPlateau(optimizer,
                              mode='min',
                              factor=REDUCTION_FACTOR,
                              patience=PATIENCE)
# ---------------------

In [29]:
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

for epoch in range(EPOCHS):
    start_time = time.time()

    # --- Training Phase ---
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)

    # --- Validation Phase ---
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            val_corrects += torch.sum(preds == labels.data)

    # --- Epoch Statistics ---
    epoch_train_loss = train_loss / len(train_dataset)
    epoch_val_loss = val_loss / len(val_dataset)
    epoch_val_acc = val_corrects.double() / len(val_dataset)

    # Adjust learning rate based on validation loss [cite: 318]
    scheduler.step(epoch_val_loss)

    end_time = time.time()

    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Time: {end_time - start_time:.0f}s | "
          f"Train Loss: {epoch_train_loss:.4f} | "
          f"Val Loss: {epoch_val_loss:.4f} | "
          f"Val Acc: {epoch_val_acc:.4f}")

    # Save the best model
    if epoch_val_acc > best_acc:
        best_acc = epoch_val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(best_model_wts, 'm-EfficientNetB0-ECA_best.pth')

print(f"Training complete. Best Val Acc: {best_acc:.4f}")


Epoch 1/100 | Time: 1353s | Train Loss: 0.9070 | Val Loss: 0.6469 | Val Acc: 0.7896
Epoch 2/100 | Time: 1159s | Train Loss: 0.4964 | Val Loss: 0.5237 | Val Acc: 0.8392


KeyboardInterrupt: 