# Libraries

In [None]:
import glob
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim


# Downloading & Setting Up Data

In [None]:
def get_data_extract():
  if "dataset" in os.listdir():
    print("Dataset already exists")
  else:
    print("Downloading the data...")
    !wget -O food-data.zip https://www.kaggle.com/api/v1/datasets/download/trolukovich/food11-image-dataset
    print("Dataset downloaded!")
    print("Extracting data..")
    !mkdir dataset
    !unzip -q food-data.zip -d dataset
    print("Extraction done!")

get_data_extract()

Downloading the data...
--2025-04-26 18:15:57--  https://www.kaggle.com/api/v1/datasets/download/trolukovich/food11-image-dataset
Resolving www.kaggle.com (www.kaggle.com)... 35.244.233.98
Connecting to www.kaggle.com (www.kaggle.com)|35.244.233.98|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://storage.googleapis.com:443/kaggle-data-sets/432700/821742/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20250426%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250426T181557Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=2c954edd612b89a86a0884818a112b36531664d3a49da7e49118abdf02e9c4c1c50cd7c0941a692fa68eeeff93a9737d9afcb6e043acc899820a3e6ddaaeb567a35749e0c4341516db92222ff47f34460c8ad6218026ddd6985f769a73e29be16b666f8f5fe0b56fe1f736210544cbef7211f8ca02be9119ac5475c2d34c46a0c5439bc552e4e72840df76aac5a00249295a12e1618ec48bf541e0aed921be5af176509e4279d82ce3644b

In [None]:
# Training set
path_train = glob.glob('dataset/training/*/*.jpg')
label_train = [i.split(".")[0].split("/")[-2] for i in path_train]

# Validation set
path_val = glob.glob('dataset/validation/*/*.jpg')
label_val = [i.split(".")[0].split("/")[-2] for i in path_val]

# Evaluation set
path_eval = glob.glob('dataset/evaluation/*/*.jpg')
label_eval = [i.split(".")[0].split("/")[-2] for i in path_eval]

print(f"Train: {len(path_train)} images, {len(label_train)} labels")
print(f"Validation: {len(path_val)} images, {len(label_val)} labels")
print(f"Evaluation: {len(path_eval)} images, {len(label_eval)} labels")

Train: 9866 images, 9866 labels
Validation: 3430 images, 3430 labels
Evaluation: 3347 images, 3347 labels


In [None]:
label_train[:5]

['Egg', 'Egg', 'Egg', 'Egg', 'Egg']

In [None]:
set(label_train)

{'Bread',
 'Dairy product',
 'Dessert',
 'Egg',
 'Fried food',
 'Meat',
 'Noodles-Pasta',
 'Rice',
 'Seafood',
 'Soup',
 'Vegetable-Fruit'}

In [None]:
# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 244)),
    transforms.ToTensor(),
])

# Load datasets
train_dataset = datasets.ImageFolder(root='dataset/training', transform=transform)
val_dataset = datasets.ImageFolder(root='dataset/validation', transform=transform)
test_dataset = datasets.ImageFolder(root='dataset/evaluation', transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

ImageFolder automatically assigns a label to each image based on the folder name.

In [None]:
train_loader.dataset

Dataset ImageFolder
    Number of datapoints: 9866
    Root location: dataset/training
    StandardTransform
Transform: Compose(
               Resize(size=(224, 244), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

In [None]:
# Get one batch
batch = next(iter(train_loader))

# batch is a tuple: (inputs, labels)
inputs, labels = batch

print(labels.shape)
print(labels)

torch.Size([32])
tensor([ 3,  4, 10,  8,  5,  2,  3,  9,  2,  5,  1, 10,  3,  8,  9,  9,  4,  0,
         9,  5,  0,  9,  9,  0,  9,  5,  3,  3,  0,  3,  4,  2])


In [None]:
val_loader.dataset

Dataset ImageFolder
    Number of datapoints: 3430
    Root location: dataset/validation
    StandardTransform
Transform: Compose(
               Resize(size=(224, 244), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

In [None]:
test_loader.dataset

Dataset ImageFolder
    Number of datapoints: 3347
    Root location: dataset/evaluation
    StandardTransform
Transform: Compose(
               Resize(size=(224, 244), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

In [None]:
# plotting three images from each
train_features, train_labels = next(iter(train_loader))
val_features, val_labels = next(iter(val_loader))
test_features, test_labels = next(iter(test_loader))

def show_images(features, labels, title):
    plt.figure(figsize=(12, 4))
    for i in range(3):
        img = features[i].permute(1, 2, 0)
        plt.subplot(1, 3, i + 1)
        plt.imshow(img)
        plt.title(f"Label: {labels[i].item()}")
        plt.axis('off')
    plt.suptitle(title)
    plt.show()

show_images(train_features, train_labels, "Training Samples")
show_images(val_features, val_labels, "Validation Samples")
show_images(test_features, test_labels, "Evaluation Samples")

# Models

1. Load EfficientNet (pretrained)
2. Replace classification head
3. For feature extraction: freeze all base layers
4. For fine-tuning: unfreeze last n layers or progressively unfreeze
5. Train, validate, and plot metrics

## Training only HEAD of EfficientNet

Input Image --> EfficientNet Feature Extractor --> (Dropout) --> Linear(1280 -> Our classes)

In [None]:
# Load pretrained EfficientNet
model = models.efficientnet_b0(pretrained=True)

In [None]:
# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

In [None]:
model.classifier

In [None]:
model.classifier[1].in_features

- EfficientNet is used for 1000 Classes. We need to change this to match our task.

In [None]:
# Modify the classifier head to match our classes
num_classes = len(set(label_train))
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

In [None]:
model.classifier

In [None]:
# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.classifier[1].parameters(), lr=1e-3)

# Training loop
epochs = 10

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_acc = 100 * val_correct / val_total

    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

## EfficientNet Freezed - HEAD + CNN + MLP

In [None]:
# Load pretrained EfficientNet
model = models.efficientnet_b0(pretrained=True)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 143MB/s]


In [None]:
class HybridModel(nn.Module):
    def __init__(self, efficientnet_backbone, num_classes):
        super(HybridModel, self).__init__()
        self.backbone = efficientnet_backbone
        self.backbone.classifier = nn.Identity()  # Remove EfficientNet head

        # Freeze EfficientNet
        for param in self.backbone.parameters():
            param.requires_grad = False

        self.extra_cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )

        self.mlp = nn.Sequential(
            nn.Linear(1280 + 64, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        features_effnet = self.backbone(x)  # [batch_size, 1280]
        features_cnn = self.extra_cnn(x)    # [batch_size, 64]
        combined = torch.cat((features_effnet, features_cnn), dim=1)  # [batch_size, 1344]
        output = self.mlp(combined)
        return output

In [None]:
# Setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define model
num_classes = 11
model = HybridModel(efficientnet_backbone=model, num_classes=num_classes)
model = model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer (train only the new parts)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)

# Training settings
epochs = 10

# --- Training loop ---
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_acc = 100 * val_correct / val_total

    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

## EfficientNet Non Freezed - HEAD + CNN + MLP

- Uses different learning rates (small for EfficientNet, normal for CNN+MLP)


In [None]:
efficientnet_model = models.efficientnet_b0(weights='IMAGENET1K_V1')

In [None]:
class HybridModel(nn.Module):
    def __init__(self, efficientnet_backbone, num_classes):
        super(HybridModel, self).__init__()
        self.backbone = efficientnet_backbone
        self.backbone.classifier = nn.Identity()

        # Freeze EfficientNet
        for param in self.backbone.parameters():
            param.requires_grad = False

        self.extra_cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )

        self.mlp = nn.Sequential(
            nn.Linear(1280 + 64, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        features_effnet = self.backbone(x)
        features_cnn = self.extra_cnn(x)
        combined = torch.cat((features_effnet, features_cnn), dim=1)
        output = self.mlp(combined)
        return output

In [None]:
pip install mlflow

In [None]:
import mlflow
import mlflow.pytorch

with mlflow.start_run():

    # Define model
    num_classes = 11
    model = HybridModel(efficientnet_backbone=efficientnet_model, num_classes=num_classes)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # === Unfreeze EfficientNet backbone ===
    for param in model.backbone.parameters():
        param.requires_grad = True

    # === Define optimizer with differential learning rates ===
    optimizer = optim.Adam([
        {'params': model.backbone.parameters(), 'lr': 1e-5},
        {'params': model.extra_cnn.parameters(), 'lr': 1e-3},
        {'params': model.mlp.parameters(), 'lr': 1e-3},
    ])

    # === Define loss function and scheduler ===
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    # === Training settings ===
    epochs = 10

    # --- Training Loop ---
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total

        # --- Validation ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = 100 * val_correct / val_total

        scheduler.step()

        print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

        # ✅ Log metrics for every epoch
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("train_accuracy", train_acc, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_acc, step=epoch)

    # --- Test Set Evaluation ---
    model.eval()
    test_loss = 0.0
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

    test_loss /= len(test_loader)
    test_acc = 100 * test_correct / test_total

    print(f"\n✅ Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")

    # ✅ Save and log model
    torch.save(model.state_dict(), "model.pth")
    mlflow.pytorch.log_model(model, "model")

    # ✅ Log final test metrics
    mlflow.log_metric("test_loss", test_loss)
    mlflow.log_metric("test_accuracy", test_acc)

    # ✅ Log hyperparameters
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", 32)
    mlflow.log_param("lr_backbone", 1e-5)
    mlflow.log_param("lr_heads", 1e-3)

Epoch [1/10] | Train Loss: 0.5634 | Train Acc: 81.54% | Val Loss: 0.4526 | Val Acc: 86.41%
Epoch [2/10] | Train Loss: 0.2729 | Train Acc: 90.71% | Val Loss: 0.4096 | Val Acc: 88.22%
Epoch [3/10] | Train Loss: 0.1885 | Train Acc: 93.78% | Val Loss: 0.4155 | Val Acc: 87.84%
Epoch [4/10] | Train Loss: 0.1394 | Train Acc: 95.36% | Val Loss: 0.4647 | Val Acc: 88.22%
Epoch [5/10] | Train Loss: 0.1160 | Train Acc: 96.04% | Val Loss: 0.4815 | Val Acc: 88.02%
Epoch [6/10] | Train Loss: 0.0818 | Train Acc: 97.36% | Val Loss: 0.4535 | Val Acc: 88.89%
Epoch [7/10] | Train Loss: 0.0706 | Train Acc: 97.70% | Val Loss: 0.4409 | Val Acc: 89.10%
Epoch [8/10] | Train Loss: 0.0506 | Train Acc: 98.36% | Val Loss: 0.4780 | Val Acc: 89.39%
Epoch [9/10] | Train Loss: 0.0520 | Train Acc: 98.40% | Val Loss: 0.4950 | Val Acc: 89.30%
Epoch [10/10] | Train Loss: 0.0451 | Train Acc: 98.60% | Val Loss: 0.4859 | Val Acc: 88.98%

✅ Test Loss: 0.3920 | Test Accuracy: 91.43%




## Gradual Freezing + Scheduler

In [None]:
# --- Setup ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load EfficientNet backbone
efficientnet_model = models.efficientnet_b0(weights='IMAGENET1K_V1')

# Instantiate model
num_classes = 11
model = HybridModel(efficientnet_backbone=efficientnet_model, num_classes=num_classes)
model = model.to(device)

# --- Gradual Unfreezing ---
for name, param in model.backbone.named_parameters():
    if 'features.6' in name or 'features.7' in name or 'features.8' in name:
        param.requires_grad = True  # Unfreeze only last few layers
    else:
        param.requires_grad = False  # Freeze early layers

# --- Loss and Optimizer ---
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # label smoothing added

optimizer = optim.AdamW([
    {'params': model.backbone.parameters(), 'lr': 1e-5},
    {'params': model.extra_cnn.parameters(), 'lr': 1e-3},
    {'params': model.mlp.parameters(), 'lr': 1e-3},
], weight_decay=0.01)

# --- Scheduler (optional) ---
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# --- Training Loop ---
epochs = 10

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_acc = 100 * val_correct / val_total

    # Step scheduler
    scheduler.step()

    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

In [16]:
# Zip the mlruns folder
!zip -r mlruns.zip mlruns


  adding: mlruns/ (stored 0%)
  adding: mlruns/.trash/ (stored 0%)
  adding: mlruns/0/ (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/ (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/params/ (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/params/lr_heads (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/params/lr_backbone (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/params/epochs (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/params/batch_size (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/meta.yaml (deflated 43%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/metrics/ (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/metrics/train_accuracy (deflated 51%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/metrics/test_loss (stored 0%)
  adding: mlruns/0/9602c2bfff504895a9386b39f1a7bdab/metrics/val_accuracy (deflated 53%)
  adding: mlruns/0/9602c2bfff50

## Stronger Model B4

In [None]:
# Correct transforms for EfficientNet-B3
transform = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
])

# Load datasets
train_dataset = datasets.ImageFolder(root='dataset/training', transform=transform)
val_dataset = datasets.ImageFolder(root='dataset/validation', transform=transform)
test_dataset = datasets.ImageFolder(root='dataset/evaluation', transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
import torch
import torch.nn as nn

class HybridModel(nn.Module):
    def __init__(self, efficientnet_backbone, num_classes):
        super(HybridModel, self).__init__()
        self.backbone = efficientnet_backbone
        self.backbone.classifier = nn.Identity()

        for param in self.backbone.parameters():
            param.requires_grad = False

        self.extra_cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )

        # Create a dummy input to pass through backbone to get output size
        dummy_input = torch.zeros(1, 3, 300, 300)
        with torch.no_grad():
            backbone_output = self.backbone(dummy_input)
            backbone_features = backbone_output.shape[1]

        extra_cnn_features = 64

        self.mlp = nn.Sequential(
            nn.Linear(backbone_features + extra_cnn_features, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        features_effnet = self.backbone(x)
        features_cnn = self.extra_cnn(x)
        combined = torch.cat((features_effnet, features_cnn), dim=1)
        output = self.mlp(combined)
        return output

In [None]:


# --- Setup ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load EfficientNet-B3
efficientnet_model = models.efficientnet_b3(weights='IMAGENET1K_V1')

# Define model
num_classes = 11
model = HybridModel(efficientnet_backbone=efficientnet_model, num_classes=num_classes)
model = model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer (train all parameters)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)



# --- Training ---
epochs = 10

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_acc = 100 * val_correct / val_total

    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")