# Transfer Learning with Pre-trained Models [3 points]
Use in-build models with pre-trained weights and apply them to the Food-11 dataset.

In [None]:
import kagglehub
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
import time, copy, os
from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [None]:
path = kagglehub.dataset_download("vermaavi/food11")
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/vermaavi/food11?dataset_version_number=1...


100%|██████████| 2.17G/2.17G [00:14<00:00, 165MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/vermaavi/food11/versions/1


## Step 1: Select at least THREE different pre-trained models
Selected at least THREE different pre-trained models, e.g. ShuffleNet, Inception V3, and MobileNet V3. Check PyTorch documentation for more details. Justify your choice of models, considering their architectural strengths and suitability for the task.

We have chosen

In [None]:
data_dir = path
train_dir = os.path.join(data_dir, 'training')
val_dir = os.path.join(data_dir, 'validation')
test_dir = os.path.join(data_dir, 'evaluation')

In [None]:
print(os.listdir('/root/.cache/kagglehub/datasets/vermaavi/food11/versions/1/training'))

['2_444.jpg', '5_1321.jpg', '9_1290.jpg', '5_954.jpg', '1_283.jpg', '3_666.jpg', '0_974.jpg', '8_646.jpg', '8_696.jpg', '10_127.jpg', '2_739.jpg', '9_520.jpg', '9_16.jpg', '6_48.jpg', '9_1493.jpg', '10_653.jpg', '4_102.jpg', '2_50.jpg', '4_534.jpg', '8_607.jpg', '0_517.jpg', '8_254.jpg', '8_187.jpg', '2_360.jpg', '5_650.jpg', '0_837.jpg', '9_905.jpg', '10_292.jpg', '2_7.jpg', '8_414.jpg', '1_246.jpg', '10_165.jpg', '10_439.jpg', '9_1422.jpg', '9_1050.jpg', '5_1319.jpg', '2_334.jpg', '2_1200.jpg', '0_96.jpg', '7_277.jpg', '10_202.jpg', '3_837.jpg', '0_747.jpg', '9_106.jpg', '6_305.jpg', '2_1377.jpg', '0_205.jpg', '4_0.jpg', '0_437.jpg', '3_28.jpg', '3_212.jpg', '6_382.jpg', '10_78.jpg', '0_954.jpg', '3_356.jpg', '8_163.jpg', '3_197.jpg', '9_1141.jpg', '5_615.jpg', '0_494.jpg', '1_139.jpg', '4_234.jpg', '3_540.jpg', '10_73.jpg', '9_704.jpg', '2_943.jpg', '3_786.jpg', '3_534.jpg', '2_1160.jpg', '3_215.jpg', '5_1054.jpg', '9_1450.jpg', '9_692.jpg', '2_851.jpg', '2_647.jpg', '0_680.jpg', '4

In [None]:
class Food11Dataset(Dataset):
    def __init__(self, root_dir, transform=None):

        self.root_dir = root_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(root_dir) if f.endswith('.jpg')]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        label_str = img_name.split('_')[0]
        label = int(label_str)
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

dataTransforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_dir = '/root/.cache/kagglehub/datasets/vermaavi/food11/versions/1/training'
val_dir   = '/root/.cache/kagglehub/datasets/vermaavi/food11/versions/1/validation'
test_dir  = '/root/.cache/kagglehub/datasets/vermaavi/food11/versions/1/evaluation'

default_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])
# Transform for Inception
inception_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

train_dataset = Food11Dataset(train_dir, transform=default_transform)
val_dataset   = Food11Dataset(val_dir, transform=default_transform)
test_dataset  = Food11Dataset(test_dir, transform=default_transform)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

dataloaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset),
    'test': len(test_dataset)
}


## Step 2: For each chosen model

### a. Load the pre-trained model and modify the classification head
Load the pre-trained model and modify the classification head (the final fully connected layer) to match the number of classes in the Food 11 dataset.

In [6]:
def initialize_model(model_name, num_classes, feature_extract=True, use_pretrained=True):

    model_ft = None
    input_size = 224
    if model_name == "shufflenet":
        model_ft = models.shufflenet_v2_x1_0(pretrained=use_pretrained)
        if feature_extract:
            for param in model_ft.parameters():
                param.requires_grad = False

        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)

    elif model_name == "inception":
        input_size = 299
        model_ft = models.inception_v3(pretrained=use_pretrained)
        if feature_extract:
            for param in model_ft.parameters():
                param.requires_grad = False

        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
        if model_ft.aux_logits:
            model_ft.AuxLogits.fc = nn.Linear(model_ft.AuxLogits.fc.in_features, num_classes)

    elif model_name == "mobilenet":
        model_ft = models.mobilenet_v3_large(pretrained=use_pretrained)
        if feature_extract:
            for param in model_ft.parameters():
                param.requires_grad = False

        model_ft.classifier[3] = nn.Linear(model_ft.classifier[3].in_features, num_classes)

    return model_ft, input_size

def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, num_epochs=10, is_inception=False):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f"{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()

    time_elapsed = time.time() - since
    print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best Val Acc: {best_acc:.4f}")

    model.load_state_dict(best_model_wts)
    return model

def evaluate_model(model, dataloader, dataset_size, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

    total_loss = running_loss / dataset_size
    total_acc = running_corrects.double() / dataset_size
    print(f"Test Loss: {total_loss:.4f} Acc: {total_acc:.4f}")


### b. Fine-tune the model
Fine-tune the model. Experiment with different hyperparameter settings (learning rate, batch size, etc.) to optimize performance. Explain your tuning strategy.

In [7]:
num_classes = 11
num_epochs = 10
learning_rate = 0.001
feature_extract = False

# List of models to train.
model_names = ['shufflenet', 'inception', 'mobilenet']

for model_name in model_names:
    print(f"\n Training {model_name.upper()}")

    model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
    model_ft = model_ft.to(device)

    if model_name == 'inception':
        train_dataset_inception = Food11Dataset(train_dir, transform=inception_transform)
        val_dataset_inception   = Food11Dataset(val_dir, transform=inception_transform)
        test_dataset_inception  = Food11Dataset(test_dir, transform=inception_transform)
        dataloaders['train'] = DataLoader(train_dataset_inception, batch_size=batch_size, shuffle=True, num_workers=4)
        dataloaders['val']   = DataLoader(val_dataset_inception, batch_size=batch_size, shuffle=False, num_workers=4)
        dataloaders['test']  = DataLoader(test_dataset_inception, batch_size=batch_size, shuffle=False, num_workers=4)
        dataset_sizes['train'] = len(train_dataset_inception)
        dataset_sizes['val']   = len(val_dataset_inception)
        dataset_sizes['test']  = len(test_dataset_inception)


    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

    is_inception = True if model_name == 'inception' else False

    model_ft = train_model(model_ft, dataloaders, dataset_sizes, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=is_inception)



 Training SHUFFLENET


Downloading: "https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth" to /root/.cache/torch/hub/checkpoints/shufflenetv2_x1-5666bf0f80.pth
100%|██████████| 8.79M/8.79M [00:00<00:00, 63.3MB/s]


Epoch 1/10
----------
Train Loss: 1.0331 Acc: 0.6812
Val Loss: 0.6297 Acc: 0.8000

Epoch 2/10
----------
Train Loss: 0.5181 Acc: 0.8296
Val Loss: 0.5530 Acc: 0.8251

Epoch 3/10
----------
Train Loss: 0.3826 Acc: 0.8755
Val Loss: 0.5189 Acc: 0.8399

Epoch 4/10
----------
Train Loss: 0.3160 Acc: 0.8963
Val Loss: 0.5132 Acc: 0.8417

Epoch 5/10
----------
Train Loss: 0.2685 Acc: 0.9130
Val Loss: 0.5244 Acc: 0.8478

Epoch 6/10
----------
Train Loss: 0.2242 Acc: 0.9280
Val Loss: 0.5057 Acc: 0.8499

Epoch 7/10
----------
Train Loss: 0.1919 Acc: 0.9352
Val Loss: 0.5153 Acc: 0.8501

Epoch 8/10
----------
Train Loss: 0.1902 Acc: 0.9358
Val Loss: 0.5553 Acc: 0.8420

Epoch 9/10
----------
Train Loss: 0.1725 Acc: 0.9432
Val Loss: 0.5129 Acc: 0.8525

Epoch 10/10
----------
Train Loss: 0.1348 Acc: 0.9547
Val Loss: 0.6877 Acc: 0.8175

Training complete in 3m 41s
Best Val Acc: 0.8525

 Training INCEPTION


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 151MB/s]


Epoch 1/10
----------
Train Loss: 1.5905 Acc: 0.6357
Val Loss: 1.0365 Acc: 0.6901

Epoch 2/10
----------
Train Loss: 1.0504 Acc: 0.7527
Val Loss: 0.9397 Acc: 0.7198

Epoch 3/10
----------
Train Loss: 0.8563 Acc: 0.7932
Val Loss: 0.7329 Acc: 0.7633

Epoch 4/10
----------
Train Loss: 0.7328 Acc: 0.8328
Val Loss: 0.7051 Acc: 0.7776

Epoch 5/10
----------
Train Loss: 0.6335 Acc: 0.8483
Val Loss: 0.6838 Acc: 0.7863

Epoch 6/10
----------
Train Loss: 0.5618 Acc: 0.8686
Val Loss: 0.6426 Acc: 0.7939

Epoch 7/10
----------
Train Loss: 0.4702 Acc: 0.8898
Val Loss: 0.8256 Acc: 0.7650

Epoch 8/10
----------
Train Loss: 0.4447 Acc: 0.8968
Val Loss: 0.5067 Acc: 0.8472

Epoch 9/10
----------
Train Loss: 0.3666 Acc: 0.9153
Val Loss: 0.5460 Acc: 0.8455

Epoch 10/10
----------
Train Loss: 0.3581 Acc: 0.9143
Val Loss: 0.6087 Acc: 0.8289

Training complete in 11m 25s
Best Val Acc: 0.8472

 Training MOBILENET


Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth
100%|██████████| 21.1M/21.1M [00:01<00:00, 12.1MB/s]


Epoch 1/10
----------
Train Loss: 0.7078 Acc: 0.7643
Val Loss: 0.8032 Acc: 0.7592

Epoch 2/10
----------
Train Loss: 0.4058 Acc: 0.8678
Val Loss: 0.6133 Acc: 0.8035

Epoch 3/10
----------
Train Loss: 0.3203 Acc: 0.8934
Val Loss: 0.7768 Acc: 0.7805

Epoch 4/10
----------
Train Loss: 0.2614 Acc: 0.9150
Val Loss: 0.8720 Acc: 0.7708

Epoch 5/10
----------
Train Loss: 0.2252 Acc: 0.9265
Val Loss: 0.5340 Acc: 0.8496

Epoch 6/10
----------
Train Loss: 0.2103 Acc: 0.9349
Val Loss: 0.5409 Acc: 0.8472

Epoch 7/10
----------
Train Loss: 0.2010 Acc: 0.9358
Val Loss: 0.4789 Acc: 0.8732

Epoch 8/10
----------
Train Loss: 0.1647 Acc: 0.9441
Val Loss: 0.7722 Acc: 0.8070

Epoch 9/10
----------
Train Loss: 0.1657 Acc: 0.9478
Val Loss: 0.8193 Acc: 0.7968

Epoch 10/10
----------
Train Loss: 0.1505 Acc: 0.9491
Val Loss: 0.6791 Acc: 0.8466

Training complete in 6m 52s
Best Val Acc: 0.8732


Justifications for choosing the model : 

ShuffleNet V2, Inception V3, and MobileNet V3 were selected due to advantages overcoming the challenges in the Food 11 dataset. ShuffleNet V2 is appropriate for low computationalresource environments due to its light weight structure and channel shuffling operations, which enable fast inference and low memory usage

Inception V3 offers architecture with fresh inception modules that learn multi scale features in the same layer. It proven performance on large scale datasets like ImageNet indicates its capacity to generalize well to challenging classification tasks, and it is therefore good option for achieving high accuracy.

MobileNet V3 was chosen because it offers the best tradeoff between computation and model accuracy. It features newer architectural advancements, such as squeeze and excitation blocks and few more activation functions, that enhance performance without significantly increasing its size.



### c. Evaluate the performance of each fine-tuned model
Evaluate the performance of each fine-tuned model on the Food 11 dataset.

Answer : Already done has part of the previous code excecution only .

### d. Compare the results obtained with the different pre-trained models
Discuss which model performed best and analyze the reasons for the observed differences in performance.

Answer : MobileNet performed best overall with the best validation accuracy of about 87.32%, then ShuffleNet at about 85.25%, and the Inception model in last place at about 84.72%. MobileNet succeeds because it performs well with depthwise separable convolutions and inverted residuals, allowing it to learn deep features at a relatively low parameter and computational expense. On the other hand, ShuffleNet while also efficiency improved through channel shuffling and grouped convolutions trades off somewhat in capacity to maintain lightweight characteristics, which could have contributed to its having slightly lower performance. The Inception model might have faltered since its greater complexity and more dense computation required more tweaks or extra hours of training before it was optimally used in this particular data.

## Step 3: References
Include details on all the resources used to complete this part.

https://youtu.be/yofjFQddwHE?si=R5u6AcHT9IoAkbn2 - TransferLearning

https://arxiv.org/abs/1807.11164 -- ShuffleNet


Inception model - https://arxiv.org/abs/1512.00567

Food 11 Dataset - Kaggle