In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import DataLoader
from pathlib import Path
from PIL import Image
from torch import Tensor
from torch.utils.data import Dataset
from typing import List, Tuple


# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Define the dataset class
class GroceryStoreDataset(Dataset):
    def __init__(self, split: str, transform=None) -> None:
        super().__init__()
        self.root = Path("GroceryStoreDataset/dataset")
        self.split = split
        self.paths, self.labels = self.read_file()
        self.transform = transform

    def __len__(self) -> int:
        return len(self.labels)

    def __getitem__(self, idx) -> Tuple[Tensor, int]:
        img = Image.open(self.root / self.paths[idx])
        label = self.labels[idx]
        if self.transform:
            img = self.transform(img)
        return img, label

    def read_file(self) -> Tuple[List[str], List[int]]:
        paths = []
        labels = []
        with open(self.root / f"{self.split}.txt") as f:
            for line in f:
                path, _, label = line.replace("\n", "").split(", ")
                paths.append(path)
                labels.append(int(label))
        return paths, labels

    def get_num_classes(self) -> int:
        return max(self.labels) + 1

In [3]:
# Data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Instantiate the datasets with the transform
train_dataset = GroceryStoreDataset(split='train', transform=transform)
val_dataset = GroceryStoreDataset(split='val', transform=transform)
test_dataset = GroceryStoreDataset(split='test', transform=transform)

# Calculate the number of classes from the train dataset
num_classes = train_dataset.get_num_classes()
print(f"Number of classes in the dataset: {num_classes}")

Number of classes in the dataset: 43


In [4]:
# Define the Bottleneck block
class Bottleneck(nn.Module):
    expansion = 2

    def __init__(self, in_channels, mid_channels, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride,
                               padding=1, groups=32, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, mid_channels * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(mid_channels * self.expansion)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if stride != 1 or in_channels != mid_channels * self.expansion:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, mid_channels * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(mid_channels * self.expansion)
            )

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

# Define the ResNeXt architecture
class ResNeXt(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        super(ResNeXt, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 128, layers[0])
        self.layer2 = self._make_layer(block, 256, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 512, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 1024, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

    def _make_layer(self, block, mid_channels, blocks, stride=1):
        layers = []
        layers.append(block(self.in_channels, mid_channels, stride))
        self.in_channels = mid_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, mid_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [5]:
# Training and validation loop
def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100 * correct / total

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

def test_model(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss /= len(test_loader)
    test_accuracy = 100 * correct / total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

In [9]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [10]:
# Part 1: Train custom ResNeXt-like model
model = ResNeXt(Bottleneck, [3, 4, 6, 3], num_classes=num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=20)

Epoch [1/20], Train Loss: 2.9263, Train Accuracy: 26.21%, Val Loss: 2.8523, Val Accuracy: 23.65%
Epoch [2/20], Train Loss: 1.9148, Train Accuracy: 41.25%, Val Loss: 3.4350, Val Accuracy: 28.04%
Epoch [3/20], Train Loss: 1.6592, Train Accuracy: 46.67%, Val Loss: 3.0935, Val Accuracy: 24.32%
Epoch [4/20], Train Loss: 1.3402, Train Accuracy: 55.11%, Val Loss: 2.9053, Val Accuracy: 28.38%
Epoch [5/20], Train Loss: 1.1897, Train Accuracy: 61.06%, Val Loss: 1.9029, Val Accuracy: 46.62%
Epoch [6/20], Train Loss: 0.9309, Train Accuracy: 68.71%, Val Loss: 2.5602, Val Accuracy: 37.50%
Epoch [7/20], Train Loss: 0.8629, Train Accuracy: 71.74%, Val Loss: 2.6008, Val Accuracy: 40.20%
Epoch [8/20], Train Loss: 0.8444, Train Accuracy: 72.84%, Val Loss: 2.8186, Val Accuracy: 33.78%
Epoch [9/20], Train Loss: 0.6117, Train Accuracy: 80.45%, Val Loss: 2.7902, Val Accuracy: 34.46%
Epoch [10/20], Train Loss: 0.5606, Train Accuracy: 82.69%, Val Loss: 3.1688, Val Accuracy: 37.50%
Epoch [11/20], Train Loss: 0.

In [11]:
test_model(model, test_loader, criterion, device)

Test Loss: 2.1650, Test Accuracy: 61.13%


'''part 2'''

In [14]:
# Load the pretrained ResNet-18 model
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [15]:
# Use the same optimizer and learning rate as Part 1
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10)
test_model(model, test_loader, criterion, device)

# Part 2: Fine-tune pretrained ResNet-18 model with improved hyperparameters

# Enhanced Data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

Epoch [1/10], Train Loss: 1.0681, Train Accuracy: 71.67%, Val Loss: 2.3785, Val Accuracy: 46.62%
Epoch [2/10], Train Loss: 0.2494, Train Accuracy: 92.99%, Val Loss: 0.9733, Val Accuracy: 72.97%
Epoch [3/10], Train Loss: 0.1277, Train Accuracy: 96.40%, Val Loss: 1.4549, Val Accuracy: 60.14%
Epoch [4/10], Train Loss: 0.1184, Train Accuracy: 97.05%, Val Loss: 2.0857, Val Accuracy: 52.03%
Epoch [5/10], Train Loss: 0.1465, Train Accuracy: 95.42%, Val Loss: 2.1275, Val Accuracy: 56.08%
Epoch [6/10], Train Loss: 0.1174, Train Accuracy: 96.70%, Val Loss: 2.5317, Val Accuracy: 52.36%
Epoch [7/10], Train Loss: 0.1226, Train Accuracy: 96.48%, Val Loss: 1.6202, Val Accuracy: 62.50%
Epoch [8/10], Train Loss: 0.0531, Train Accuracy: 98.52%, Val Loss: 1.0212, Val Accuracy: 69.59%
Epoch [9/10], Train Loss: 0.0412, Train Accuracy: 98.86%, Val Loss: 1.1600, Val Accuracy: 69.93%
Epoch [10/10], Train Loss: 0.0600, Train Accuracy: 98.56%, Val Loss: 1.5723, Val Accuracy: 63.51%
Test Loss: 1.4794, Test Accur

In [16]:
# Re-instantiate the datasets with enhanced transforms
train_dataset = GroceryStoreDataset(split='train', transform=transform)
val_dataset = GroceryStoreDataset(split='val', transform=transform)
test_dataset = GroceryStoreDataset(split='test', transform=transform)

# Re-instantiate the data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [17]:
# Re-instantiate the model and optimizer
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Use SGD with momentum for optimization
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [18]:
# Fine-tune with improved hyperparameters
train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=20)

Epoch [1/20], Train Loss: 1.7148, Train Accuracy: 55.45%, Val Loss: 1.0203, Val Accuracy: 67.91%
Epoch [2/20], Train Loss: 0.2254, Train Accuracy: 94.89%, Val Loss: 0.5576, Val Accuracy: 80.74%
Epoch [3/20], Train Loss: 0.0759, Train Accuracy: 98.60%, Val Loss: 0.5776, Val Accuracy: 80.07%
Epoch [4/20], Train Loss: 0.0383, Train Accuracy: 99.47%, Val Loss: 0.5021, Val Accuracy: 85.14%
Epoch [5/20], Train Loss: 0.0336, Train Accuracy: 99.58%, Val Loss: 0.5976, Val Accuracy: 81.42%
Epoch [6/20], Train Loss: 0.0625, Train Accuracy: 98.33%, Val Loss: 0.6100, Val Accuracy: 82.09%
Epoch [7/20], Train Loss: 0.0182, Train Accuracy: 99.73%, Val Loss: 0.4704, Val Accuracy: 84.46%
Epoch [8/20], Train Loss: 0.0111, Train Accuracy: 99.89%, Val Loss: 0.4598, Val Accuracy: 86.15%
Epoch [9/20], Train Loss: 0.0106, Train Accuracy: 99.85%, Val Loss: 0.5347, Val Accuracy: 83.11%
Epoch [10/20], Train Loss: 0.0117, Train Accuracy: 99.92%, Val Loss: 0.4371, Val Accuracy: 83.78%
Epoch [11/20], Train Loss: 0.

In [19]:
# Evaluate the fine-tuned ResNet-18 model with improved hyperparameters
test_model(model, test_loader, criterion, device)

Test Loss: 0.4361, Test Accuracy: 88.05%
