In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import DataLoader
from pathlib import Path
from PIL import Image
from torch import Tensor
from torch.utils.data import Dataset
from typing import List, Tuple

# Select the compute device: prefer CUDA when PyTorch reports one, else fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [10]:
# Define the dataset class
class GroceryStoreDataset(Dataset):
    """Image-classification dataset driven by a ``<root>/<split>.txt`` index file.

    Every non-empty line of the index file must hold three ``", "``-separated
    fields. The first is the image path relative to ``root``, the last is the
    integer class label; the middle field is read but ignored.

    Args:
        split: Name of the split; the index file ``<root>/<split>.txt`` is read.
        transform: Optional callable applied to the loaded PIL image.
        root: Directory containing the index files and the images. Defaults to
            the location the notebook originally hard-coded.
    """

    def __init__(self, split: str, transform=None, root="GroceryStoreDataset/dataset") -> None:
        super().__init__()
        self.root = Path(root)
        self.split = split
        self.paths, self.labels = self.read_file()
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of indexed samples."""
        return len(self.labels)

    def __getitem__(self, idx) -> Tuple[Tensor, int]:
        """Load sample ``idx`` and return ``(image, label)``.

        The image is always converted to 3-channel RGB so that grayscale,
        palette or RGBA files do not break channel-wise transforms, and the
        underlying file handle is closed before returning.
        """
        # Image.open is lazy; convert() forces the pixel data to be read and
        # returns an independent image, so the file can be closed right away.
        with Image.open(self.root / self.paths[idx]) as raw:
            img = raw.convert("RGB")
        label = self.labels[idx]
        if self.transform:
            img = self.transform(img)
        return img, label

    def read_file(self) -> Tuple[List[str], List[int]]:
        """Parse the split's index file into parallel path and label lists.

        Blank lines are skipped.

        Raises:
            ValueError: If a non-empty line does not have exactly three
                fields, or its label is not an integer.
        """
        paths = []
        labels = []
        index_path = self.root / f"{self.split}.txt"
        with open(index_path) as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # Tolerate blank / trailing empty lines in the index file.
                    continue
                fields = line.split(", ")
                if len(fields) != 3:
                    raise ValueError(
                        f"{index_path}:{line_no}: expected 3 ', '-separated fields, "
                        f"got {len(fields)}"
                    )
                path, _, label = fields
                paths.append(path)
                labels.append(int(label))
        return paths, labels

    def get_num_classes(self) -> int:
        """Return ``max(label) + 1``, i.e. the class count assuming 0-based labels.

        Raises:
            ValueError: If the split contains no samples.
        """
        if not self.labels:
            raise ValueError(f"split {self.split!r} contains no samples")
        return max(self.labels) + 1

In [11]:
# Preprocessing shared by all three splits: resize to 224x224, convert to a
# tensor, then normalise each channel with the given mean / std
# (these values match the commonly used ImageNet statistics).
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Build one dataset per split, all using the same preprocessing pipeline.
train_dataset, val_dataset, test_dataset = (
    GroceryStoreDataset(split=split_name, transform=transform)
    for split_name in ("train", "val", "test")
)

# The class count is derived from the labels present in the training split.
num_classes = train_dataset.get_num_classes()
print(f"Number of classes in the dataset: {num_classes}")

Number of classes in the dataset: 43


In [12]:
# Define the Bottleneck block
class Bottleneck(nn.Module):
    """Residual bottleneck with a grouped 3x3 convolution.

    Layout: 1x1 conv (in -> mid) -> grouped 3x3 conv (mid -> mid, carries the
    stride) -> 1x1 conv (mid -> mid * expansion), each followed by batch norm,
    with ReLU after the first two and after the residual addition.

    Args:
        in_channels: Channels of the incoming feature map.
        mid_channels: Width of the two inner convolutions.
        stride: Stride of the 3x3 convolution (and of the shortcut projection).
        groups: Number of groups of the 3x3 convolution. Defaults to 32, the
            value that was previously hard-coded.

    Raises:
        ValueError: If ``mid_channels`` is not divisible by ``groups``.
    """

    # Output channels are ``mid_channels * expansion``.
    expansion = 2

    def __init__(self, in_channels, mid_channels, stride=1, groups=32):
        super().__init__()
        if groups < 1 or mid_channels % groups != 0:
            # nn.Conv2d would reject this too; fail early with a clearer message.
            raise ValueError(
                f"mid_channels ({mid_channels}) must be divisible by groups ({groups})"
            )
        out_channels = mid_channels * self.expansion

        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)

        # A projection shortcut is only needed when the main path changes the
        # spatial size (stride != 1) or the channel count.
        self.downsample = None
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            # Project the input so it matches the main path's shape.
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

In [13]:
# Define the ResNeXt architecture
class ResNeXt(nn.Module):
    """Convolutional classifier: a stem, four residual stages, pooling and a linear head.

    Args:
        block: Residual block class. It is called as
            ``block(in_channels, mid_channels, stride)`` and must expose an
            ``expansion`` attribute giving its output-width multiplier.
        layers: Sequence whose first four entries give the number of blocks in
            stages 1-4.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        # Channel count fed into the next block; advanced by _make_layer.
        self.in_channels = 64

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages of increasing width; all but the first halve the resolution.
        self.layer1 = self._make_layer(block, mid_channels=128, blocks=layers[0])
        self.layer2 = self._make_layer(block, mid_channels=256, blocks=layers[1], stride=2)
        self.layer3 = self._make_layer(block, mid_channels=512, blocks=layers[2], stride=2)
        self.layer4 = self._make_layer(block, mid_channels=1024, blocks=layers[3], stride=2)

        # Head: global average pool followed by the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

    def _make_layer(self, block, mid_channels, blocks, stride=1):
        """Build one stage and advance ``self.in_channels`` to its output width.

        Only the first block receives ``stride``; the remaining ``blocks - 1``
        blocks are built with the block's default stride.
        """
        head_block = block(self.in_channels, mid_channels, stride)
        self.in_channels = mid_channels * block.expansion
        tail_blocks = [block(self.in_channels, mid_channels) for _ in range(blocks - 1)]
        return nn.Sequential(head_block, *tail_blocks)

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        feature_extractor = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
        )
        for module in feature_extractor:
            x = module(x)

        pooled = self.avgpool(x)
        return self.fc(pooled.flatten(1))

In [14]:
# Seed PyTorch's global RNG before building the model so that weight
# initialisation is repeatable across "Restart Kernel -> Run All".
# NOTE: this does not by itself guarantee bit-identical results on GPU.
SEED = 42
torch.manual_seed(SEED)

# Instantiate the model with stage depths [3, 4, 6, 3] and move it to the device
model = ResNeXt(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
model = model.to(device)

# Dedicated generator so the training shuffle order is reproducible and
# independent of how much of the global RNG stream other cells consume.
shuffle_generator = torch.Generator().manual_seed(SEED)

# DataLoaders: only the training split is shuffled
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, generator=shuffle_generator)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [15]:
# Training objective and optimiser: cross-entropy loss, Adam over every model
# parameter with a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training and validation loop
def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100 * correct / total

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

def test_model(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss /= len(test_loader)
    test_accuracy = 100 * correct / total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

In [16]:
# Run 20 epochs of training, printing train / validation metrics after each epoch
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=20,
)

Epoch [1/20], Train Loss: 2.8543, Train Accuracy: 27.50%, Val Loss: 5.4300, Val Accuracy: 16.55%
Epoch [2/20], Train Loss: 1.9312, Train Accuracy: 42.01%, Val Loss: 2.5268, Val Accuracy: 31.76%
Epoch [3/20], Train Loss: 1.6171, Train Accuracy: 48.83%, Val Loss: 2.8737, Val Accuracy: 31.42%
Epoch [4/20], Train Loss: 1.3777, Train Accuracy: 56.29%, Val Loss: 2.5229, Val Accuracy: 44.26%
Epoch [5/20], Train Loss: 1.1884, Train Accuracy: 60.34%, Val Loss: 2.0416, Val Accuracy: 39.53%
Epoch [6/20], Train Loss: 1.0413, Train Accuracy: 66.10%, Val Loss: 2.2655, Val Accuracy: 40.88%
Epoch [7/20], Train Loss: 0.8722, Train Accuracy: 71.86%, Val Loss: 3.0473, Val Accuracy: 35.14%
Epoch [8/20], Train Loss: 0.7440, Train Accuracy: 75.91%, Val Loss: 2.3233, Val Accuracy: 41.22%
Epoch [9/20], Train Loss: 0.6739, Train Accuracy: 77.27%, Val Loss: 1.9089, Val Accuracy: 47.97%
Epoch [10/20], Train Loss: 0.5551, Train Accuracy: 81.48%, Val Loss: 1.9216, Val Accuracy: 43.24%
Epoch [11/20], Train Loss: 0.

In [17]:
# Report loss and accuracy of the trained model on the held-out test split
test_model(
    model=model,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)

Test Loss: 1.9772, Test Accuracy: 61.37%
