In [1]:
!git clone https://github.com/marcusklasson/GroceryStoreDataset.git

Cloning into 'GroceryStoreDataset'...
remote: Enumerating objects: 6559, done.
remote: Counting objects: 100% (266/266), done.
remote: Compressing objects: 100% (231/231), done.
remote: Total 6559 (delta 45), reused 35 (delta 35), pack-reused 6293
Receiving objects: 100% (6559/6559), 116.26 MiB | 17.79 MiB/s, done.
Resolving deltas: 100% (275/275), done.
Updating files: 100% (5717/5717), done.


In [2]:
from pathlib import Path
from PIL import Image
from torch import Tensor
from torch.utils.data import Dataset
from typing import List, Tuple

In [3]:
class GroceryStoreDataset(Dataset):
    """Image-classification dataset driven by a ``<split>.txt`` index file.

    The index file lives at ``GroceryStoreDataset/dataset/<split>.txt`` and
    each non-empty line holds ``path, fine-grained class, coarse-grained class``.
    Only the path and the coarse-grained class are kept.

    Args:
        split: name of the index file without the ``.txt`` extension
            (the notebook uses ``'train'``, ``'val'`` and ``'test'``).
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, split: str, transform=None) -> None:
        super().__init__()

        self.root = Path("GroceryStoreDataset/dataset")
        self.split = split
        self.paths, self.labels = self.read_file()

        self.transform = transform

    def __len__(self) -> int:
        """Return the number of samples listed in the index file."""
        return len(self.labels)

    def __getitem__(self, idx) -> Tuple[Tensor, int]:
        """Load sample ``idx`` and return ``(image, coarse_label)``.

        The image is returned as a PIL image when no transform is set,
        otherwise as whatever the transform returns.
        """
        # Image.open is lazy and keeps the file handle open; convert() forces
        # the pixel data to be read, so the handle can be released right away.
        # Converting to RGB also guarantees three channels for images stored
        # as grayscale, palette or RGBA.
        with Image.open(self.root / self.paths[idx]) as handle:
            img = handle.convert("RGB")
        label = self.labels[idx]

        if self.transform:
            img = self.transform(img)

        return img, label

    def read_file(self) -> Tuple[List[str], List[int]]:
        """Parse ``<root>/<split>.txt`` into parallel path and label lists.

        Returns:
            ``(paths, labels)`` where ``paths[i]`` is relative to ``self.root``
            and ``labels[i]`` is the integer coarse-grained class.

        Raises:
            ValueError: if a non-empty line does not have three
                comma-separated fields or its label is not an integer.
        """
        paths: List[str] = []
        labels: List[int] = []

        with open(self.root / f"{self.split}.txt") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank / trailing empty lines in the index file.
                    continue
                # path, fine-grained class, coarse-grained class
                # rsplit keeps any comma inside the path attached to the path.
                path, _, label = (field.strip() for field in line.rsplit(",", 2))
                paths.append(path)
                labels.append(int(label))

        return paths, labels

    def get_num_classes(self) -> int:
        """Return ``max(label) + 1``, or 0 when the split has no samples."""
        return max(self.labels, default=-1) + 1

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

In [6]:
# Preprocessing shared by every split: resize to 128x128, convert to a tensor,
# then normalise per channel (the constants look like the usual ImageNet
# channel statistics -- confirm if the source of the numbers matters).
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# One dataset per split, built in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    GroceryStoreDataset(split=split_name, transform=transform)
    for split_name in ("train", "val", "test")
)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_dataset, batch_size=32, shuffle=False)
    for eval_dataset in (val_dataset, test_dataset)
)


In [7]:
class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages and a two-layer classifier.

    Args:
        num_classes: number of output logits.
        input_size: spatial size of the input images, either an int for
            square inputs or a ``(height, width)`` pair. Defaults to 128,
            which reproduces the original ``128 * 16 * 16`` flatten size.

    Raises:
        ValueError: if either spatial dimension is smaller than 8, i.e. would
            be pooled down to nothing.
    """

    def __init__(self, num_classes, input_size=128):
        super(SimpleCNN, self).__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # The padded 3x3 convolutions keep the spatial size; each of the three
        # 2x2 / stride-2 max-pools floors it in half, so the total is // 8.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small; need at least 8x8"
            )

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(128 * feat_h * feat_w, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image batch to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        # Keep the batch dimension, flatten channels and spatial dims together.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Size the output layer from the largest label found in the training split.
num_classes = train_dataset.get_num_classes()
model = SimpleCNN(num_classes)


In [8]:
# Objective and optimiser for the from-scratch CNN.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
for epoch in range(num_epochs):
    # Re-enable training behaviour (e.g. dropout) at the start of each epoch.
    model.train()
    epoch_loss = 0.0
    for batch_images, batch_labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()
        epoch_loss += batch_loss.item()

    # Report the mean of the per-batch losses for this epoch.
    mean_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")


Epoch [1/20], Loss: 2.8945
Epoch [2/20], Loss: 1.8175
Epoch [3/20], Loss: 1.2197
Epoch [4/20], Loss: 0.8121
Epoch [5/20], Loss: 0.5445
Epoch [6/20], Loss: 0.3922
Epoch [7/20], Loss: 0.3304
Epoch [8/20], Loss: 0.2208
Epoch [9/20], Loss: 0.2388
Epoch [10/20], Loss: 0.1770
Epoch [11/20], Loss: 0.1682
Epoch [12/20], Loss: 0.0878
Epoch [13/20], Loss: 0.0922
Epoch [14/20], Loss: 0.1184
Epoch [15/20], Loss: 0.1107
Epoch [16/20], Loss: 0.0977
Epoch [17/20], Loss: 0.1034
Epoch [18/20], Loss: 0.0837
Epoch [19/20], Loss: 0.0922
Epoch [20/20], Loss: 0.0842


In [9]:
# Switch to inference mode (disables the dropout layers) before scoring.
model.eval()

# A single loop handles both held-out splits instead of two copy-pasted blocks.
for split_name, loader in (("Validation", val_loader), ("Test", test_loader)):
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            # The predicted class is the index of the largest logit per row;
            # no `.data` detour is needed inside no_grad().
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise raise ZeroDivisionError.
    accuracy = 100 * correct / total if total else float("nan")
    print(f'{split_name} Accuracy: {accuracy:.2f}%')


Validation Accuracy: 33.45%
Test Accuracy: 49.66%


In [11]:
# Run on CUDA when torch reports an available GPU, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [12]:
from torchvision import models

# Load a pretrained ResNet-18. torchvision >= 0.13 deprecates `pretrained=True`
# in favour of the `weights=` enum (IMAGENET1K_V1 is what `pretrained=True`
# resolves to); older releases do not have the enum, so fall back for them.
if hasattr(models, "ResNet18_Weights"):
    model_ft = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model_ft = models.resnet18(pretrained=True)

# Replace the final fully connected layer so it emits one logit per class.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)

model_ft = model_ft.to(device)

In [13]:
# Cross-entropy objective; SGD with momentum updates every parameter of
# model_ft (the whole network is trained, not only the replaced head).
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(
    params=model_ft.parameters(),
    lr=1e-3,
    momentum=0.9,
)


In [14]:
num_epochs = 20
for epoch in range(num_epochs):
    model_ft.train()
    epoch_loss = 0.0
    for batch_images, batch_labels in train_loader:
        # Batches come from the loader on the CPU; move them to the device
        # that model_ft was placed on.
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer_ft.zero_grad()
        batch_loss = criterion(model_ft(batch_images), batch_labels)
        batch_loss.backward()
        optimizer_ft.step()
        epoch_loss += batch_loss.item()

    # Report the mean of the per-batch losses for this epoch.
    mean_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# Evaluate the model: switch to inference mode, then score both held-out
# splits with one shared loop instead of two copy-pasted blocks.
model_ft.eval()
for split_name, loader in (("Validation", val_loader), ("Test", test_loader)):
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            # The predicted class is the index of the largest logit per row;
            # no `.data` detour is needed inside no_grad().
            predicted = model_ft(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise raise ZeroDivisionError.
    accuracy = 100 * correct / total if total else float("nan")
    print(f'{split_name} Accuracy: {accuracy:.2f}%')


Epoch [1/20], Loss: 2.4671
Epoch [2/20], Loss: 0.9820
Epoch [3/20], Loss: 0.4683
Epoch [4/20], Loss: 0.2526
Epoch [5/20], Loss: 0.1589
Epoch [6/20], Loss: 0.0983
Epoch [7/20], Loss: 0.0743
Epoch [8/20], Loss: 0.0508
Epoch [9/20], Loss: 0.0422
Epoch [10/20], Loss: 0.0364
Epoch [11/20], Loss: 0.0301
Epoch [12/20], Loss: 0.0248
Epoch [13/20], Loss: 0.0225
Epoch [14/20], Loss: 0.0192
Epoch [15/20], Loss: 0.0181
Epoch [16/20], Loss: 0.0178
Epoch [17/20], Loss: 0.0147
Epoch [18/20], Loss: 0.0132
Epoch [19/20], Loss: 0.0149
Epoch [20/20], Loss: 0.0111
Validation Accuracy: 72.64%
Test Accuracy: 76.38%
