### Course Project: Household Item Classifier

**Author**:      Benjamin Luo \
**Email**: b33luo@uwaterloo.ca \
**Course**: SYDE 572 (F23)

**Python ver**: 3.11.6 

In [1]:
# Dataset locations: the train/test folders are read with ImageFolder, and the
# validation folder with a path-returning ImageFolder subclass further down.
train_dir = './train2'
test_dir = './test2'
validation_dir = './validation'

# Training hyperparameters
batch_size = 16  # mini-batch size used by the train and test DataLoaders
lr = 0.12  # learning rate handed to the optimizer
max_epochs = 12  # number of passes over the training set

In [2]:
# %pip install torch==2.1.1
# %pip install torchvision==0.16.1
# %pip install matplotlib
# %pip install scikit-learn
# %pip install pandas
# %pip install skorch

In [3]:
import numpy as np
import torch 
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Pick the best available accelerator: Apple MPS first (for Mac users), then
# CUDA, and finally fall back to the CPU so the notebook runs anywhere.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#### Dataloader and pre-processing

In [4]:
# Pre-processing applied to the training images. The grayscale/flip steps that
# were experimented with are kept for reference but remain disabled.
train_transform = transforms.Compose([
    # transforms.Grayscale(num_output_channels=1),
    # An int size rescales the shorter image edge to 255 px (see torchvision docs)
    transforms.Resize(size=255),
    # transforms.RandomHorizontalFlip(p=0.5),  # Flip because half the images are inverted
    # transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    # Per-channel mean/std normalisation (fine-tuning)
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Evaluation images go through the same resize / tensor / normalise steps
test_transform = transforms.Compose([
    # transforms.Grayscale(num_output_channels=1),
    transforms.Resize(size=255),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# One sub-folder per class; ImageFolder derives the labels from the folder names
trainset = datasets.ImageFolder(root=train_dir, transform=train_transform)
testset = datasets.ImageFolder(root=test_dir, transform=test_transform)

# Only the training batches are shuffled
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=batch_size)

# Lookup table from ImageFolder label index to class (folder) name
classes = trainset.classes

# Fix the random seeds so runs are repeatable
torch.manual_seed(1217)
torch.mps.manual_seed(1217)
torch.backends.cudnn.deterministic = True
np.random.seed(1217)

### Transfer Learning

Here, ResNet50 and MobileNetv2 produced the best results. ResNet50 was more consistent, so it is used for the final submission.

For the optimizer, ADAM generally achieved noticeably better accuracy than SGD.

In [5]:
import torchvision.models as models

# Load the ImageNet-pretrained backbone
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
# model = models.mobilenet_v2(pretrained=True)
model.to(device=device)

# Freeze the pretrained weights so that only the new head below is trained
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with a fresh head sized to the dataset's classes
# (22 household items for the course data). Taking the size from `classes`
# keeps the head in step with the class folders ImageFolder actually found.
num_classes = len(classes)
model.fc = nn.Linear(model.fc.in_features, num_classes, device=device)  # resnet50
# model.classifier[1] = nn.Linear(model.last_channel, num_classes, device=device)  # mobilenetv2

criterion = nn.CrossEntropyLoss()

# Give the optimizer only the trainable parameters (the new head); the frozen
# backbone weights never receive gradients, so there is nothing to update there.
params = [p for p in model.parameters() if p.requires_grad]

optimizer = optim.SGD(params, lr=lr)
# optimizer = optim.Adam(params, lr=lr)

### Training and Testing

In [6]:
def train(model, trainloader, optimizer, criterion):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        trainloader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        The unweighted average of the per-batch loss values, as computed by
        ``np.average`` (``nan`` when ``trainloader`` yields no batches).
    """
    model.train()
    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` global.
    model_device = next(model.parameters()).device

    batch_losses = []
    for images, labels in trainloader:
        images = images.to(model_device)
        labels = labels.to(model_device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
    return np.average(batch_losses)

In [7]:
def eval_train(model, testloader, testset):
    """Return the model's classification accuracy over ``testloader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        testloader: Iterable yielding ``(images, labels)`` batches.
        testset: Sized collection behind ``testloader``; its length is the
            denominator of the accuracy.

    Returns:
        A 0-dim float tensor holding ``correct / len(testset)``. It is ``nan``
        when both the loader and the dataset are empty.
    """
    model.eval()
    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` global.
    model_device = next(model.parameters()).device

    # Start from a tensor so the result is a tensor even if no batch is seen
    correct = torch.zeros((), dtype=torch.float32, device=model_device)

    # Inference only: skip autograd bookkeeping
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(model_device)
            labels = labels.to(model_device)

            preds = model(images).argmax(dim=1)
            correct += preds.eq(labels).sum()

    return correct / len(testset)

The below code trains the model using the trainloader data

In [8]:
# One call to train() is one full pass over the training data
for epoch in range(max_epochs):
    train(model=model, trainloader=trainloader, optimizer=optimizer, criterion=criterion)

...and then returns the accuracy of the test set

In [9]:
# Score the trained model on the test split and print the fraction classified correctly
accu = eval_train(model=model, testloader=testloader, testset=testset)
print('Accuracy: ', accu.item())

Accuracy:  0.9342359900474548


### Appendix 1: Outputting Results in a CSV

First, define a custom class based on the ImageFolder class. This is important for extracting the pathname from the file for outputting into the CSV

In [10]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the source file path."""

    def __getitem__(self, index):
        """Return ``(img, label, path)`` for the sample at ``index``."""
        sample, target = super().__getitem__(index)
        # Each entry of ``imgs`` is a (file path, class index) pair
        file_path, _ = self.imgs[index]
        return sample, target, file_path
    
# Validation images get the same pre-processing as the test images and are
# served one at a time, in dataset order (the DataLoader defaults, spelled out).
validationset = ImageFolderWithPaths(root=validation_dir, transform=test_transform)
validationloader = torch.utils.data.DataLoader(dataset=validationset, batch_size=1, shuffle=False)

Next, run the model on the entire validation set, keeping track of the predicted labels, then reformat the results into a dataframe and save it as a CSV file

In [11]:
def validation_pred(model, loader=None):
    """Predict a class index for every image served by a path-aware loader.

    Args:
        model: Network to run; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels, paths)`` batches. When
            omitted, the module-level ``validationloader`` is used.

    Returns:
        A ``(predictions, filenames)`` pair of equal-length lists: the
        predicted class index and the file path of each image, in the order
        the loader produced them.
    """
    if loader is None:
        loader = validationloader

    model.eval()
    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` global.
    model_device = next(model.parameters()).device

    predictions = []
    filenames = []
    with torch.no_grad():
        for images, _, paths in loader:
            logits = model(images.to(model_device))
            predicted = logits.argmax(dim=1)
            predictions.extend(predicted.cpu().numpy())
            filenames.extend(paths)

    return predictions, filenames

In [12]:
from pathlib import Path

import pandas as pd

predictions, filenames = validation_pred(model)

# Map the predicted class indices back to class names
predictions = [classes[predict] for predict in predictions]

# The image id is the numeric file stem (e.g. ".../123.jpg" -> 123). Path.stem
# handles the host OS's path separators and extensions of any length, unlike
# splitting on '/' and chopping a fixed four characters.
filenames = [int(Path(filename).stem) for filename in filenames]

# Create a DataFrame with the id column first, ordered by id
df = pd.DataFrame({'id': filenames, 'category': predictions})
df = df.sort_values(by='id')

# Save the DataFrame to a CSV file
df.to_csv('luo.csv', index=False)

### Appendix 2: Visualizing Outputs

The below cell is experimental code that randomly selects an image from the test set and plots it via matplotlib, as well as prints its true and predicted label for empirical analysis

In [13]:
# import matplotlib.pyplot as plt
# import random

# def rand_pred(model):
#     model.eval()
#     image, label = random.choice(testloader.dataset)
#     mps_image = image.to(device)

#     outputs = model(mps_image.unsqueeze(0))
#     _,pred = outputs.max(1)
    
#     print("Predicted:", classes[pred.item()])
#     print("True:", classes[label])
#     plt.imshow(image.permute(1, 2, 0))

# rand_pred(model)

### Appendix 3: Hyperparameter Optimization

Grid search is manually implemented. The hyperparameters of interest are **epochs, learning rate (lr), and batch size**. Approximately 600 combinations are attempted in total for the following pretrained models:

* VGG19
* ResNet50
* ResNet34
* MobileNetv2 (regular and quantized)
* MobileNetv3 (small and large)

For the optimizer, both SGD and ADAM were attempted but ADAM consistently returned better results and was subsequently used for more trials. The grid parameters for ADAM are:

```py
    'max_epochs': [8, 10, 12, 14, 16, 18, 20]  # Must be kept small for transfer learning
    'lr': [0.005, 0.01, 0.015]                 # Optimal lr range for ADAM optimizer
    'batch_size': [8, 14, 16, 24, 36, 48, 64]  # Trying different values (for fine-tuning)
```

MobileNetv2 and ResNet50 produced the best results, with ResNet50 being more consistent and the selected algorithm

In [14]:
# import random

# grid_params = {
#     'max_epochs': [13],
#     'lr': [0.12],
#     'batch_size': [8, 16, 22, 24, 32, 36, 48, 52, 56, 64]
# }

# import torchvision.models as models

# for epochs in grid_params['max_epochs']:
#     for lrs in grid_params['lr']:
#         for batch_sizes in grid_params['batch_size']:

#             seed = random.randint(0, 10000)
#             torch.manual_seed(seed)
#             torch.mps.manual_seed(seed)
#             torch.backends.cudnn.deterministic = True
#             random.seed(seed)
#             np.random.seed(seed)

#             # Load data
#             trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_sizes, shuffle=True)
#             testloader = torch.utils.data.DataLoader(testset, batch_size=batch_sizes)

#             # Load model
#             model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
#             # model = models.mobilenet_v2(pretrained=True)
#             # models.quantization.mobilenet_v2(pretrained='IMAGENET1K_QNNPACK_V1')

#             # model = models.mobilenet_v3_large(pretrained=True)
#             # model = models.mobilenet_v3_small(pretrained=True)
#             # model = models.vgg19()
#             model.to(device=device)

#             # Freeze model
#             for param in model.parameters():
#                 param.requires_grad = False

#             # Define final layer resnet50
#             model.fc = nn.Linear(model.fc.in_features, 22, device=device)
#             # model = nn.Sequential(model, nn.Dropout(0.5))
#             # model.classifier[1] = nn.Linear(model.last_channel, 22, device=device)  #mobilenetv2
#             # model.classifier = nn.Linear(960, 22, device=device)  #mobilenet v3 large
#             # model.classifier[-1] = nn.Linear(1024, 22, device=device)  #mobilenet v3 small
#             # model.classifier[-1] = torch.nn.Linear(model.classifier[-1].in_features, 22, device=device)  #vgg19

#             criterion = nn.CrossEntropyLoss()
#             params = model.parameters()

#             optimizer = optim.SGD(params, lr=lrs)
#             # optimizer = optim.Adam(params, lr=lrs)

#             # Training
#             for ep in range(epochs):
#                 train(model, trainloader, optimizer, criterion)
            
#             # Test accuracy and output
#             accu = eval_train(model, testloader, testset)
#             print ('Accuracy: {}, epoch: {}, lr: {}, batch size: {}, seed: {}'.format(accu.item(), epochs, lrs, batch_sizes, seed))
