In [74]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as opt

import tarfile

import os
import cv2

import torchvision
from torchvision import transforms
from torchvision.datasets import VisionDataset
from torchvision.models import alexnet

from torch.utils.data import Subset, DataLoader

from math import ceil

from torch.backends import cudnn
from tqdm import tqdm

DEVICE = 'cuda'

In [75]:
#with tarfile.open('caltech-101/101_ObjectCategories.tar.gz', 'r:gz') as tar_f:
#    tar_f.extractall('data/')
#
#with tarfile.open('caltech-101/Annotations.tar', 'r:') as tar_f:
#    tar_f.extractall('data/')

In [76]:
from PIL import Image


def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

def create_label_dict(labels):

    label_to_int = {}
    int_to_label = {}
    label_set = sorted(list(set(labels)), key= lambda x: x.lower())

    for i, l in enumerate(label_set):

            label_to_int[l] = i
            int_to_label[i] = l
    
    return label_to_int, int_to_label

In [77]:
class Caltech(VisionDataset):
    def __init__(self, root, split='train', background_class= False, transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)

        self.split = split

        self.images = []
        self.str_labels = []

        split_path = 'train.txt' if split == 'train' else 'test.txt'

        with open(split_path, 'r') as f:
            for line in f:
                cat = line.split('/')[0]
                if background_class or cat != 'BACKGROUND_Google':
                    self.images.append(pil_loader('data/101_ObjectCategories/' + line.strip('\n')))
                    self.str_labels.append(cat)
        
        self.label_to_int, self.int_to_label = create_label_dict(self.str_labels)

        self.labels = [self.label_to_int[l] for l in self.str_labels]

    def __getitem__(self, index):

        image = self.images[index]
        label = self.labels[index]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self): return len(self.labels)

    def get_label(self, index): return self.int_to_label[index]

    def info(self): print(f'{self.split} set - N: {len(self.labels)} - L: {len(self.label_to_int)}')

In [78]:
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),# Normalizes tensor with mean and standard deviation
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [79]:
train_DS = Caltech('', split= 'train', background_class= False, transform= train_transform)
test_dataset = Caltech('', split= 'test', background_class= False, transform= eval_transform)

In [80]:
print(len(train_DS))
print(train_DS[0][1])
print(train_DS.int_to_label[train_DS[0][1]])
print(train_DS.label_to_int['bass'])
train_DS.info()

5784
0
accordion
5
train set - N: 5784 - L: 101


In [81]:
train_indexes = [] # split the indices for your train split
val_indexes = []# split the indices for your val split
c = train_DS[0][1]
tmp_idx = [0]
for i in range(1, len(train_DS)):
    if train_DS[i][1] == c: tmp_idx.append(i)
    else:
        for cc in tmp_idx[:-ceil(len(tmp_idx)/4)]: train_indexes.append(cc)
        for cc in tmp_idx[-ceil(len(tmp_idx)/4):]: val_indexes.append(cc)
        c = train_DS[i][1]
        tmp_idx = [i]

train_dataset = Subset(train_DS, train_indexes)
val_dataset = Subset(train_DS, val_indexes)

print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 4269
Valid Dataset: 1475
Test Dataset: 2893


In [82]:
BATCH_SIZE = 256

LR = 1e-2

MOMENTUM = 0.9

WEIGHT_DECAY = 5e-5

STEP_SIZE = 20
GAMMA = 0.1

NUM_EPOCHS = 50

LOG_FREQUENCY = 10

In [83]:
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [84]:
net = alexnet(weights= 'AlexNet_Weights.DEFAULT')

net.classifier[6] = nn.Linear(4096, len(train_DS.label_to_int))

for param in net.features[:6].parameters():
    param.requires_grad = False

In [85]:
criterion = nn.CrossEntropyLoss()

parameters_to_optimize = net.parameters()

optimizer = opt.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
#optimizer = opt.Adam(parameters_to_optimize, lr = LR)

scheduler = opt.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [86]:
print(torch.cuda.is_available())

num_cuda_devices = torch.cuda.device_count()

print(f"Number of CUDA devices: {num_cuda_devices}")

True
Number of CUDA devices: 1


In [87]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_last_lr()))

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Step the scheduler
  scheduler.step()

Starting epoch 1/50, LR = [0.01]
Step 0, Loss 4.524649620056152
Step 10, Loss 1.637069582939148
Starting epoch 2/50, LR = [0.01]
Step 20, Loss 0.5294324159622192
Step 30, Loss 0.4342718720436096
Starting epoch 3/50, LR = [0.01]
Step 40, Loss 0.22135871648788452
Starting epoch 4/50, LR = [0.01]
Step 50, Loss 0.12436827272176743
Step 60, Loss 0.11563487350940704
Starting epoch 5/50, LR = [0.01]
Step 70, Loss 0.06882558763027191
Starting epoch 6/50, LR = [0.01]
Step 80, Loss 0.04028870537877083
Step 90, Loss 0.04271889850497246
Starting epoch 7/50, LR = [0.01]
Step 100, Loss 0.04260551556944847
Step 110, Loss 0.01463178638368845
Starting epoch 8/50, LR = [0.01]
Step 120, Loss 0.016040584072470665
Starting epoch 9/50, LR = [0.01]
Step 130, Loss 0.017250273376703262
Step 140, Loss 0.020122509449720383
Starting epoch 10/50, LR = [0.01]
Step 150, Loss 0.008123449049890041
Starting epoch 11/50, LR = [0.01]
Step 160, Loss 0.014727122150361538
Step 170, Loss 0.013463119976222515
Starting epoch 1

In [88]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(val_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(val_dataset))

print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 6/6 [00:03<00:00,  1.91it/s]

Validation Accuracy: 0.8664406779661017





In [89]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:06<00:00,  1.91it/s]

Test Accuracy: 0.8683027998617352



