In [1]:
import torch
#dataset link: https://www.robots.ox.ac.uk/~vgg/data/pets/ || https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz || https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz
from torchvision.datasets import OxfordIIITPet
import matplotlib.pyplot as plt
from random import random
from torchvision.transforms import Resize, ToTensor
from torchvision.transforms.functional import to_pil_image

In [2]:
# Pre-processing applied to every image: resize to 224x224, then convert to a tensor.
# NOTE(review): there is no Normalize step in this pipeline; the pretrained
# weights loaded later presumably expect mean/std-normalized inputs -- confirm.
to_tensor = [
    Resize((224, 224)),
    ToTensor(),
]

In [3]:
# Display the pre-processing pipeline (the list of transform objects).
to_tensor

[Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn),
 ToTensor()]

In [4]:
import torchvision
from torchvision import transforms
import PIL
# Augmented variant of the pre-processing pipeline: resize, AutoAugment, then
# tensor conversion.
# NOTE: this assignment rebinds the name `transforms` from the torchvision
# module to a plain list, so the module is referenced through its fully
# qualified name here.
transforms = [
    Resize((224, 224)),
    torchvision.transforms.AutoAugment(),
    ToTensor(),
]

In [5]:
# Show the augmented pipeline; at this point `transforms` is the list, not the module.
print(transforms)

[Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn), AutoAugment(policy=AutoAugmentPolicy.IMAGENET, fill=None), ToTensor()]


In [6]:
class Compose:
    """Chain image transforms for datasets that call ``transforms(image, target)``.

    Each callable in ``transforms`` is applied, in order, to the image only;
    the target is handed back untouched.
    """

    def __init__(self, transforms):
        # Sequence of single-argument callables applied to the image.
        self.transforms = transforms

    def __call__(self, image, target):
        transformed = image
        for apply_step in self.transforms:
            transformed = apply_step(transformed)
        return transformed, target

def show_images(images, num_samples=40, cols=8):
    """Plot an evenly spaced selection of samples from ``images``.

    Args:
        images: Sized iterable whose items carry the image tensor at
            position 0 (e.g. ``(image, target)`` pairs).
        num_samples: Maximum number of images to draw.
        cols: Number of columns in the subplot grid.
    """
    # Size the stride from the argument itself rather than a global dataset,
    # so the function works for any collection passed in.
    total = len(images)
    if total == 0 or num_samples <= 0:
        return

    plt.figure(figsize=(15, 15))
    # At least 1, so inputs shorter than num_samples do not trigger a
    # modulo-by-zero.
    stride = max(total // num_samples, 1)
    rows = num_samples // cols + 1
    for i, img in enumerate(images):
        slot = i // stride
        # Stop once num_samples slots are filled; this also keeps the subplot
        # index inside the rows x cols grid and avoids loading the remainder.
        if slot >= num_samples:
            break
        if i % stride == 0:
            plt.subplot(rows, cols, slot + 1)
            plt.imshow(to_pil_image(img[0]))

# Fetch the Oxford-IIIT Pet data into the current directory (downloading if
# needed). Images go through the resize/to-tensor pipeline; targets are
# returned unchanged by Compose.
dataset = OxfordIIITPet(
    root=".",
    download=True,
    transforms=Compose(to_tensor),
)
# new_dataset = OxfordIIITPet(root=".", download=True, transforms=Compose(transforms))
# show_images(dataset)


Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:22<00:00, 35172196.82it/s]


Extracting oxford-iiit-pet/images.tar.gz to oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:01<00:00, 14033939.95it/s]


Extracting oxford-iiit-pet/annotations.tar.gz to oxford-iiit-pet


In [7]:
from torchvision.models import vit_b_16

In [8]:
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Hold out 20% of the dataset for evaluation. The split uses a fixed generator
# seed so it is identical across kernel restarts; otherwise a checkpoint saved
# in an earlier session could be evaluated on samples it was trained on.
train_split = int(0.8 * len(dataset))
split_generator = torch.Generator().manual_seed(0)
train, test = random_split(
    dataset,
    [train_split, len(dataset) - train_split],
    generator=split_generator,
)

# Both loaders shuffle: later cells draw a single batch with next(iter(...))
# to eyeball predictions on varied samples.
train_dataloader = DataLoader(train, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test, batch_size=32, shuffle=True)

In [24]:
# Sizes of the training and held-out subsets, one per line.
print(len(train), len(test), sep="\n")

2944
736


In [18]:
import torch.optim as optim
from torch import nn
from torch import Tensor
import numpy as np


# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without a CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.model = vit_b_16(weights='IMAGENET1K_V1')
#         for param in self.model.parameters():
#              param.requires_grad = False
#         self.model.heads = nn.Linear(768, 37)
#         self.model.heads.weight.data.normal_(mean=0.5, std=0.5)
#         self.model.heads.bias.data.zero_()
#     def forward(self, x: torch.Tensor):
#         x = self.model(x)
#         return x


# Pretrained ViT-B/16 with every existing parameter frozen.
model = vit_b_16(weights='IMAGENET1K_V1')
model.requires_grad_(False)
# Swap in a fresh linear head (768 -> 37); it is created after the freeze, so
# it is the only part that still requires gradients.
model.heads = nn.Linear(768, 37)

model.to(device)

optimizer = optim.AdamW(
    model.parameters(),
    lr=0.01,
    betas=(0.9, 0.999),
    weight_decay=0.1,
)
criterion = nn.CrossEntropyLoss()

# Start from +inf so the first epoch's evaluation loss always counts as an
# improvement, whatever its magnitude, and 'best.pth' is guaranteed to exist.
test_loss = float("inf")
best_loss = float("inf")

# #use for the test loss calculation
# epoch_losses_test = []

# Train for 10 epochs. After each epoch, evaluate on the held-out loader,
# keep the checkpoint with the lowest evaluation loss, and always save the
# most recent one.
for epoch in range(10):
    # ---- training pass ----
    epoch_losses = []
    model.train()
    for inputs, labels in train_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Record the per-batch training loss.
        epoch_losses.append(loss.item())

    # Mean of the per-batch training losses.
    print(f">>> Epoch {epoch} train loss: ", np.mean(epoch_losses))

    # ---- evaluation pass ----
    epoch_losses_test = []
    model.eval()

    # No gradients are needed here; disabling autograd avoids building a
    # graph for every evaluation batch.
    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Record the per-batch evaluation loss.
            epoch_losses_test.append(loss.item())

    # Mean of the per-batch evaluation losses.
    test_loss = np.mean(epoch_losses_test)

    print(f">>> Epoch {epoch} test loss: ", test_loss)

    # Save both the state dict and the full pickled module when the
    # evaluation loss improves.
    if best_loss > test_loss:
        torch.save(model.state_dict(), 'best.pth')
        torch.save(model, 'model_best.pth')
        print("Model saved best ckpt")
        best_loss = test_loss

    # Always overwrite the "last" checkpoint.
    torch.save(model.state_dict(), 'last.pth')
    torch.save(model, 'model_last.pth')
    print("Model saved last ckpt")

>>> Epoch 0 train loss:  0.6893809615107982
>>> Epoch 0 test loss:  0.3192182534738727
Model saved best ckpt
Model saved last ckpt
>>> Epoch 1 train loss:  0.17624200724632197
>>> Epoch 1 test loss:  0.33617109173665877
Model saved last ckpt
>>> Epoch 2 train loss:  0.1015971913812277
>>> Epoch 2 test loss:  0.27952877237744955
Model saved best ckpt
Model saved last ckpt
>>> Epoch 3 train loss:  0.05725953197258565
>>> Epoch 3 test loss:  0.30976883694529533
Model saved last ckpt
>>> Epoch 4 train loss:  0.04580108966658135
>>> Epoch 4 test loss:  0.3144525265240151
Model saved last ckpt
>>> Epoch 5 train loss:  0.04738454906391385
>>> Epoch 5 test loss:  0.3097314455586931
Model saved last ckpt
>>> Epoch 6 train loss:  0.02301816628112093
>>> Epoch 6 test loss:  0.2973762372427661
Model saved last ckpt
>>> Epoch 7 train loss:  0.03411311396579866
>>> Epoch 7 test loss:  0.2734046348411104
Model saved best ckpt
Model saved last ckpt
>>> Epoch 8 train loss:  0.041152078934196056
>>> Epo

In [19]:
# Compare predictions with labels on one batch from the training loader.
model.eval()
inputs, labels = next(iter(train_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([33, 11,  5, 30, 15,  9, 36, 16, 12, 19,  9, 29,  5, 26, 14, 18, 32, 11,
        33, 25, 12, 18, 12, 35,  6, 11, 23, 25, 36, 18, 21, 13],
       device='cuda:0')
Actual classes tensor([27, 11,  0, 30, 15, 27, 36, 16, 12, 19,  9, 29,  5, 26, 14, 18, 32, 11,
         0, 25, 12, 18, 12, 35,  6, 11, 23, 25, 36, 18, 21, 13],
       device='cuda:0')


In [20]:
# Compare predictions with labels on one batch from the held-out loader.
# Set eval mode here rather than relying on an earlier cell having done it.
model.eval()
inputs, labels = next(iter(test_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([25, 18, 32,  9, 25, 18,  2, 22,  3,  1, 13, 11, 20, 36, 15, 28,  7, 15,
        36,  7,  4, 30, 25, 26, 26,  2, 16,  8, 15, 25, 15,  1],
       device='cuda:0')
Actual classes tensor([25, 18, 32, 26, 25, 18, 34, 22,  3,  1, 13, 11, 20, 36, 15, 28,  7, 15,
        36,  7,  3, 30, 25, 26, 26,  2, 16,  8, 15, 25, 15,  1],
       device='cuda:0')


In [21]:
# Restore the best checkpoint and check it on one batch from the training loader.
# map_location lets the checkpoint load on whichever device is in use.
model.load_state_dict(torch.load('best.pth', map_location=device))
model.eval()
inputs, labels = next(iter(train_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([23, 26,  9, 12, 24, 36, 28,  9, 32,  8, 24, 36, 23, 28, 28, 36, 26,  7,
        30, 21, 14,  1, 14, 13, 27,  5, 26, 10, 15, 25, 27, 31],
       device='cuda:0')
Actual classes tensor([23, 26,  9, 12, 24, 36, 28,  9, 32,  8, 24, 36, 23, 28, 28, 36, 26,  7,
        30, 21, 14,  1, 14, 13, 27,  5, 26, 10, 15, 25, 27, 31],
       device='cuda:0')


In [30]:
# Draw one held-out batch and show the prediction for its first sample next to
# the labels of the whole batch.
model.eval()
inputs, labels = next(iter(test_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1)[0])
print("Actual classes", labels)

Predicted classes tensor(22, device='cuda:0')
Actual classes tensor([22, 15, 35,  4, 33, 34, 28, 17, 10, 12,  5,  7, 24, 25, 31, 12, 28, 10,
        13, 21,  7,  4, 29, 25, 31, 20,  0,  7, 24,  4,  2, 11],
       device='cuda:0')


In [55]:
# The dataset's 'test' split, pre-processed with the same resize/to-tensor pipeline.
inference = OxfordIIITPet(
    root=".",
    split='test',
    download=True,
    transforms=Compose(to_tensor),
)

In [56]:
# Number of samples in the inference dataset.
print(len(inference))

3669


In [57]:
# Batches of 32 over the inference dataset, in a freshly shuffled order on each pass.
inference_dataloader = DataLoader(
    inference,
    batch_size=32,
    shuffle=True,
)

In [58]:
# Number of batches the inference loader yields per pass.
len(inference_dataloader)

115

In [48]:
# NOTE(review): `labels` is whatever batch the most recently executed cell left
# in the kernel, so this output depends on execution order.
print(labels[0])

tensor(24, device='cuda:0')


In [64]:
# Top-1 accuracy of the best checkpoint over the whole inference dataset.
# map_location lets the checkpoint load on whichever device is in use.
model.load_state_dict(torch.load('best.pth', map_location=device))
model.eval()
correct = 0
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for inputs, labels in inference_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Compare the whole batch at once instead of recomputing argmax and
        # syncing with the device for every element.
        predictions = outputs.argmax(-1)
        correct += (predictions == labels).sum().item()

print(f"Inference Accuracy: {correct/len(inference)}")


Inference Accuracy: 0.8909784682474788


In [60]:
# Compare predictions with labels on one batch from the inference loader.
# Set eval mode here rather than relying on an earlier cell having done it.
model.eval()
inputs, labels = next(iter(inference_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(inputs)

print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([20, 28, 14, 28, 23,  8, 12, 33, 10, 35, 15, 27,  6, 28,  6, 24, 23,  6,
        12,  3,  9, 31, 20, 24, 22,  8, 29, 32, 20, 12,  7, 19],
       device='cuda:0')
Actual classes tensor([26, 28, 14, 28, 23,  8, 12,  0, 10, 35, 15,  9,  6, 28,  6, 24, 23, 26,
        12,  3, 27, 31, 20, 24, 22,  8, 29, 32, 23, 12,  7, 19],
       device='cuda:0')
