In [None]:
import torch
#dataset link: https://www.robots.ox.ac.uk/~vgg/data/pets/ || https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz || https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz
from torchvision.datasets import OxfordIIITPet
import matplotlib.pyplot as plt
from random import random
from torchvision.transforms import Resize, ToTensor
from torchvision.transforms.functional import to_pil_image

In [None]:
to_tensor = [Resize((224, 224)), ToTensor()] #pre-processing

In [None]:
to_tensor

[Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn),
 ToTensor()]

In [None]:
import torchvision
from torchvision import transforms
import PIL
transforms = [
    Resize((224, 224)),
    # transforms.RandomCrop(50, padding=1),
    # transforms.RandomGrayscale(p=0.1),
    # transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation(20),
    # transforms.RandomRotation(degrees=(0, 20)),
    transforms.AutoAugment(),
    ToTensor()
]

In [None]:
print(transforms)

[Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn), AutoAugment(policy=AutoAugmentPolicy.IMAGENET, fill=None), ToTensor()]


In [None]:
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image = t(image)
        return image, target

def show_images(images, num_samples=40, cols=8):
    """ Plots some samples from the dataset """
    plt.figure(figsize=(15,15))
    idx = int(len(dataset) / num_samples)
    print(images)
    for i, img in enumerate(images):
        if i % idx == 0:
            plt.subplot(int(num_samples/cols) + 1, cols, int(i/idx) + 1)
            plt.imshow(to_pil_image(img[0]))

dataset = OxfordIIITPet(root=".", download=True, transforms=Compose(to_tensor))
# new_dataset = OxfordIIITPet(root=".", download=True, transforms=Compose(transforms))

# show_images(dataset)


Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:06<00:00, 126405530.90it/s]


Extracting oxford-iiit-pet/images.tar.gz to oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:00<00:00, 112132641.18it/s]


Extracting oxford-iiit-pet/annotations.tar.gz to oxford-iiit-pet


In [None]:
from torchvision.models import vit_b_16

In [None]:
from torch.utils.data import DataLoader
from torch.utils.data import random_split

train_split = int(0.8 * len(dataset))
train, test = random_split(dataset, [train_split, len(dataset) - train_split])

train_dataloader = DataLoader(train, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test, batch_size=32, shuffle=True)

In [None]:
import torch.optim as optim
from torch import nn
from torch import Tensor
import numpy as np


device = "cuda"

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = vit_b_16(weights='DEFAULT')
        self.model.heads = nn.Linear(768, 37)
        self.model.heads.weight.data.normal_(mean=0.5, std=0.5)
        self.model.heads.bias.data.zero_()

    def forward(self, x: torch.Tensor):
            # Reshape and permute the input tensor
            x = self.model._process_input(x)
            n = x.shape[0]

            # Expand the class token to the full batch
            batch_class_token = self.model.class_token.expand(n, -1, -1)
            x = torch.cat([batch_class_token, x], dim=1)

            x = self.model.encoder(x)

            # Classifier "token" as used by standard language architectures
            x = x[:, 0]

            x = self.model.heads(x)

            return x

model = Net().to(device)
# print(model)

optimizer = optim.AdamW(model.parameters(), lr=0.01, betas=(0.9, 0.999), weight_decay = 0.1)
criterion = nn.CrossEntropyLoss()

test_loss = 10
best_loss = 10

# #use for the test loss calculation
# epoch_losses_test = []

for epoch in range(100):
    epoch_losses = []
    model.train()
    for step, (inputs, labels) in enumerate(train_dataloader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        #get training loss values
        epoch_losses.append(loss.item())


    if epoch % 5 == 0:
        #calculating training loss
        print(f">>> Epoch {epoch} train loss: ", np.mean(epoch_losses))

        # #get the last_test_lost
        # last_test_loss = np.mean(epoch_losses_test)

        #initialize for this one
        epoch_losses_test = []

        model.eval()

        for step, (inputs, labels) in enumerate(test_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            #get this test lost values
            epoch_losses_test.append(loss.item())

        #calculating testloss
        test_loss = np.mean(epoch_losses_test)

        print(f">>> Epoch {epoch} test loss: ", test_loss)

        #saving files
        if(best_loss > test_loss):
              torch.save(model.state_dict(), 'best.pth')
              torch.save(model, 'model_best.pth')
              print("Model saved best ckpt")
              best_loss = test_loss

        torch.save(model.state_dict(), 'last.pth')
        torch.save(model, 'model_last.pth')
        print("Model saved last ckpt")

>>> Epoch 0 train loss:  13.156557899454366
>>> Epoch 0 test loss:  4.871868237205174
Model saved best ckpt
Model saved last ckpt
>>> Epoch 5 train loss:  3.8151700418928396
>>> Epoch 5 test loss:  3.761606962784477
Model saved best ckpt
Model saved last ckpt
>>> Epoch 10 train loss:  3.6614993292352427
>>> Epoch 10 test loss:  3.635543325672979
Model saved best ckpt
Model saved last ckpt
>>> Epoch 15 train loss:  3.6319891007050225
>>> Epoch 15 test loss:  3.6253922089286474
Model saved best ckpt
Model saved last ckpt
>>> Epoch 20 train loss:  3.6005941836730293
>>> Epoch 20 test loss:  3.61181902885437
Model saved best ckpt
Model saved last ckpt
>>> Epoch 25 train loss:  3.571427239024121
>>> Epoch 25 test loss:  3.5922998760057534
Model saved best ckpt
Model saved last ckpt
>>> Epoch 30 train loss:  3.550463943377785
>>> Epoch 30 test loss:  3.5587729889413584
Model saved best ckpt
Model saved last ckpt
>>> Epoch 35 train loss:  3.5353263953457708
>>> Epoch 35 test loss:  3.60588737

In [None]:
model.eval()
inputs, labels = next(iter(train_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([22, 31, 31, 31, 22, 31, 31, 31,  3,  3, 31, 22,  0, 31, 22, 22, 22, 31,
        31,  3, 31, 31, 31, 22, 22, 31, 31, 22, 22, 31, 22,  3],
       device='cuda:0')
Actual classes tensor([27, 22,  6, 14,  9,  0, 26, 23,  1, 20, 23, 15,  9, 12, 12, 29, 11, 14,
        35,  7, 17,  5, 13,  3, 34, 30, 15, 26, 28,  1, 24, 33],
       device='cuda:0')


In [None]:
inputs, labels = next(iter(test_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([27, 22, 22, 31, 31, 31,  3,  3, 22,  0, 22, 31, 22, 22,  3, 22, 22,  3,
        22, 27, 22, 22, 31, 31, 22, 31, 31, 31, 31, 31, 22, 31],
       device='cuda:0')
Actual classes tensor([13, 29,  4, 20, 13, 20, 15,  2,  2, 14,  0, 35, 16, 13, 33, 29, 10, 14,
        34, 36, 22, 10, 17, 14, 18, 16, 14, 19, 25, 20, 28,  9],
       device='cuda:0')


In [None]:
model.load_state_dict(torch.load('best.pth'))
inputs, labels = next(iter(train_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([18, 34, 34, 34, 34, 34, 27, 34, 34,  7,  7,  3,  0, 18,  3,  3,  7,  7,
        23, 34, 18,  7,  7,  3,  3,  0,  7,  0, 18,  3, 17, 18],
       device='cuda:0')
Actual classes tensor([27, 27, 30,  9,  8, 18, 10, 34, 23,  3,  4, 20, 16, 18,  7,  8,  7,  6,
        22,  8,  3,  4, 12,  6, 29, 16, 17, 12, 10, 22, 34, 16],
       device='cuda:0')


In [None]:
inputs, labels = next(iter(test_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)


print("Predicted classes", outputs.argmax(-1))
print("Actual classes", labels)

Predicted classes tensor([23, 17,  3,  0, 18, 34,  0,  3,  0, 18, 34, 34,  3,  7,  7, 34, 34, 34,
        34,  3,  7, 34, 34,  3, 23, 27, 17, 34, 18, 17,  0, 18],
       device='cuda:0')
Actual classes tensor([21, 10,  2, 16, 31, 32, 11, 28, 34, 18,  5, 19, 33, 15, 16,  8, 21,  0,
         2, 12, 24, 20, 34, 10, 31, 12,  6, 35, 10, 14, 24, 27],
       device='cuda:0')
