In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
!pip install vit_pytorch
from vit_pytorch import ViT
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# ViT hyperparameters, stored as plain scalars.
# No trailing commas: `dim = 64,` would bind the one-element tuple (64,)
# rather than the number 64.
image_size = 32    # CIFAR-10 images are 32x32 pixels and are not resized
patch_size = 4     # side length of each square patch; must divide image_size
num_classes = 10   # CIFAR-10 has ten classes
dim = 64           # token embedding width
depth = 6          # number of transformer blocks
heads = 8          # attention heads per block
mlp_dim = 128      # hidden width of each block's feed-forward layer
dropout = 0.1      # dropout inside the transformer blocks
emb_dropout = 0.1  # dropout applied to the patch embeddings

In [None]:
# Training-time preprocessing: random horizontal flips as augmentation, then
# tensor conversion and per-channel normalisation, i.e. (x - 0.5) / 0.5.
_train_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
train_transforms = transforms.Compose(_train_steps)

In [None]:
# Test-time preprocessing: no augmentation, only tensor conversion and
# per-channel normalisation, i.e. (x - 0.5) / 0.5.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
testingTrans = transforms.Compose(_test_steps)

In [None]:
# CIFAR-10 train and test splits stored under ./data (download requested),
# each paired with its own preprocessing pipeline.
trainset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=train_transforms,
)
testset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=testingTrans,
)

In [None]:
# Wrap both CIFAR-10 splits in DataLoaders for mini-batch iteration.
batch_size = 4

# Settings shared by the two loaders.
_loader_kwargs = dict(batch_size=batch_size, num_workers=1, pin_memory=True)

# Only the training split is shuffled; the test split keeps a fixed order.
trainloader = DataLoader(trainset, shuffle=True, **_loader_kwargs)
testloader = DataLoader(testset, shuffle=False, **_loader_kwargs)

In [None]:
# Vision Transformer classifier for CIFAR-10.
# image_size is set to 32 to match the 32x32 images the data pipeline feeds
# in (no resize is applied). With patch_size 4 this gives 8 x 8 = 64 patches.
model = ViT(
    image_size=32,
    patch_size=4,
    num_classes=10,
    dim=64,
    depth=6,
    heads=8,
    mlp_dim=128,
    dropout=0.1,
    emb_dropout=0.1,
    channels=3,
).to(device)

In [None]:
# Loss: cross-entropy with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# Optimizer: Adam over every model parameter, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the model, reporting the sample-weighted mean loss and top-1 accuracy
# over the training set after every epoch.
NUM_EPOCHS = 50  # used for both the loop bound and the progress message

for epoch in range(NUM_EPOCHS):
    model.train()  # enable training-mode behaviour (e.g. dropout)
    train_loss = 0.0
    train_correct = 0

    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion yields a batch mean; weight it by the batch size so
        # the division below produces a per-sample average even when the
        # last batch is smaller.
        train_loss += loss.item() * images.size(0)

        # Predicted class = index of the largest logit along the class axis.
        predicted = outputs.argmax(dim=1)
        train_correct += (predicted == labels).sum().item()

    num_train_samples = len(trainloader.dataset)
    train_loss /= num_train_samples
    train_acc = 100. * train_correct / num_train_samples
    print(f'Epoch {epoch + 1}/{NUM_EPOCHS} Training Loss: {train_loss:.6f}, Training Accuracy: {train_acc:.2f}%')


In [None]:
# Evaluate the model on the test split: sample-weighted mean loss and top-1
# accuracy over the whole test set.
model.eval()  # switch to inference-mode behaviour (e.g. dropout disabled)
test_loss = 0
test_correct = 0

with torch.no_grad():  # gradients are not needed for evaluation
    for inputs, targets in testloader:
        inputs, targets = inputs.to(device), targets.to(device)
        logits = model(inputs)
        batch_loss = criterion(logits, targets).item()
        # Weight the batch-mean loss by the batch size so the final division
        # yields a per-sample average.
        test_loss += batch_loss * inputs.size(0)
        # Index of the largest logit per row is the predicted class.
        predicted = logits.max(dim=1).indices
        test_correct += (predicted == targets).sum().item()

num_test_samples = len(testloader.dataset)
test_loss /= num_test_samples
test_acc = 100. * test_correct / num_test_samples
print(f'Test Loss: {test_loss:.6f}, Test Accuracy: {test_acc:.2f}%')