# Assignment: Vision Transformers on CIFAR10

In [1]:
#imports
from __future__ import print_function
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils


In [2]:
#loading the dataset
dataset = dset.CIFAR10(root="./data", download=True,
                           transform=transforms.Compose([
                               transforms.Resize(64),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ]))
nc=3

dataloader = torch.utils.data.DataLoader(dataset, batch_size=128,
                                         shuffle=True, num_workers=2)


In [3]:
#checking the availability of cuda devices
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Tasks:
* try to get the best test Accuracy on Cifar10 using a transformer model
* pre-trained models allowed - see [here](https://docs.pytorch.org/vision/main/models/vision_transformer.html) for list of models in TorchVision
* **hint**: just like with the CNN in Week 5 - wee need to change the classification layer to fit our 10 class CIFAR-10 problem before we can fine-tune it...
* **hint**: Transformers need a lot of compute + memory - use the A100 GPU



In [4]:
 !pip install -q timm pandas requests

In [5]:

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from timm.models import create_model

# Modell definieren (mit Upsample-Schicht vorne)
class DeiTWithUpsample(nn.Module):
    def __init__(self):
        super().__init__()
        self.upsample = nn.Upsample(size=(224, 224), mode='bilinear', align_corners=False)
        self.model = create_model('deit_base_patch16_224', pretrained=True)
        self.model.head = nn.Linear(self.model.head.in_features, 10)

    def forward(self, x):
        x = self.upsample(x)
        return self.model(x)








In [6]:
# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DeiTWithUpsample().to(device)

# Daten ohne Resize laden
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Loss und Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [8]:
num_epochs = 5

for epoch in range(num_epochs):
    # Training
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(trainloader)
    train_acc = 100. * correct / total



    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")

print("Training abgeschlossen.")


Epoch 1/5, Train Loss: 0.1020, Train Acc: 96.74%
Epoch 2/5, Train Loss: 0.0463, Train Acc: 98.47%
Epoch 3/5, Train Loss: 0.0358, Train Acc: 98.79%
Epoch 4/5, Train Loss: 0.0289, Train Acc: 99.07%
Epoch 5/5, Train Loss: 0.0262, Train Acc: 99.08%
Training abgeschlossen.
