In [None]:
! pip install lightning torchmetrics transformers rich -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m764.8/764.8 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.3/71.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m660.0/660.0 kB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Copy the S17 archive from Google Drive into the current working directory.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount
# call is visible in this notebook; confirm.
! cp /content/drive/MyDrive/ERAv1/S17/S17.zip .
# Extract the archive in the current working directory.
! unzip S17.zip

# Copy the extracted image folder next to the notebook so the relative
# "pizza_steak_sushi" path used later resolves.
# NOTE(review): the absolute source path presumes the archive was unpacked
# under /content (i.e. the working directory is /content) — confirm.
! cp -r /content/S17/pizza_steak_sushi .

In [None]:
import os
import torch
import random
import logging
import numpy as np
import torchmetrics
from torch import nn
from os.path import exists
import lightning.pytorch as pl
from collections import Counter
from torch.nn import functional as F
from dataset import create_dataloaders
from torch.utils.data import DataLoader
from transformer import EncoderTransformer
from torchvision import transforms, datasets
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import LearningRateMonitor, RichProgressBar

In [None]:
# Seed Python, NumPy and PyTorch RNGs up front so weight initialisation and any
# data shuffling are repeatable across "Restart & Run All".
SEED = 42
pl.seed_everything(SEED, workers=True)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset location (relative to the working directory) and its two splits.
image_path = "pizza_steak_sushi"
train_dir = image_path + "/train"
test_dir = image_path + "/test"

# Side length, in pixels, that every image is resized to.
IMG_SIZE = 224

# Create transform pipeline manually: resize to a square, then convert to a tensor.
manual_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])
print(f"Manually created transforms: {manual_transforms}")


# Build the train/test loaders; the helper also returns the class names.
train_dataloader, test_dataloader, class_names = create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=32
)

# Display the created objects as the cell output.
train_dataloader, test_dataloader, class_names

Manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
    ToTensor()
)


(<torch.utils.data.dataloader.DataLoader at 0x7d2c2df8feb0>,
 <torch.utils.data.dataloader.DataLoader at 0x7d2c2df8fca0>,
 ['pizza', 'steak', 'sushi'])

In [None]:
# Pull the first mini-batch out of the training loader
image_batch, label_batch = next(iter(train_dataloader))

# Keep the first sample of that batch for inspection
image = image_batch[0]
label = label_batch[0]

# Show the sample's tensor shape together with its label
image.shape, label

(torch.Size([3, 224, 224]), tensor(1))

In [None]:
# NOTE(review): none of the visible cells read these values — the model below is
# constructed from its own defaults. Confirm whether they are meant to be wired in.

# Image / patch geometry
img_size = 224
in_channels = 3
patch_size = 16

# Transformer sizes
num_transformer_layers = 12
embedding_dim = 768
mlp_size = 3072
num_heads = 12

# Dropout rates
attn_dropout = 0
mlp_dropout = 0.01
embedding_dropout = 0.1

# Number of output classes
num_classes = 3

assert img_size % patch_size == 0, f"Image size must be divisible by patch size, image size: {img_size}, patch size: {patch_size}."

# Patch count = image area divided by the area of a single patch
num_patches = img_size**2 // patch_size**2

In [None]:
class ViTLightning(pl.LightningModule):
  """LightningModule that wraps ``EncoderTransformer`` for image classification.

  Optimises cross-entropy loss with Adam and logs loss and multiclass accuracy
  (per step and per epoch) for both the training and validation loops.

  Args:
    seq_len: Sequence length forwarded to ``EncoderTransformer``.
    embed_dim: Embedding size forwarded to ``EncoderTransformer``.
    n_heads: Number of attention heads forwarded to ``EncoderTransformer``.
    n_layers: Number of layers forwarded to ``EncoderTransformer``.
    ff_size: Feed-forward size forwarded to ``EncoderTransformer``.
    dropout: Dropout rate forwarded to ``EncoderTransformer``.
    num_classes: Number of target classes; sizes the accuracy metrics and is
      passed to the encoder as ``num_class``.
    lr: Learning rate for Adam.
    weight_decay: Weight-decay coefficient for Adam.
  """

  def __init__(self, seq_len=196, embed_dim=768, n_heads=8, n_layers=6, ff_size=3072, dropout=0.1, num_classes=3, lr=3e-3, weight_decay=0.3):
    super().__init__()
    # Store every constructor argument in ``self.hparams`` (and in checkpoints).
    self.save_hyperparameters()

    self.seq_len = seq_len
    self.embed_dim = embed_dim
    self.n_heads = n_heads
    self.n_layers = n_layers
    self.ff_size = ff_size
    self.dropout = dropout
    self.num_classes = num_classes
    self.lr = lr
    self.weight_decay = weight_decay

    # Separate metric objects per loop so training and validation statistics never
    # share accumulated state. ``accuracy_fn`` keeps its original name and now
    # tracks training only.
    self.accuracy_fn = torchmetrics.classification.Accuracy(task="multiclass", num_classes=num_classes)
    self.val_accuracy_fn = torchmetrics.classification.Accuracy(task="multiclass", num_classes=num_classes)

    self.model = EncoderTransformer(n_layers, n_heads, embed_dim, ff_size, seq_len, num_class=num_classes, dropout=dropout, isBERT=False)
    self.criterion = torch.nn.CrossEntropyLoss()

  def forward(self, x):
    """Return the wrapped model's output for input batch ``x``."""
    return self.model(x)

  def configure_optimizers(self):
    """Create the Adam optimizer over all parameters of this module."""
    return torch.optim.Adam(self.parameters(),
                            lr=self.lr,
                            betas=(0.9, 0.999),
                            weight_decay=self.weight_decay)

  def _shared_step(self, batch):
    """Run the model on one ``(inputs, targets)`` batch; return ``(outputs, targets, loss)``."""
    x, y = batch
    out = self(x)
    return out, y, self.criterion(out, y)

  def training_step(self, batch, batch_idx):
    """Compute and log training loss/accuracy for one batch; return the loss."""
    out, y, loss = self._shared_step(batch)
    # Update the metric, then log the metric object itself: Lightning handles the
    # epoch-level compute/reset, and no ``.item()`` device sync is needed per step.
    self.accuracy_fn(out, y)

    self.log('train_loss', loss, prog_bar=True, on_epoch=True, on_step=True, logger=True)
    self.log('train_acc', self.accuracy_fn, prog_bar=True, on_epoch=True, on_step=True, logger=True)
    return loss

  def validation_step(self, batch, batch_idx):
    """Compute and log validation loss/accuracy for one batch."""
    out, y, loss = self._shared_step(batch)
    self.val_accuracy_fn(out, y)

    self.log('val_loss', loss, prog_bar=True, on_epoch=True, on_step=True, logger=True)
    self.log('val_acc', self.val_accuracy_fn, prog_bar=True, on_epoch=True, on_step=True, logger=True)

In [None]:
# Build the trainer: 11 epochs on a single device, validating after every epoch,
# logging every step to TensorBoard under logs/ViT.
trainer = pl.Trainer(
                     log_every_n_steps=1,
                     check_val_every_n_epoch=1,
                     enable_model_summary=True,
                     max_epochs=11,
                     accelerator='auto',
                     # `accelerator='auto'` already chooses GPU vs CPU, so a plain
                     # device count of 1 is valid in both cases. The previous
                     # `None` fallback is not an accepted device count for the
                     # CPU accelerator in Lightning 2.x.
                     devices=1,
                     logger=[TensorBoardLogger("logs/", name="ViT")],
                     callbacks=[LearningRateMonitor(logging_interval="step"),
                                RichProgressBar(leave=True)]
                     )

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# Size the classifier from the dataset's class list instead of relying on the
# constructor default matching the data.
# NOTE(review): every other architecture argument still comes from the
# ViTLightning defaults, not from the hyperparameter cell above — confirm intent.
model = ViTLightning(num_classes=len(class_names))

# NOTE(review): the test split is passed as the validation data during fitting,
# so the final validate() call reports on data already used for per-epoch checks.
trainer.fit(model, train_dataloader, test_dataloader)
trainer.validate(model, test_dataloader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

INFO: `Trainer.fit` stopped: `max_epochs=11` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=11` reached.


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

[{'val_loss_epoch': 1.082227110862732, 'val_acc_epoch': 0.41333332657814026}]