In [38]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torchvision.datasets import OxfordIIITPet
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

In [63]:
class OxfordIIITPetDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str = './', batch_size: int = 32):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        self.dims = (3, 224, 224)
        self.dataset = None
        self.train_dataset = None
        self.val_dataset = None

    def prepare_data(self):
        OxfordIIITPet(self.data_dir, download=True)

    def setup(self, stage=None):
        self.dataset = OxfordIIITPet(self.data_dir, transform=self.transform)
        self.train_dataset, self.val_dataset = torch.utils.data.random_split(self.dataset, [round(len(self.dataset)*0.8), round(len(self.dataset)*0.2)])

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)

In [73]:
class OxfordIIITPetModel(pl.LightningModule):
    def __init__(self, data_dir: str = './', batch_size: int = 32, learning_rate: float = 0.001):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, 512, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(512*7*7, 37)
        )

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.FloatTensor(F.cross_entropy(y_hat, y))
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.FloatTensor(F.cross_entropy(y_hat, y))
        self.log('val_loss', loss, prog_bar=True)
        accuracy = (y_hat.argmax(dim=1) == y).float().mean()
        self.log('val_accuracy', accuracy, prog_bar=True)

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.learning_rate)

In [74]:
def seed_everything(seed: int = 42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [75]:
data_dir = './'
batch_size = 128
learning_rate = 3e-4

seed_everything()

data_module = OxfordIIITPetDataModule(data_dir=data_dir, batch_size=batch_size)
model = OxfordIIITPetModel(data_dir=data_dir, batch_size=batch_size, learning_rate=learning_rate)

checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath='./checkpoints',
    filename='best-checkpoint',
    save_top_k=1,
    mode='min',
)
logger = TensorBoardLogger('lightning_logs', name='OxfordIIITPet')
trainer = pl.Trainer(
    accelerator='gpu',
    max_epochs=60,
    callbacks=[checkpoint_callback],
    logger=logger,
    accumulate_grad_batches=11,
)
trainer.fit(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 2.5 M 
-------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
9.987     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


TypeError: expected TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)) (got TensorOptions(dtype=float, device=cuda:0, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)))

In [None]:
trainer.test()

In [None]:
# ReLU -