In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger

import torch
from torch import nn
from torch.nn import functional as F
from torchmetrics.classification import MulticlassAccuracy

from src.dataloader.mnist_datamodule import MNISTDataModule

# The quoted `%env "NAME" "value"` form keeps the quote characters literally, which
# defined a variable called '"WANDB_NOTEBOOK_NAME"' that wandb never looks up.
# Set the real variable (before wandb is imported) through os.environ instead.
os.environ["WANDB_NOTEBOOK_NAME"] = "mnist_template"
import wandb

env: "WANDB_NOTEBOOK_NAME"="mnist_template"


In [2]:
class MNISTModel(pl.LightningModule):
    """Three-layer fully connected classifier trained with cross-entropy.

    Inputs are flattened from dim 1 onward and must yield 784 features per
    sample; the network emits 10 logits per sample.

    Args:
        lr: learning rate handed to the Adam optimizer.
        mean: offset added to a test image (after scaling by ``sd``) before it
            is logged -- presumably the dataset normalisation mean; confirm
            against the datamodule.
        sd: scale applied to a test image before it is logged -- presumably
            the dataset normalisation standard deviation; confirm likewise.
    """

    # how many samples of each test batch are logged as images with predictions
    num_logged_test_images = 1

    def __init__(self, lr=0.1, mean=0., sd=1.):
        super().__init__()

        # initial args
        self.lr = lr
        self.metric = MulticlassAccuracy(num_classes=10)
        self.mean = mean
        self.sd = sd

        # define model
        self.model = nn.Sequential(
            nn.Linear(784, 784),
            nn.ReLU(),
            nn.Linear(784, 784),
            nn.ReLU(),
            nn.Linear(784, 10)
        )

        self.save_hyperparameters()

    def forward(self, x):
        """Flatten every sample to a vector and return the class logits."""
        return self.model(x.flatten(1))

    def _common_step(self, batch, batch_idx, stage: str):
        """Shared train/val/test logic.

        Runs the forward pass, computes the cross-entropy loss and logs
        ``{stage}/loss`` and ``{stage}/accuracy``.

        Args:
            batch: pair ``(x, y)`` of inputs and integer class targets.
            batch_idx: index of the batch (unused here).
            stage: prefix for the logged metric names.

        Returns:
            Tuple ``(loss, logits)``.
        """
        x, y = batch
        logits = self.forward(x)
        loss = F.cross_entropy(logits, y)

        # accuracy is only reported, so keep it out of the autograd graph
        with torch.no_grad():
            acc = self.metric(logits, y)

            self.log(f'{stage}/loss', loss)
            self.log(f'{stage}/accuracy', acc)

        return loss, logits

    def training_step(self, batch, batch_idx):
        """Return the loss that Lightning back-propagates."""
        loss, _ = self._common_step(batch, batch_idx, 'train')
        return loss

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy; nothing is returned."""
        self._common_step(batch, batch_idx, 'val')

    def test_step(self, batch, batch_idx):
        """Log test loss/accuracy and, if the logger can, sample predictions.

        The first ``num_logged_test_images`` samples of the batch are rescaled
        with ``x * sd + mean`` and logged under ``test_predictions`` together
        with their softmax probabilities.
        """
        _, logits = self._common_step(batch, batch_idx, 'test')
        x, _ = batch

        # Not every logger exposes log_image (and there may be no logger at
        # all); skip the image preview in that case instead of crashing.
        log_image = getattr(self.logger, 'log_image', None)
        # never index past the end of a short (or empty) batch
        n_images = min(self.num_logged_test_images, len(x))
        if not callable(log_image) or n_images < 1:
            return

        images = []
        captions = []
        for i in range(n_images):
            img = x[i]*self.sd + self.mean
            pred = F.softmax(logits[i], dim=0)
            images.append(img)
            captions.append('predictions: ' + str(pred.detach().cpu().numpy().round(2)))

        log_image(key='test_predictions', images=images, caption=captions)

    def configure_optimizers(self):
        """Optimise all parameters with Adam at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [3]:
# Authenticate this session with Weights & Biases.
wandb.login()

# Experiment logger handed to the Trainer below; run files go to ../logs and
# log_model=True additionally uploads model checkpoints to W&B.
logger = WandbLogger(
    project='mnist_template',
    name='feedforward',
    save_dir='../logs',
    log_model=True,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkoero[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
# Seed the Python, NumPy and PyTorch RNGs (and dataloader workers) before any
# data handling or weight initialisation, so a fresh "Restart & Run All"
# reproduces the same run.
SEED = 42
pl.seed_everything(SEED, workers=True)

mnist = MNISTDataModule()
# prepare_data/setup are called by hand so the datamodule is ready before the
# model cell reads default_mean/default_sd from it.
# NOTE(review): confirm those attributes actually require setup() to have run.
mnist.prepare_data()
mnist.setup()

In [5]:
# lr overrides the class default of 0.1; mean/sd come from the datamodule and
# are used by test_step to rescale images before they are logged.
model = MNISTModel(lr=0.01, mean=mnist.default_mean, sd=mnist.default_sd)

In [6]:
# Short 3-epoch GPU run; metrics go through the W&B logger every 10 steps.
trainer = Trainer(
    logger=logger,
    accelerator='gpu',
    max_epochs=3,
    accumulate_grad_batches=1,
    log_every_n_steps=10,
)

# Train on the datamodule's train/val loaders.
trainer.fit(model=model, datamodule=mnist)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type               | Params
----------------------------------------------
0 | metric | MulticlassAccuracy | 0     
1 | model  | Sequential         | 1.2 M 
----------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.955     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.


In [7]:
# Evaluate the trained model on the datamodule's test loader; the returned
# list of metric dicts is shown as the cell output.
trainer.test(model=model, datamodule=mnist)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test/loss': 0.17657946050167084, 'test/accuracy': 0.9511427879333496}]

In [8]:
# Close the W&B run so pending data is uploaded and the run summary is shown.
wandb.finish()

VBox(children=(Label(value='14.222 MB of 14.222 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████
test/accuracy,▁
test/loss,▁
train/accuracy,▄▂▁▂▅▃▄▅▄▆█▅▅▆▇▆▆▇▄▆▄▅▇▇▇▇█▆▆▇▅▇▄▆▆▆▆▆▇▇
train/loss,▆█▇█▄▆▄▄▄▂▁▃▅▂▄▃▃▂▅▄▄▃▁▁▁▂▁▁▃▂▃▁▄▂▂▃▃▄▁▁
trainer/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇█████████████████
val/accuracy,▄▁█
val/loss,▆█▁

0,1
epoch,3.0
test/accuracy,0.95114
test/loss,0.17658
train/accuracy,0.95682
train/loss,0.13785
trainer/global_step,1290.0
val/accuracy,0.94785
val/loss,0.18277
