In [None]:
!pip install torch
!pip install torchvision
!pip install optuna

!pip install pytorch-lightning
# fix for collab env
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi

In [None]:
# DATASET LOAD
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch import utils
import os

import torch  # only needed in this cell for the CUDA availability check

# Every image is resized to 128x128 and converted to a tensor.
trans = transforms.Compose([
  transforms.Resize((128, 128)),
  transforms.ToTensor(),
])

DATA_ROOT = "/content/cifar"
# Explicit batch size for the standalone loaders below; without it DataLoader
# falls back to batch_size=1. Tune to fit GPU memory.
LOADER_BATCH_SIZE = 256

# Pinned host memory only helps when batches are copied to a GPU, so enable it
# only if CUDA is present. os.cpu_count() may return None, in which case load
# in the main process (num_workers=0).
kwargs = {
  "pin_memory": torch.cuda.is_available(),
  "num_workers": os.cpu_count() or 0,
}

# download=True on both splits keeps the cell re-runnable on its own.
dataset_train = CIFAR10(root=DATA_ROOT, download=True, transform=trans)
dataset_test = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=trans)

# Shuffle only the training data; evaluation order does not matter.
train_loader = utils.data.DataLoader(dataset_train, batch_size=LOADER_BATCH_SIZE, shuffle=True, **kwargs)
test_loader = utils.data.DataLoader(dataset_test, batch_size=LOADER_BATCH_SIZE, **kwargs)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12382136.61it/s]


Extracting /content/cifar/cifar-10-python.tar.gz to /content/cifar


In [None]:
import torch
from torch import optim, nn,  utils, Tensor
from itertools import repeat
from torchsummary import summary
from torchvision import models

# Print the torch.hub docstring of one of the repo's entry points.
print(torch.hub.help("pytorch/vision", "resnet18"))

# To list every entry point the repo exposes:
#   print("\n".join(torch.hub.list("pytorch/vision")))

# Resolve the weights enum through torch.hub, then build the backbone with it.
# `model` must be defined here: the summary call below (and later cells) use it.
weights = torch.hub.load("pytorch/vision", "get_weight", weights="ResNet34_Weights.IMAGENET1K_V1")
model = torch.hub.load("pytorch/vision", "resnet34", weights=weights)

# Alternative ways to obtain a backbone, kept for reference:
#   torch.hub.load("pytorch/vision", "resnet18", weights="IMAGENET1K_V1")  # pretrained, weights given as a string
#   torch.hub.load("pytorch/vision", "resnet34")                           # untrained
#   torch.hub.load("pytorch/vision", "resnet18")                           # untrained
#   models.resnet18(pretrained=True)                                       # pretrained via torchvision.models
#   models.resnet18(pretrained=False)                                      # untrained (False is the default)

# Summarise on the GPU when there is one, otherwise on the CPU. torchsummary
# needs to be told the device so its dummy input matches the model's weights.
device = "cuda" if torch.cuda.is_available() else "cpu"
summary(model.to(device), (3, 128, 128), device=device)

ResNet-18 from `Deep Residual Learning for Image Recognition <https://arxiv.org/abs/1512.03385>`__.

    Args:
        weights (:class:`~torchvision.models.ResNet18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.ResNet18_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.resnet.ResNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.ResNet18_Weights
        :members:
    
----------------------------------------------------------------
        Layer (type)               Output Shape       

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


In [None]:
### LIGHTNING
from pytorch_lightning import Trainer
import pytorch_lightning as pl

class LightningResNet(pl.LightningModule):
  """LightningModule that trains a classification backbone with cross-entropy.

  Args:
    backbone: module called as ``backbone(x)`` to produce per-class scores.
    learning_rate: learning rate passed to the Adam optimizer.
    batch_size: batch size used by the dataloaders this module creates.

  The dataloader hooks read the notebook-level ``dataset_train`` and
  ``dataset_test`` objects, so those must exist before fitting/testing.
  """

  def __init__(self, backbone, learning_rate=0.1, batch_size=1024):
    super().__init__()
    # Records learning_rate and batch_size; they are read back through
    # self.hparams.<name>. The backbone is excluded on purpose: it is a module
    # that is already stored as a submodule, so it does not belong in the
    # hyperparameters. When restoring from a checkpoint, pass backbone=...
    # explicitly.
    self.save_hyperparameters(ignore=["backbone"])
    self.backbone = backbone
    self.loss = nn.CrossEntropyLoss()

  def forward(self, x):
    """Return the backbone's scores for the batch ``x``."""
    return self.backbone(x)

  def _loss_and_accuracy(self, batch):
    """Compute (loss, accuracy) for one ``(inputs, labels)`` batch."""
    x, y = batch
    scores = self(x)
    loss = self.loss(scores, y)
    # Fraction of samples whose highest-scoring class equals the label.
    accuracy = (torch.argmax(scores, dim=1) == y).float().mean()
    return loss, accuracy

  # MUST RETURN THE LOSS
  def training_step(self, batch, batch_idx):
    """Run one training batch, log ``train_loss`` and return the loss."""
    x, y = batch
    loss = self.loss(self(x), y)

    # Goes to the Trainer's logger (TensorBoard by default, if installed).
    self.log('train_loss', loss)
    return loss

  def validation_step(self, batch, batch_idx):
    """Log ``val_loss`` and ``val_acc`` for one validation batch."""
    loss, acc = self._loss_and_accuracy(batch)
    self.log_dict({'val_loss': loss, 'val_acc': acc})

  def test_step(self, batch, batch_idx):
    """Log ``test_loss`` and ``test_acc`` for one test batch."""
    # TODO: replace the hand-rolled accuracy with torchmetrics.
    loss, acc = self._loss_and_accuracy(batch)
    self.log_dict({'test_loss': loss, 'test_acc': acc})

  # MUST RETURN THE OPTIMIZER
  def configure_optimizers(self):
    """Return Adam over all parameters using the stored learning rate."""
    return optim.Adam(self.parameters(), lr=self.hparams.learning_rate, weight_decay=0.0001)

  def train_dataloader(self):
    """Training batches from ``dataset_train``, reshuffled every epoch."""
    return utils.data.DataLoader(dataset_train, batch_size=self.hparams.batch_size, shuffle=True)

  def val_dataloader(self):
    """Validation batches. NOTE: this currently reuses ``dataset_test``."""
    return utils.data.DataLoader(dataset_test, batch_size=self.hparams.batch_size)

  def test_dataloader(self):
    """Test batches from ``dataset_test``."""
    return utils.data.DataLoader(dataset_test, batch_size=self.hparams.batch_size)


In [None]:
### TRAIN DEFAULT
import torch
from pytorch_lightning.loggers import tensorboard

# may be default_root_dir
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
# Where ModelCheckpoint writes its files. Defined here so the cell runs on a
# fresh kernel.
checkpoint_dir = "/content/checkpoints"

# Wrap a pretrained ResNet-34 in the LightningModule; Trainer.fit needs a
# LightningModule, not a bare torchvision network.
backbone = torch.hub.load("pytorch/vision", "resnet34", weights=weights)
model = LightningResNet(backbone)

# Keep the three checkpoints with the lowest logged "val_loss".
checkpoint_callback = ModelCheckpoint(dirpath=checkpoint_dir, save_top_k=3, monitor="val_loss")
# Note: if the logger has its own default directory, the checkpoint callback
# prefers that over the Trainer's default_root_dir; setting dirpath explicitly
# (as above) avoids the surprise.
# https://github.com/Lightning-AI/pytorch-lightning/blob/90d04b5b86f37994cdceccc6de32f0e93b1cc7f0/src/lightning/pytorch/callbacks/model_checkpoint.py#L623
# Alternative: Trainer(callbacks=[checkpoint_callback], log_every_n_steps=10, default_root_dir=checkpoint_dir)
trainer = Trainer(callbacks=[checkpoint_callback], log_every_n_steps=10)

# To resume (restores model, epoch, step, LR schedulers, ...):
#   trainer.fit(model, ckpt_path="some/path/to/my_checkpoint.ckpt")
# The dataloaders come from the module's own hooks; they should move to a
# separate DataModule at some point.
trainer.fit(model)

checkpoint_callback.best_model_path

In [None]:
### OPTUNA
import optuna
from pytorch_lightning import Trainer

def objective(trial):
    """Optuna objective: train at a sampled learning rate and return the test loss.

    Args:
        trial: the Optuna trial used to sample the ``lr`` hyperparameter.

    Returns:
        The ``test_loss`` value reported by ``trainer.test`` for this trial.

    Side effects:
        Stores the full ``trainer.test`` result in the global ``results`` so it
        can be inspected from another cell.
    """
    # Sample the learning rate on a log scale. suggest_float(..., log=True)
    # replaces the deprecated suggest_loguniform.
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)

    # LightningResNet requires a backbone. Build a fresh one per trial so that
    # trials never continue from each other's trained weights.
    backbone = torch.hub.load("pytorch/vision", "resnet34", weights=weights)
    model = LightningResNet(backbone, learning_rate=lr, batch_size=2048)

    # The limit_* arguments cap the number of batches per stage to keep trials short.
    trainer = Trainer(max_epochs=10, limit_train_batches=100, limit_val_batches=10, limit_test_batches=100)
    # Explicitly passed loaders are used for training/testing here, so the
    # module's batch_size only affects the loaders it builds itself.
    trainer.fit(model, train_loader)

    global results
    # trainer.test returns a list with one metrics dict per test dataloader;
    # a single loader is passed, hence index 0 below.
    results = trainer.test(model, test_loader)
    # NOTE: this optimises against the test loss, not a validation loss.
    return results[0]["test_loss"]

# Search for the learning rate that minimises the objective's return value.
study = optuna.create_study(direction='minimize')
study.optimize(func=objective, n_trials=20)

# Report the hyperparameters of the best trial found.
print(study.best_params)


# Just testing graphing in netron vs torchviz

In [None]:
!pip install onnx
!pip install onnxscript
!pip install -q netron

In [None]:
# Build the dummy input on whatever device the model's weights live on: the
# model may have been moved to the GPU earlier, and exporting with a CPU tensor
# would then fail with a device mismatch.
export_device = next(model.parameters()).device
torch_input = torch.randn(1, 3, 32, 32, device=export_device)
output_path = "/content/restnet34.onnx"
torch.onnx.export(model, torch_input, output_path, input_names=["image"], output_names=["output"])

In [None]:
import os
import torch
import netron
import portpicker
from google.colab import output

# model should come from another block
# output_path = "/content/output.pth"
# torch.save(model.state_dict(), output_path)

# Ask portpicker for a port that is currently free; Netron is started on it
# and the same port is embedded in the notebook below.
port = portpicker.pick_unused_port()

# Start Netron on the file at output_path (set by the ONNX export cell).
# Wrapped in output.temporary(), presumably so Netron's startup messages do
# not stay in the cell output -- TODO confirm against the Colab docs.
with output.temporary():
  netron.start(output_path, port, browse=True)

# Embed the server running on that port in the cell output as an iframe.
output.serve_kernel_port_as_iframe(port, height='800')