<a href="https://colab.research.google.com/github/KajetanFrackowiak/WeightBiasesLearn/blob/main/Simple_Pytorch_Integration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

W&B + PyTorch = 🔥

In [None]:
!pip install wandb onnx -Uq

In [4]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# remove slow mirror from list of MNIST mirrors
torchvision.datasets.MNIST.mirrors = [mirror for mirror in torchvision.datasets.MNIST.mirrors
                                      if not mirror.startswith("http://yann.lecun.com")]

In [5]:
%%capture
!pip install wandb --upgrade

In [7]:
import wandb

wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

##Define the Experiment and Pipeline

In [8]:
config = dict(
    epochs=5,
    classes=10,
    kernels=[16, 32],
    batch_size=128,
    learning_rate=0.005,
    dataset='MNIST',
    architecture="CNN"
)

In [9]:
def model_pipeline(hyperparameters):

  # Tell wandb to get started
  with wandb.init(project="pytorch-demo", config=hyperparameters):
    # access all HPs through wandb.config, so loggin matches execution!
    config = wandb.config

    # make the model, data, and optimization problem
    model, train_loader, test_loader, criterion, optimizer = make(config)

    # and use them to train the model
    train(model, train_loader, criterion, optimizer, config)

    # and test its final performance
    test(model, test_loader)

  return model

In [10]:
def make(config):
  # Make the data
  train, test = get_data(train=True), get_data(train=False)
  train_loader = make_loader(train, batchsize=config.batch_size)
  test_loader = make_loader(test, batchsize=config.batch_size)

  # Make the model
  model = ConvNet(config.kernels, config.classes).to(device)

  # Make the loss and optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(
      model.parameter(), lr=config.learning_rate)

  return model, train_loader, test_loader, criterion, optimizer

In [12]:
# Conventional and convolutional neural network

class ConvNet(nn.Module):
  def __init__(self, kernels, classes=10):
    super(ConvNet, self).__init__()

    self.layer1 = nn.Sequential(
        nn.Conv2d(1, kernels[0], kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.layer2 = nn.Sequential(
        nn.Conv2d(16, kernels[1], kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.fc = nn.Linear(7 * 7 * kernels[-1], classes)

  def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(x)
    out = out.reshape(out.size(0), -1)
    out = self.fc(out)
    return out

## Define Training logic

In [13]:
def train(model, loader, criterion, optimizer, config):
  # Tell wandb to watch what the model gets up to: gradients, weight and model
  wandb.watch(model, criterion, log="all", log_frq=10)

  # Run training and track with wandb
  total_batches = len(loader) * config.epochs
  example_ct = 0  # Number of examples seen
  batch_ct = 0
  for epoch in tqdm(range(config.epochs)):
    for _, (images, labels) in enumerate(loader):

      loss = train_batch(images, labels, model, optimizer, criterion)
      example_ct += len(images)
      batch_ct += 1

      # Report metrics every 25th batch
      if ((batch_ct + 1) % 25) == 0:
        train_log(loss, example_ct, epoch)

def train_batch(images, labels, model, optimizer, criterion):
  images, lables = images.to(device), labels.to(device)

  # Forward pass ->
  outputs = model(images)
  loss = criterion(outputs, labels)

  # Backward pass ->
  optimizer.zero_grad()
  loss.backward()

  # Step with optimizer
  optimizer.step()

  return loss

In [14]:
def train_log(loss, example_ct, epoch):
  loss = float(loss)

  # where the magic happens
  wandb.loss({"epoch": epoch, "loss": loss}, step=example_ct)
  print(f"Loss after " + str(example_ct).zfill(5) + f" examples: {loss:.3f}")

##Define Testing Logic

In [16]:
def test(model, test_loader):
  model.eval()

  # Run the model on some test examples
  with torch.no_grad():
    correct, total = 0, 0
    for images, labels in test_loader:
      images, labels = images.to(device), labels.to(device)
      outputs =  model(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

    print(f"Accuracy of the model on the {total}" +
          f"test images: {100 * correct / total}%")

    wandb.log({"test_accuracy": correct / total})

  # Save the model in the exchangeable ONNX format
  torch.onnx.export(model, images, "model.onnx")
  wandb.save("model.onnx")

In [17]:
# Build, train and analyze the model with the pipeline
model = model_pipeline(config)

[34m[1mwandb[0m: Currently logged in as: [33mfrackowiak[0m ([33mfrackowiak_kajetan[0m). Use [1m`wandb login --relogin`[0m to force relogin


Traceback (most recent call last):
  File "<ipython-input-9-338f5da09198>", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
  File "<ipython-input-10-a5f8c301fdba>", line 3, in make
    train, test = get_data(train=True), get_data(train=False)
NameError: name 'get_data' is not defined


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

NameError: ignored