In [1]:
import random
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms

from tqdm.notebook import tqdm

torch.backends.cudnn.deterministic  = True
random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

device = "cuda" if torch.cuda.is_available() else "cpu"


In [2]:
!pip install wandb --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.14.0-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.17.0-py2.py3-none-any.whl (189 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.1/189.1 KB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)

In [3]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
config = dict(epochs = 5 , classes = 10 , kernels = [16,32] , batch_size = 128 , learning_rate = .005 , dataset = "MNIST" , architecture = "CNN")

In [5]:
def model_pipeline(hyperparam):
  with wandb.init(project = "pytorch-demo" , config = hyperparam):
    config = wandb.config
    model , train_loader , test_loader , criterion , optimizer = make(config)
    print(model)
    train(model , train_loader , criterion , optimizer , config)
    test(model , test_loader)
  return model

In [6]:
def make(config):
  train ,test = get_data(train = True) , get_data(train = False)
  train_loader = make_loader(train , batch_size = config.batch_size)
  test_loader = make_loader(test , batch_size = config.batch_size)

  model = ConvNet(config.kernels , config.classes).to(device)

  criterion = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters() , lr = config.learning_rate)

  return model , train_loader , test_loader , criterion , optimizer



  

In [7]:
def get_data(slice = 5 , train = True):
  full_dataset = torchvision.datasets.MNIST(root = "." , train = train , transform = transforms.ToTensor() , download = True)

  sub_dataset = torch.utils.data.Subset(full_dataset , indices = range(0 , len(full_dataset) , slice))
  return sub_dataset

def make_loader(dataset , batch_size):
  loader = torch.utils.data.DataLoader(dataset = dataset, batch_size = batch_size , shuffle = True)
  return loader

In [8]:
import torch.nn as nn
class ConvNet(nn.Module):
  def __init__(self , kernels , classes):
    super().__init__()

    self.layer1 = nn.Sequential(
        nn.Conv2d(1 , kernels[0] , kernel_size = 5 , stride = 1 , padding = 2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2 , stride = 2)
    )
    self.layer2 = nn.Sequential(
        nn.Conv2d(16 , kernels[1] , kernel_size = 5 , stride =1,padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2 , stride = 2)
    )
    self.fc = nn.Linear(7*7*kernels[-1] , classes)

  def forward(self , x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = out.reshape(out.size(0) , -1)
    out=  self.fc(out)
    return out


we will use wandb watch and log

watch  - to track graident

log = everthing else

In [9]:
def train(model , loader , critierion , optimizer , config):
  wandb.watch(model , critierion , log = 'all' , log_freq = 10)

  total_batch = len(loader) * config.epochs
  example_ct = 0
  batch_ct = 0

  for epoch in tqdm(range(config.epochs)):
    for _ , (images , labels) in enumerate(loader):
      loss = train_batch(images,labels , model , optimizer , critierion)
      example_ct += len(images)
      batch_ct +=1

      if((batch_ct +1) % 25 == 0):
        train_log(loss , example_ct , epoch)

def train_batch(images , labels,model , optimizer , critierion):
  images = images.to(device)
  labels = labels.to(device)

  output = model(images)
  loss = critierion(output , labels)
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
  return loss



In [10]:
def train_log(loss , example_ct , epoch):
  loss = float(loss)
  wandb.log({"epoch":epoch , "loss":loss} , step = example_ct)
  print(f"loss after " + str(example_ct).zfill(5) + f"example: {loss:.3f}")

In [13]:
def test(model , test_loader):

  with torch.no_grad():
    correct , total= 0,0
    for images , labels in test_loader:
      images , labels = images.to(device) , labels.to(device)
      output = model(images)
      _ , pred = torch.max(output.data ,1)
      total += labels.size(0)
      correct += (pred == labels).sum().item()

    print(f"accuracy of model on {total} images is {correct/total}% ")
    wandb.log({"test_accuracy" : correct/total})

  torch.onnx.export(model , images,"model.onnx")
  wandb.save("model.onnx")


In [14]:
model = model_pipeline(config)

ConvNet(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=1568, out_features=10, bias=True)
)


  0%|          | 0/5 [00:00<?, ?it/s]

loss after 03072example: 0.423
loss after 06272example: 0.222
loss after 09472example: 0.099
loss after 12640example: 0.194
loss after 15840example: 0.115
loss after 19040example: 0.054
loss after 22240example: 0.051
loss after 25408example: 0.014
loss after 28608example: 0.026
loss after 31808example: 0.105
loss after 35008example: 0.053
loss after 38176example: 0.012
loss after 41376example: 0.016
loss after 44576example: 0.048
loss after 47776example: 0.037
loss after 50944example: 0.065
loss after 54144example: 0.019
loss after 57344example: 0.010
accuracy of model on 2000 images is 0.978% 


VBox(children=(Label(value='0.113 MB of 0.113 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆███
loss,█▅▃▄▃▂▂▁▁▃▂▁▁▂▁▂▁▁
test_accuracy,▁

0,1
epoch,4.0
loss,0.00957
test_accuracy,0.978
