# Imports and Setup

In [None]:
# Install the Weights & Biases client quietly. `%pip` (rather than `!pip`) installs into
# the environment of the running kernel, so the following `import wandb` sees the package.
%pip install -q wandb

[K     |████████████████████████████████| 1.8MB 5.3MB/s 
[K     |████████████████████████████████| 163kB 18.8MB/s 
[K     |████████████████████████████████| 102kB 7.8MB/s 
[K     |████████████████████████████████| 133kB 17.1MB/s 
[K     |████████████████████████████████| 102kB 7.6MB/s 
[K     |████████████████████████████████| 71kB 7.2MB/s 
[?25h  Building wheel for watchdog (setup.py) ... [?25l[?25hdone
  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
import wandb

# Authenticate this runtime with Weights & Biases through the CLI. The command is
# interactive: it asks for an API key and stores it in the user's netrc file.
!wandb login

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np

### Set up the `device` variable.

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


# Download and Prepare Dataset

In [None]:
# Preprocessing shared by both splits: convert each image to a tensor, then
# standardise its single channel with mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split: fetched into ./data if missing, served in shuffled batches of 64.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
)

# Test split: same preprocessing, fixed (unshuffled) order.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=64,
    shuffle=False,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# Human-readable label for each class index: the digit itself, as a string.
classes = tuple(str(digit) for digit in range(10))

# Define Model Architecture

In [None]:
class Net(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, one 2x2 max-pool, two linear layers.

    ``fc1`` takes 9216 input features, which is what a 1x28x28 input produces
    after the conv/pool stack (64 channels * 12 * 12). ``forward`` returns ten
    log-probabilities per sample (``log_softmax`` over dim 1).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 32 -> 64 channels, 3x3 kernels, stride 1, no padding.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)

        # Classifier head: 9216 -> 128 -> 10.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # Convolutional block: conv -> ReLU -> conv -> ReLU -> 2x2 max-pool.
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = F.max_pool2d(features, kernel_size=2)

        # Keep the batch dimension, flatten everything else for the linear layers.
        flat = torch.flatten(features, start_dim=1)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

# Training Loop

In [None]:
def train(model, device, train_loader, optimizer, epoch, steps_per_epoch=20):
  """Run one (possibly truncated) training epoch and log its metrics to W&B.

  Args:
    model: Network to optimise. Its output is passed to ``F.nll_loss``, so it
      is treated as per-class log-probabilities.
    device: Device every batch is moved to before the forward pass.
    train_loader: Iterable yielding ``(data, target)`` batches.
    optimizer: Optimizer that updates ``model``'s parameters.
    epoch: Epoch index; only used in the printed progress line.
    steps_per_epoch: Maximum number of batches to train on in this call.
      ``None`` means the whole loader is consumed.

  Prints (without a trailing newline) and logs two values: 'Loss' is the loss
  of the last processed batch, not an epoch average; 'Accuracy' is the
  percentage of correct predictions over all processed batches, rounded to
  two decimals. If no batch was processed, nothing is logged. Returns ``None``.
  """
  # Switch model to training mode. This is necessary for layers like dropout, batchnorm etc which behave differently in training and evaluation mode
  model.train()
  train_total = 0
  train_correct = 0
  last_loss = None

  # We loop over the data iterator, and feed the inputs to the network and adjust the weights.
  for batch_idx, (data, target) in enumerate(train_loader):
    # `>=` (not `>`) so that exactly `steps_per_epoch` batches are trained on.
    if steps_per_epoch is not None and batch_idx >= steps_per_epoch:
      break

    # Load the input features and labels from the training dataset
    data, target = data.to(device), target.to(device)

    # Reset the gradients to 0 for all learnable weight parameters
    optimizer.zero_grad()

    # Forward pass: predict per-class log-probabilities for the batch
    output = model(data)

    # Negative log-likelihood loss against the true labels
    loss = F.nll_loss(output, target)

    # Accuracy bookkeeping. Detached because no gradient is needed here; the
    # comparison is reduced on-tensor instead of element by element in Python.
    predictions = output.detach().argmax(dim=1)
    train_total += target.size(0)
    train_correct += (predictions == target).sum().item()

    # Backward pass: compute the gradients of the loss w.r.t. the model's parameters
    loss.backward()

    # Update the neural network weights
    optimizer.step()

    last_loss = loss.item()

  # Nothing was trained on (empty loader or steps_per_epoch <= 0): there is no
  # loss to report and the accuracy would be a division by zero.
  if train_total == 0:
    print('Epoch [{}], no training batches processed'.format(epoch), end='')
    return

  acc = round((train_correct / train_total) * 100, 2)
  print('Epoch [{}], Loss: {}, Accuracy: {}'.format(epoch, last_loss, acc), end='')
  wandb.log({'Train Loss': last_loss, 'Train Accuracy': acc})
  

# Testing Loop

In [None]:
def test(model, device, test_loader, classes):
  # Switch model to evaluation mode. This is necessary for layers like dropout, batchnorm etc which behave differently in training and evaluation mode
  model.eval()
  test_loss = 0
  test_total = 0
  test_correct = 0

  example_images = []
  with torch.no_grad():
      for data, target in test_loader:
          # Load the input features and labels from the test dataset
          data, target = data.to(device), target.to(device)
          
          # Make predictions: Pass image data from test dataset, make predictions about class image belongs to (0-9 in this case)
          output = model(data)
          
          # Compute the loss sum up batch loss
          test_loss += F.nll_loss(output, target, reduction='sum').item()
          
          scores, predictions = torch.max(output.data, 1)
          test_total += target.size(0)
          test_correct += int(sum(predictions == target))
          
          # WandB – Log images in your test dataset automatically, along with predicted and true labels by passing pytorch tensors with image data into wandb.Image
          # example_images.append(wandb.Image(
          #     data[0], caption="Pred: {} Truth: {}".format(classes[pred[0].item()], classes[target[0]])))
  acc = round((test_correct / test_total) * 100, 2)
  print(' Test_loss: {}, Test_accuracy: {}'.format(test_loss/test_total, acc))
  wandb.log({'Test Loss': test_loss, 'Test Accuracy': acc})


# Train

In [None]:
# Seed PyTorch's global random number generator before the model is built so
# that the initial weights are the same on every Restart & Run All.
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
torch.manual_seed(0)

# Build the network and move its parameters to the selected device.
net = Net().to(device)
print(net)

# Adam with its default hyper-parameters.
optimizer = optim.Adam(net.parameters())

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [None]:
# Number of train + evaluate passes in this run.
NUM_EPOCHS = 10

wandb.init(project='common-ml-errors')
try:
  # Ask W&B to track the network; log='all' requests both gradients and parameters.
  wandb.watch(net, log='all')

  for epoch in range(NUM_EPOCHS):
    train(net, device, trainloader, optimizer, epoch)
    test(net, device, testloader, classes)

  print('Finished Training')
finally:
  # Close the W&B run even when training raises, so the run is not left open.
  wandb.finish()

Epoch [0], Loss: 0.764218807220459, Accuracy: 61.9 Test_loss: 0.5004308021306991, Test_accuracy: 85.13
Epoch [1], Loss: 0.3415467143058777, Accuracy: 86.83 Test_loss: 0.2989566255450249, Test_accuracy: 90.95
Epoch [2], Loss: 0.2098771631717682, Accuracy: 91.89 Test_loss: 0.22856234542131423, Test_accuracy: 93.03
Epoch [3], Loss: 0.31515854597091675, Accuracy: 94.05 Test_loss: 0.17724223912954332, Test_accuracy: 94.56
Epoch [4], Loss: 0.21923533082008362, Accuracy: 95.31 Test_loss: 0.1402343296289444, Test_accuracy: 95.87
Epoch [5], Loss: 0.25874876976013184, Accuracy: 95.01 Test_loss: 0.13948047368824482, Test_accuracy: 95.93
Epoch [6], Loss: 0.12303639948368073, Accuracy: 95.98 Test_loss: 0.118904037296772, Test_accuracy: 96.46
Epoch [7], Loss: 0.08606034517288208, Accuracy: 95.31 Test_loss: 0.0978650753751397, Test_accuracy: 97.19
Epoch [8], Loss: 0.18735285103321075, Accuracy: 96.35 Test_loss: 0.0895503547489643, Test_accuracy: 97.23
Epoch [9], Loss: 0.15630902349948883, Accuracy: 9

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Loss,0.15631
Train Accuracy,95.83
_step,19.0
_runtime,37.0
_timestamp,1605883104.0
Test Loss,988.39707
Test Accuracy,97.25


0,1
Train Loss,█▄▂▃▂▃▁▁▂▂
Train Accuracy,▁▆▇███████
_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
_runtime,▁▂▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇▇█
_timestamp,▁▂▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇▇█
Test Loss,█▅▃▂▂▂▂▁▁▁
Test Accuracy,▁▄▆▆▇▇████
