# 1. Imports & Installation

In [1]:
%%capture
!pip install wandb --upgrade

In [90]:
import numpy as np
import random
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms

import tqdm.notebook as tqdm

import wandb # wandb is used to monitor the network during training and evaluation

In [3]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 2. Defining the pipeline

In [74]:
# The whole pipeline
def model_pipeline(hyperparameters):
    
    with wandb.init(project="pytorch-pipeline",config=hyperparameters):

        config = wandb.config # We access hyperparameters through wandb so logging matches execution

        model, train_loader, test_loader, criterion, optimizer = make(config)
        print(model)

        train(model, train_loader, test_loader , criterion, optimizer, config) # Evaluate the model at a given frequency

    return model

In [6]:
# Initialise the model and it's parameters
def make(config):

    # Make the data
    train = get_data(train_bool=True)
    test = get_data(train_bool=False)

    train_loader = make_loader(train, batch_size = config.batch_size)
    test_loader = make_loader(test, batch_size = config.batch_size)

    # Make the model
    model = Network(config.kernels,config.classes).to(device)

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss() #combine logsoftmax and nlloss
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    return model, train_loader, test_loader, criterion, optimizer

In [91]:
# Download the dataset
def get_data(train_bool=True):
    transform = transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5),(0.5))
                                    ])
    
    dataset = datasets.MNIST(root='data/',
                             download=True,
                             train=train_bool,
                             transform=transform)
    return dataset

In [8]:
# Make the loader
def make_loader(dataset,batch_size):
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=True)
    return loader

# 3. Defining the CNN

In [44]:
class ConvBlock(nn.Module):
    def __init__(self,nb_in,nb_out):
        super(ConvBlock,self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_channels=nb_in,out_channels=nb_out,kernel_size=5,stride=1,padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(nb_out),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout2d()
        )

    def forward(self, x):
        return self.block(x)

In [43]:
class FullyConnectedBlock(nn.Module):
    def __init__(self,nbInput,nbClasses):
        super(FullyConnectedBlock,self).__init__()
        self.block = nn.Sequential(
            nn.Linear(nbInput,nbClasses),
            nn.Dropout()
        )
    
    def forward(self, x):
        return self.block(x)

In [45]:
class Network(nn.Module):
    def __init__(self, kernels, nb_classes=10):
        super(Network,self).__init__()
        layers=[]
        layers.append(ConvBlock(1,kernels[0])) # 28*28*1 -> 14*14*16
        layers.append(ConvBlock(kernels[0],kernels[1])) # 14*14*16 -> 7*7*32
        layers.append(nn.Flatten()) # 1568
        layers.append(FullyConnectedBlock(7*7*kernels[-1],nb_classes))
 
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# 4. Defining the training and test phase

In [68]:
def train(model, train_loader, test_loader ,criterion, optimizer, config):

    # Tell wandb to watch the model parameters (gradients,  weights, ...)
    wandb.watch(model,criterion,log="all",log_freq=10) #log_freq in number of steps

    nb_epochs = config.epochs

    for epoch in range(1,nb_epochs):
        model.train()

        progressB = tqdm.tqdm(enumerate(train_loader),total=len(train_loader))

        for _,(images,labels) in progressB:

            accuracy,loss = train_batch(images,labels,model,optimizer,criterion)

            wandb.log({'Train Loss': loss, 'Train Accuracy': accuracy, 'Epoch': epoch})
            progressB.set_description(f'loss: {loss.item():.2f}, accuracy: {accuracy:.2f},epoch: {epoch}/{nb_epochs}')


        # We test the modele after each epoch here
        if(epoch%1==0):
            test(model,test_loader)

In [87]:
def train_batch(images,labels,model,optimizer,criterion):
    images = images.to(device)
    labels = labels.to(device)

    # Forward propagation
    outputs = model(images)

    # Calculate softmax and cross entropy loss
    loss = criterion(outputs,labels)

    # Clear gradient
    optimizer.zero_grad()
    # Calculating gradient
    loss.backward()
    # Update parameters
    optimizer.step()

    # Compute accuracy for the batch
    _,predicted = torch.max(outputs.detach(),1)
    nb_correct = (predicted==labels).sum().item()
    total = len(labels) # batch size
    accuracy = 100*(nb_correct/total)

    return accuracy,loss

In [88]:
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    progressB = tqdm.tqdm(enumerate(test_loader),total=len(test_loader))
    with torch.no_grad(): # All the operations whill have no gradient
        for _,(images,labels) in progressB:

            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            _,predicted = torch.max(outputs.detach(),1)

            total+=labels.size(0)
            correct+=(predicted == labels).sum().item() 

        accuracy = 100*(correct/total)
        wandb.log({'Test Accuracy': accuracy})

        print(f'Accuracy on test set: {accuracy:.2f}')

    # Save the model in the exchangeable ONNX format
    torch.onnx.export(model,images,"model.onnx")
    wandb.save("model.onnx")



# 5. Using the pipeline

In [72]:
config = dict(
    epochs=10,
    classes=10,
    kernels=[16,32],
    batch_size=128,
    learning_rate=0.005,
    dataset="MNIST",
    architecture="CNN"
)

In [89]:
model = model_pipeline(config)

Network(
  (net): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (1): ReLU()
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout2d(p=0.5, inplace=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (1): ReLU()
        (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout2d(p=0.5, inplace=False)
      )
    )
    (2): Flatten(start_dim=1, end_dim=-1)
    (3): FullyConnectedBlock(
      (block): Sequential(
        (0): Linear(in_features=1568, out_features=10, bias=True)
        (1): Dropout(p=0.5, inplac

HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 97.41


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 97.95


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.20


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.29


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.28


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.53


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.34


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.51


HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Accuracy on test set: 98.73


VBox(children=(Label(value=' 0.00MB of 0.11MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.01952227264…

0,1
Train Loss,0.9321
Train Accuracy,59.375
Epoch,9.0
_runtime,166.0
_timestamp,1613688343.0
_step,4229.0
Test Accuracy,98.73


0,1
Train Loss,▆█▃▄▃▄▅▃▄▃▅▃▃▃▂▄▃▂▃▃▁▂▃▄▃▃▂▄▃▃▃▃▂▄▄▂▂▃▂▂
Train Accuracy,▄▁█▇▅▄▅▄▃▄▃▅▃▄▆▃▅▅▅▄▇▇▅▃▇▅▇▃▃▄▆▅▅▄▂▆▇▅▇▄
Epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Test Accuracy,▁▄▅▆▆▇▆▇█
