# Imports

In [1]:
import torch.nn as nn
import math
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import CyclicLR
from torchsummary import summary

import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
import os
import argparse
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import torch


# Importing time
from time import time
from datetime import datetime

# Check GPU

In [2]:
# Check if using GPU 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
  print(f"Nvidia Cuda/GPU is available!")

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

# Path to store results
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

Nvidia Cuda/GPU is available!
Fri Apr 16 15:53:40 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    28W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------

# Load Google Drive


In [None]:
# make sure to change it to your own path to our project folder
from google.colab import drive
drive.mount('/content/gdrive' )
%cd '/content/gdrive/MyDrive/Mcgill/Winter2021/ECSE552final'
#%cd '/content/gdrive/MyDrive/ECSE552final'
%ls

# Training and testing function

## training and testing

In [None]:
def train_step(train_data, model, criterion, optimizer):
    """Train network.

    Args:
        train_data (DataLoader): Validation set to perform the evaluation
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
    """
    total_loss = 0
    predicted_ok = 0
    total_images = 0
    model.train()

    for images, labels in train_data:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()

        # Add training Tensor to the model (input).
        output = model(images)
        loss = criterion(output, labels)
        total_loss += loss.item() * images.size(0)

        _, predicted = torch.max(output.data, 1)
        total_images += labels.size(0)
        predicted_ok += (predicted == labels).sum().item()
        accuracy = predicted_ok/total_images*100

        # Run training (backward propagation).
        loss.backward()

        # Optimize weights.
        optimizer.step()

        
    epoch_loss = total_loss / len(train_data.dataset)

    return model, accuracy, epoch_loss



def test_evaluation(validation_data, model, criterion):
    """Test trained network
    Args:
        validation_data (DataLoader): Validation set to perform the evaluation
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
    """
    total_loss = 0
    predicted_ok = 0
    total_images = 0
    model.eval()

    for images, labels in validation_data:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        pred = model(images)
        loss = criterion(pred, labels)
        total_loss += loss.item() * images.size(0)

        _, predicted = torch.max(pred.data, 1)
        total_images += labels.size(0)
        predicted_ok += (predicted == labels).sum().item()
        accuracy = predicted_ok/total_images*100
        error = (1-predicted_ok/total_images)*100

    epoch_loss = total_loss / len(validation_data.dataset)

    return model, accuracy, error, epoch_loss

## full training and testing function with plot

In [None]:
def training_loop(model, criterion, optimizer, scheduler, train_data, validation_data, epochs, print_every=1):
    """Training loop.

    Args:
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
        train_data (DataLoader): Validation set to perform the evaluation
        validation_data (DataLoader): Validation set to perform the evaluation
        epochs (int): global parameter to define epochs number
        print_every (int): defines how many times to print training progress
    """
    train_losses = []
    valid_losses = []
    test_error = []
    testing_acc = []

    # scheduler = CyclicLR(optimizer, base_lr = 0.1, max_lr= 0.5 , step_size_up= N_EPOCHS/2 , cycle_momentum=True, base_momentum=0.95, max_momentum=0.85)

    # Train model
    for epoch in range(0, epochs):

        print ("epoch", epoch)
        # Train_step
        model, training_acc, train_loss = train_step(train_data, model, criterion, optimizer)
        train_losses.append(train_loss)

        # Decay learning rate if needed.
        scheduler.step()

        if epoch % print_every == (print_every - 1):
            # Validate_step
            with torch.no_grad():
                model, accuracy, error, valid_loss = test_evaluation(validation_data, model, criterion)
                valid_losses.append(valid_loss)
                test_error.append(error)
                testing_acc.append(accuracy)

            print(f'{datetime.now().time().replace(microsecond=0)} --- '
                  f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Valid loss: {valid_loss:.4f}\t'
                  f'Training accuracy: {training_acc:.2f}%\t'
                  f'Test accuracy: {accuracy:.2f}%\t')

    ## Save results and plot figures
    np.savetxt(os.path.join(RESULTS, "Test_error.csv"), test_error, delimiter=",")
    np.savetxt(os.path.join(RESULTS, "Train_Losses.csv"), train_losses, delimiter=",")
    np.savetxt(os.path.join(RESULTS, "Valid_Losses.csv"), valid_losses, delimiter=",")
    plot_results(train_losses, valid_losses, test_error, testing_acc)

    return model, optimizer, (train_losses, valid_losses, test_error, testing_acc)


def plot_results(train_losses, valid_losses, test_error, testing_acc):
    """Plot results.
    Args:
        train_losses: training losses as calculated in the training_loop
        valid_losses: validation losses as calculated in the training_loop
        test_error: test error as calculated in the training_loop
    """
    fig = plt.plot(train_losses, 'r-s', valid_losses, 'b-o')
    plt.title('aihwkit ResNet18 with CIFAR10 dataset')
    plt.legend(fig[:2], ['Training Losses', 'Validation Losses'])
    plt.xlabel('Epoch number')
    plt.ylabel('Loss [A.U.]')
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'test_losses.png'))
    plt.close()

    fig = plt.plot(test_error, 'r-s')
    plt.title('aihwkit ResNet18 with CIFAR10 dataset')
    plt.legend(fig[:1], ['Test Error'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test Error [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'test_error.png'))
    plt.close()

    fig = plt.plot(testing_acc, 'r-s')
    plt.title('aihwkit ResNet18 with CIFAR10 dataset')
    plt.legend(fig[:1], ['Test accuracy'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test accuracy [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'test_accuracy.png'))
    plt.close()


# Training in CIFAR 10
Prepare data and set hyperparamter



In [None]:
BATCH_SIZE = 512

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print ("done!")

## Resent 18	


### function for creating resnet 18

In [None]:
class Block(nn.Module):
    def __init__(self, num_layers, in_channels, out_channels, identity_downsample=None, stride=1):
        assert num_layers in [18, 34, 50, 101, 152], "should be a a valid architecture"
        super(Block, self).__init__()
        self.num_layers = num_layers
        if self.num_layers > 34:
            self.expansion = 4
            print ("in")
        else:
            self.expansion = 1
        
        # for ResNet18 and 34, connect input directly to (3x3) kernel (skip first (1x1))

        self.conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.identity_downsample = identity_downsample
        self.relu2 = nn.ReLU()

    def forward(self, x):
        identity = x

        x = self.conv2(x)     #nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        x = self.bn2(x)       #nn.BatchNorm2d(out_channels)
        x = self.relu(x)      #nn.ReLU()
        x = self.conv3(x)     #nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=1, stride=1, padding=0)
        x = self.bn3(x)       #nn.BatchNorm2d(out_channels * self.expansion)

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x += identity
        x = self.relu2(x)
        return x


class ResNet(nn.Module):
    def __init__(self, num_layers, block, image_channels, num_classes):
        assert num_layers in [18, 34, 50, 101, 152], f'ResNet{num_layers}: Unknown architecture! Number of layers has ' \
                                                     f'to be 18, 34, 50, 101, or 152 '
        super(ResNet, self).__init__()
        if num_layers < 50:
            self.expansion = 1
        else:
            self.expansion = 4
        if num_layers == 18:
            layers = [2, 2, 2, 2]
        elif num_layers == 34 or num_layers == 50:
            layers = [3, 4, 6, 3]
        elif num_layers == 101:
            layers = [3, 4, 23, 3]
        else:
            layers = [3, 8, 36, 3]
        self.in_channels = 64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        #ResNetLayers
        self.layer1 = self.make_layers(num_layers, block, layers[0], intermediate_channels=64, stride=1)
        self.layer2 = self.make_layers(num_layers, block, layers[1], intermediate_channels=128, stride=2)
        self.layer3 = self.make_layers(num_layers, block, layers[2], intermediate_channels=256, stride=2)
        self.layer4 = self.make_layers(num_layers, block, layers[3], intermediate_channels=512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(512 * self.expansion, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

    def make_layers(self, num_layers, block, num_residual_blocks, intermediate_channels, stride):
        layers = []

        identity_downsample = nn.Sequential(nn.Conv2d(self.in_channels, intermediate_channels*self.expansion, kernel_size=1, stride=stride),
                                            nn.BatchNorm2d(intermediate_channels*self.expansion))
        layers.append(block(num_layers, self.in_channels, intermediate_channels, identity_downsample, stride))
        self.in_channels = intermediate_channels * self.expansion # 256
        for i in range(num_residual_blocks - 1):
            layers.append(block(num_layers, self.in_channels, intermediate_channels)) # 256 -> 64, 64*4 (256) again
        return nn.Sequential(*layers)


def ResNet18(img_channels=3, num_classes=1000):
    return ResNet(18, Block, img_channels, num_classes)



model = ResNet18(img_channels=3, num_classes=10).to(DEVICE)
summary(model, (3, 32, 32), 512)
# print (model)

### Training+Testing

In [None]:
model = ResNet18(img_channels=3, num_classes=10).to(DEVICE)

N_EPOCHS = 30
BATCH_SIZE = 512
lr = 0.1
SEED = 1
N_CLASSES = 10

%pwd
%mkdir 'results'/'ResNet18 with CIFAR10 dataset'

RESULTS = os.path.join(os.getcwd(), 'results', 'ResNet18 with CIFAR10 dataset')

print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
      f'Start running!')

optimizer = optim.SGD(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

scheduler = CyclicLR(optimizer, base_lr = 0.1, max_lr= 0.5 , step_size_up= N_EPOCHS/2 , cycle_momentum=True, base_momentum=0.95, max_momentum=0.85)
# scheduler = CyclicLR(optimizer, base_lr = 0.01, max_lr= 0.5 , step_size_up= N_EPOCHS/2 , cycle_momentum=True, base_momentum=0.95, max_momentum=0.85)
# 22:38:28 --- Epoch: 29	Train loss: 0.3007	Valid loss: 0.4812	Training accuracy: 89.31%	Test accuracy: 84.46%

model, optimizer, _ = training_loop(model, criterion, optimizer, scheduler, trainloader, testloader, N_EPOCHS, print_every= 1)

print(f'{datetime.now().time().replace(microsecond=0)} --- '
          f'Complete running!')

## VGG 8

### Training parameters

In [None]:
# Training parameters
SEED = 1
N_EPOCHS = 50
BATCH_SIZE = 1024
LEARNING_RATE = 0.01
N_CLASSES = 10

### Data Preparation

In [None]:
# make directories to store the training and validation data and results at the end
%pwd
if not os.path.exists("vgg8_cifar10_data") and not os.path.exists("vgg8_cifar10_results_with_pytorch_layers"):
  print("Making directories....")
  %mkdir vgg8_cifar10_data
  %mkdir vgg8_cifar10_results_with_pytorch_layers
else:
  print("Directories already exists!")

Directories already exists!


In [None]:
# set paths for training and validation data and results
PATH_DATASET = "./vgg8_cifar10_data"
RESULTS = "./vgg8_cifar10_results_with_pytorch_layers"

In [None]:
# load data
def load_images_cifar10():
    """Load images for train from torchvision datasets."""

    mean = torch.tensor([0.4914, 0.4822, 0.4465])
    std = torch.tensor([0.2023, 0.1994, 0.2010])

    print(f'Normalization data: ({mean},{std})')

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])
    train_set = datasets.CIFAR10(PATH_DATASET, download=True, train=True, transform=transform)
    val_set = datasets.CIFAR10(PATH_DATASET, download=True, train=False, transform=transform)
    train_data = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    validation_data = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

    return train_data, validation_data

### Architecture

In [None]:
# adapted from https://github.com/IBM/aihwkit/blob/master/examples/11_vgg8_training.py
def VGG8():
    """VGG8 inspired analog model."""
    model = nn.Sequential(
        
        # conv layers
        nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1),

        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1),

        nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1),

        # Linear layers
        nn.Flatten(),
        nn.Linear(in_features=8192, out_features=1024),
        nn.ReLU(),
        nn.Linear(in_features=1024, out_features=N_CLASSES),
        nn.LogSoftmax(dim=1)
    )

    return model

### SGD Optimizer

In [None]:
def create_sgd_optimizer(model, learning_rate):
    """Create the analog-aware optimizer.
    Args:
        model (nn.Module): model to be trained
        learning_rate (float): global parameter to define learning rate
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    # optimizer.regroup_param_groups(model)

    return optimizer

### Training Function

In [None]:
def train(train_data, model, criterion, optimizer):
    """Train network.
    Args:
        train_data (DataLoader): Validation set to perform the evaluation
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
    """
    total_loss = 0
    predicted_ok = 0
    total_images = 0
    model.train()
    for images, labels in train_data:

        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        # Add training Tensor to the model (input).
        output = model(images)
        loss = criterion(output, labels)
        total_loss += loss.item() * images.size(0)

        _, predicted = torch.max(output.data, 1)
        total_images += labels.size(0)
        predicted_ok += (predicted == labels).sum().item()
        accuracy = predicted_ok/total_images*100

        # Run training (backward propagation).
        loss.backward()

        # Optimize weights.
        optimizer.step()
        
    epoch_loss = total_loss / len(train_data.dataset)

    return model, accuracy, optimizer, epoch_loss

### Validation Function

In [None]:
def validate(validation_data, model, criterion):
    """Test trained network
    Args:
        validation_data (DataLoader): Validation set to perform the evaluation
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
    """
    total_loss = 0
    predicted_ok = 0
    total_images = 0

    model.eval()

    for images, labels in validation_data:
        images = images.to(device)
        labels = labels.to(device)

        pred = model(images)
        loss = criterion(pred, labels)
        total_loss += loss.item() * images.size(0)

        _, predicted = torch.max(pred.data, 1)
        total_images += labels.size(0)
        predicted_ok += (predicted == labels).sum().item()
        accuracy = predicted_ok/total_images*100
        error = (1-predicted_ok/total_images)*100

    epoch_loss = total_loss / len(validation_data.dataset)

    return model, epoch_loss, error, accuracy

### Training/Validation Loop

In [None]:
def train_val_loop_sc(model, criterion, optimizer, 
                   train_data, validation_data, epochs, 
                   scheduler, print_every=1):
    """Training loop.
    Args:
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
        train_data (DataLoader): Validation set to perform the evaluation
        validation_data (DataLoader): Validation set to perform the evaluation
        epochs (int): global parameter to define epochs number
        print_every (int): defines how many times to print training progress
    """
    train_losses = []
    valid_losses = []
    test_error = []
    testing_acc = []

    # Train model
    for epoch in range(0, epochs):
        
        # Train_step
        model, training_acc, optimizer, train_loss = train(train_data, model, criterion, optimizer)
        train_losses.append(train_loss)

        if epoch % print_every == (print_every - 1):
            # Validate_step
            with torch.no_grad():
                model, valid_loss, error, accuracy = validate(
                    validation_data, model, criterion)
                valid_losses.append(valid_loss)
                test_error.append(error)
                testing_acc.append(accuracy)


            print(f'{datetime.now().time().replace(microsecond=0)} --- '
                  f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Valid loss: {valid_loss:.4f}\t'
                  f'Training accuracy: {training_acc:.2f}%\t'
                  f'Test error: {error:.2f}%\t'
                  f'Test accuracy: {accuracy:.2f}%\t')
        
        # for cyclic learning rate training 
        scheduler.step()

    # Save results and plot figures
    # np.savetxt(os.path.join(RESULTS, "Test_error.csv"), test_error, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Train_Losses.csv"), train_losses, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Valid_Losses.csv"), valid_losses, delimiter=",")
    plot_results_super_convergence(train_losses, valid_losses, test_error, testing_acc)

    return model, optimizer, (train_losses, valid_losses, test_error)

In [None]:
def train_val_loop_nsc(model, criterion, optimizer, 
                   train_data, validation_data, epochs, print_every=1):
    """Training loop.
    Args:
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
        train_data (DataLoader): Validation set to perform the evaluation
        validation_data (DataLoader): Validation set to perform the evaluation
        epochs (int): global parameter to define epochs number
        print_every (int): defines how many times to print training progress
    """
    train_losses = []
    valid_losses = []
    test_error = []
    testing_acc = []

    # Train model
    for epoch in range(0, epochs):
        
        # Train_step
        model, training_acc, optimizer, train_loss = train(train_data, model, criterion, optimizer)
        train_losses.append(train_loss)

        if epoch % print_every == (print_every - 1):
            # Validate_step
            with torch.no_grad():
                model, valid_loss, error, accuracy = validate(
                    validation_data, model, criterion)
                valid_losses.append(valid_loss)
                test_error.append(error)
                testing_acc.append(accuracy)


            print(f'{datetime.now().time().replace(microsecond=0)} --- '
                  f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Valid loss: {valid_loss:.4f}\t'
                  f'Training accuracy: {training_acc:.2f}%\t'
                  f'Test error: {error:.2f}%\t'
                  f'Test accuracy: {accuracy:.2f}%\t')

    # Save results and plot figures
    # np.savetxt(os.path.join(RESULTS, "Test_error.csv"), test_error, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Train_Losses.csv"), train_losses, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Valid_Losses.csv"), valid_losses, delimiter=",")
    plot_results(train_losses, valid_losses, test_error, testing_acc)

    return model, optimizer, (train_losses, valid_losses, test_error)

### Plot Results

In [None]:
def plot_results_super_convergence(train_losses, valid_losses, test_error, test_acc):
    """Plot results.
    Args:
        train_losses: training losses as calculated in the training_loop
        valid_losses: validation losses as calculated in the training_loop
        test_error: test error as calculated in the training_loop
    """
    fig = plt.plot(train_losses, 'r-s', valid_losses, 'b-o')
    plt.title('VGG8 CIFAR-10 Super Convergence Loss')
    plt.legend(fig[:2], ['Training Losses', 'Validation Losses'])
    plt.xlabel('Epoch number')
    plt.ylabel('Loss [A.U.]')
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_cifar10_sc_test_losses.png'))
    plt.close()

    fig = plt.plot(test_error, 'r-s')
    plt.title('VGG8 CIFAR-10 Super Convergence Test Error')
    plt.legend(fig[:1], ['Test Error'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test Error [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_cifar10_sc_test_error.png'))
    plt.close()

    fig = plt.plot(test_acc, 'r-s')
    plt.title('VGG8 CIFAR-10 Super Convergence Accuracy')
    plt.legend(fig[:1], ['Test accuracy'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test accuracy [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_cifar10_sc_test_accuracy.png'))
    plt.close()

In [None]:
def plot_results(train_losses, valid_losses, test_error, test_acc):
    """Plot results.
    Args:
        train_losses: training losses as calculated in the training_loop
        valid_losses: validation losses as calculated in the training_loop
        test_error: test error as calculated in the training_loop
    """
    fig = plt.plot(train_losses, 'r-s', valid_losses, 'b-o')
    plt.title('VGG8 CIFAR-10 Loss')
    plt.legend(fig[:2], ['Training Losses', 'Validation Losses'])
    plt.xlabel('Epoch number')
    plt.ylabel('Loss [A.U.]')
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_cifar10_test_losses.png'))
    plt.close()

    fig = plt.plot(test_error, 'r-s')
    plt.title('VGG8 CIFAR-10 Test Error')
    plt.legend(fig[:1], ['Test Error'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test Error [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_cifar10_test_error.png'))
    plt.close()

    fig = plt.plot(test_acc, 'r-s')
    plt.title('VGG8 CIFAR-10 Accuracy')
    plt.legend(fig[:1], ['Test accuracy'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test accuracy [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_cifar10_test_accuracy.png'))
    plt.close()

### Main() - Super Convergence

In [None]:
# set seed
torch.manual_seed(SEED)

<torch._C.Generator at 0x7eff6452e870>

In [None]:
# Load datasets.
train_data, validation_data = load_images_cifar10()

Normalization data: (tensor([0.4914, 0.4822, 0.4465]),tensor([0.2023, 0.1994, 0.2010]))
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Prepare the model.
model = VGG8()
model.to(device)
print(model)

Sequential(
  (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU()
  (14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (16): ReLU()
  (17): MaxPool2d(ker

In [None]:
# optimizer
optimizer = create_sgd_optimizer(model, LEARNING_RATE)

# super convergence scheduler
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, 
                                              base_lr = 0.01, 
                                              max_lr= 0.15, 
                                              step_size_up= math.floor(N_EPOCHS/2), 
                                              cycle_momentum=True, 
                                              base_momentum=0.95, 
                                              max_momentum=0.85)
# loss function
criterion = nn.CrossEntropyLoss()

In [None]:
print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
  f'Started Vgg8 Example')

# training loop
model, optimizer, _ = train_val_loop_sc(model, criterion, optimizer, 
                                    train_data, validation_data,
                                        N_EPOCHS, scheduler)

print(f'{datetime.now().time().replace(microsecond=0)} --- '
  f'Completed Vgg8 Example')


19:03:30 --- Started Vgg8 Example
19:04:04 --- Epoch: 0	Train loss: 1.5105	Valid loss: 1.6177	Training accuracy: 44.92%	Test error: 59.62%	Test accuracy: 40.38%	
19:04:40 --- Epoch: 1	Train loss: 1.2788	Valid loss: 1.1733	Training accuracy: 55.29%	Test error: 41.97%	Test accuracy: 58.03%	
19:05:16 --- Epoch: 2	Train loss: 0.8797	Valid loss: 0.8934	Training accuracy: 68.55%	Test error: 30.50%	Test accuracy: 69.50%	
19:05:52 --- Epoch: 3	Train loss: 0.6960	Valid loss: 0.9109	Training accuracy: 75.36%	Test error: 29.24%	Test accuracy: 70.76%	
19:06:28 --- Epoch: 4	Train loss: 0.6185	Valid loss: 0.8048	Training accuracy: 78.39%	Test error: 27.48%	Test accuracy: 72.52%	
19:07:03 --- Epoch: 5	Train loss: 0.5567	Valid loss: 1.0608	Training accuracy: 80.41%	Test error: 32.44%	Test accuracy: 67.56%	
19:07:39 --- Epoch: 6	Train loss: 0.4940	Valid loss: 0.7027	Training accuracy: 82.68%	Test error: 22.90%	Test accuracy: 77.10%	
19:08:15 --- Epoch: 7	Train loss: 0.4398	Valid loss: 0.5719	Training 

### Main() - Non Super Convergence

In [None]:
# set seed
torch.manual_seed(SEED)

<torch._C.Generator at 0x7eff6452e870>

In [None]:
# Load datasets.
train_data, validation_data = load_images_cifar10()

Normalization data: (tensor([0.4914, 0.4822, 0.4465]),tensor([0.2023, 0.1994, 0.2010]))
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Prepare the model.
model = VGG8()
model.to(device)
print(model)

Sequential(
  (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU()
  (14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (16): ReLU()
  (17): MaxPool2d(ker

In [None]:
# optimizer
optimizer = create_sgd_optimizer(model, LEARNING_RATE)

# loss function
criterion = nn.CrossEntropyLoss()

In [None]:
print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
  f'Started Vgg8 Example')

# training loop
model, optimizer, _ = train_val_loop_nsc(model, criterion, optimizer, 
                                    train_data, validation_data,
                                        N_EPOCHS)

print(f'{datetime.now().time().replace(microsecond=0)} --- '
  f'Completed Vgg8 Example')


19:19:11 --- Started Vgg8 Example
19:19:47 --- Epoch: 0	Train loss: 1.7668	Valid loss: 1.6773	Training accuracy: 37.11%	Test error: 61.06%	Test accuracy: 38.94%	
19:20:23 --- Epoch: 1	Train loss: 1.4309	Valid loss: 1.4739	Training accuracy: 48.41%	Test error: 52.43%	Test accuracy: 47.57%	
19:20:58 --- Epoch: 2	Train loss: 1.3010	Valid loss: 1.3946	Training accuracy: 52.99%	Test error: 50.11%	Test accuracy: 49.89%	
19:21:34 --- Epoch: 3	Train loss: 1.1979	Valid loss: 1.1927	Training accuracy: 57.25%	Test error: 42.64%	Test accuracy: 57.36%	
19:22:10 --- Epoch: 4	Train loss: 1.1083	Valid loss: 1.1365	Training accuracy: 60.51%	Test error: 40.91%	Test accuracy: 59.09%	
19:22:46 --- Epoch: 5	Train loss: 1.0547	Valid loss: 1.0651	Training accuracy: 62.36%	Test error: 37.99%	Test accuracy: 62.01%	
19:23:22 --- Epoch: 6	Train loss: 0.9749	Valid loss: 1.1161	Training accuracy: 65.46%	Test error: 40.02%	Test accuracy: 59.98%	
19:23:58 --- Epoch: 7	Train loss: 0.9307	Valid loss: 1.1792	Training 

# Training in SVHN

## VGG 8

### Training Parameters

In [None]:
# Training parameters
SEED = 1
N_EPOCHS = 50
BATCH_SIZE = 1024
LEARNING_RATE = 0.001
N_CLASSES = 10

### Data Preparation

In [None]:
# make directories to store the training and validation data and results at the end
%pwd
if not os.path.exists("vgg8_svhn_data") and not os.path.exists("vgg8_svhn_results_with_pytorch_layers"):
  print("Making directories....")
  %mkdir vgg8_svhn_data
  %mkdir vgg8_svhn_results_with_pytorch_layers
else:
  print("Directories already exists!")

Directories already exists!


In [None]:
# set paths for training and validation data and results
PATH_DATASET = "./vgg8_svhn_data"
RESULTS = "./vgg8_svhn_results_with_pytorch_layers"

In [None]:
# load data
def load_images_svhn():
    """Load images for train from torchvision datasets."""

    mean = torch.tensor([0.4377, 0.4438, 0.4728])
    std = torch.tensor([0.1980, 0.2010, 0.1970])

    print(f'Normalization data: ({mean},{std})')

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])
    train_set = datasets.SVHN(PATH_DATASET, download=True, split='train', transform=transform)
    val_set = datasets.SVHN(PATH_DATASET, download=True, split='test', transform=transform)
    train_data = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    validation_data = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

    return train_data, validation_data

### Architecture

In [None]:
# adapted from https://github.com/IBM/aihwkit/blob/master/examples/11_vgg8_training.py
def VGG8():
    """VGG8 inspired analog model."""
    model = nn.Sequential(
        
        # conv layers
        nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1),

        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1),

        nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1),

        # Linear layers
        nn.Flatten(),
        nn.Linear(in_features=8192, out_features=1024),
        nn.ReLU(),
        nn.Linear(in_features=1024, out_features=N_CLASSES),
        nn.LogSoftmax(dim=1)
    )

    return model

### SGD Optimizer

In [None]:
def create_sgd_optimizer(model, learning_rate):
    """Create the analog-aware optimizer.
    Args:
        model (nn.Module): model to be trained
        learning_rate (float): global parameter to define learning rate
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=5e-4)
    # optimizer.regroup_param_groups(model)

    return optimizer

### Training Function

In [None]:
def train(train_data, model, criterion, optimizer):
    """Train network.
    Args:
        train_data (DataLoader): Validation set to perform the evaluation
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
    """
    total_loss = 0
    predicted_ok = 0
    total_images = 0
    model.train()
    for images, labels in train_data:

        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        # Add training Tensor to the model (input).
        output = model(images)
        loss = criterion(output, labels)
        total_loss += loss.item() * images.size(0)

        _, predicted = torch.max(output.data, 1)
        total_images += labels.size(0)
        predicted_ok += (predicted == labels).sum().item()
        accuracy = predicted_ok/total_images*100

        # Run training (backward propagation).
        loss.backward()

        # Optimize weights.
        optimizer.step()
        
    epoch_loss = total_loss / len(train_data.dataset)

    return model, accuracy, optimizer, epoch_loss

### Validation Function

In [None]:
def validate(validation_data, model, criterion):
    """Test trained network
    Args:
        validation_data (DataLoader): Validation set to perform the evaluation
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
    """
    total_loss = 0
    predicted_ok = 0
    total_images = 0

    model.eval()

    for images, labels in validation_data:
        images = images.to(device)
        labels = labels.to(device)

        pred = model(images)
        loss = criterion(pred, labels)
        total_loss += loss.item() * images.size(0)

        _, predicted = torch.max(pred.data, 1)
        total_images += labels.size(0)
        predicted_ok += (predicted == labels).sum().item()
        accuracy = predicted_ok/total_images*100
        error = (1-predicted_ok/total_images)*100

    epoch_loss = total_loss / len(validation_data.dataset)

    return model, epoch_loss, error, accuracy

### Training/Validation Loop

In [None]:
def train_val_loop_sc(model, criterion, optimizer, 
                   train_data, validation_data, epochs, 
                   scheduler, print_every=1):
    """Training loop.
    Args:
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
        train_data (DataLoader): Validation set to perform the evaluation
        validation_data (DataLoader): Validation set to perform the evaluation
        epochs (int): global parameter to define epochs number
        print_every (int): defines how many times to print training progress
    """
    train_losses = []
    valid_losses = []
    test_error = []
    testing_acc = []

    # Train model
    for epoch in range(0, epochs):
        
        # Train_step
        model, training_acc, optimizer, train_loss = train(train_data, model, criterion, optimizer)
        train_losses.append(train_loss)

        if epoch % print_every == (print_every - 1):
            # Validate_step
            with torch.no_grad():
                model, valid_loss, error, accuracy = validate(
                    validation_data, model, criterion)
                valid_losses.append(valid_loss)
                test_error.append(error)
                testing_acc.append(accuracy)


            print(f'{datetime.now().time().replace(microsecond=0)} --- '
                  f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Valid loss: {valid_loss:.4f}\t'
                  f'Training accuracy: {training_acc:.2f}%\t'
                  f'Test error: {error:.2f}%\t'
                  f'Test accuracy: {accuracy:.2f}%\t')
        
        # for cyclic learning rate training 
        scheduler.step()

    # Save results and plot figures
    # np.savetxt(os.path.join(RESULTS, "Test_error.csv"), test_error, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Train_Losses.csv"), train_losses, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Valid_Losses.csv"), valid_losses, delimiter=",")
    plot_results_super_convergence(train_losses, valid_losses, test_error, testing_acc)

    return model, optimizer, (train_losses, valid_losses, test_error)

In [None]:
def train_val_loop_nsc(model, criterion, optimizer, 
                   train_data, validation_data, epochs, print_every=1):
    """Training loop.
    Args:
        model (nn.Module): Trained model to be evaluated
        criterion (nn.CrossEntropyLoss): criterion to compute loss
        optimizer (Optimizer): analog model optimizer
        train_data (DataLoader): Validation set to perform the evaluation
        validation_data (DataLoader): Validation set to perform the evaluation
        epochs (int): global parameter to define epochs number
        print_every (int): defines how many times to print training progress
    """
    train_losses = []
    valid_losses = []
    test_error = []
    testing_acc = []

    # Train model
    for epoch in range(0, epochs):
        
        # Train_step
        model, training_acc, optimizer, train_loss = train(train_data, model, criterion, optimizer)
        train_losses.append(train_loss)

        if epoch % print_every == (print_every - 1):
            # Validate_step
            with torch.no_grad():
                model, valid_loss, error, accuracy = validate(
                    validation_data, model, criterion)
                valid_losses.append(valid_loss)
                test_error.append(error)
                testing_acc.append(accuracy)


            print(f'{datetime.now().time().replace(microsecond=0)} --- '
                  f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Valid loss: {valid_loss:.4f}\t'
                  f'Training accuracy: {training_acc:.2f}%\t'
                  f'Test error: {error:.2f}%\t'
                  f'Test accuracy: {accuracy:.2f}%\t')

    # Save results and plot figures
    # np.savetxt(os.path.join(RESULTS, "Test_error.csv"), test_error, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Train_Losses.csv"), train_losses, delimiter=",")
    # np.savetxt(os.path.join(RESULTS, "Valid_Losses.csv"), valid_losses, delimiter=",")
    plot_results(train_losses, valid_losses, test_error, testing_acc)

    return model, optimizer, (train_losses, valid_losses, test_error)

### Plot Results

In [None]:
def plot_results_super_convergence(train_losses, valid_losses, test_error, test_acc):
    """Plot results.
    Args:
        train_losses: training losses as calculated in the training_loop
        valid_losses: validation losses as calculated in the training_loop
        test_error: test error as calculated in the training_loop
    """
    fig = plt.plot(train_losses, 'r-s', valid_losses, 'b-o')
    plt.title('VGG8 SVHN Super Convergence Loss')
    plt.legend(fig[:2], ['Training Losses', 'Validation Losses'])
    plt.xlabel('Epoch number')
    plt.ylabel('Loss [A.U.]')
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_svhn_sc_test_losses.png'))
    plt.close()

    fig = plt.plot(test_error, 'r-s')
    plt.title('VGG8 SVHN Super Convergence Test Error')
    plt.legend(fig[:1], ['Test Error'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test Error [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_svhn_sc_test_error.png'))
    plt.close()

    fig = plt.plot(test_acc, 'r-s')
    plt.title('VGG8 SVHN Super Convergence Accuracy')
    plt.legend(fig[:1], ['Test accuracy'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test accuracy [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_svhn_sc_test_accuracy.png'))
    plt.close()

In [None]:
def plot_results(train_losses, valid_losses, test_error, test_acc):
    """Plot results.
    Args:
        train_losses: training losses as calculated in the training_loop
        valid_losses: validation losses as calculated in the training_loop
        test_error: test error as calculated in the training_loop
    """
    fig = plt.plot(train_losses, 'r-s', valid_losses, 'b-o')
    plt.title('VGG8 SVHN Loss')
    plt.legend(fig[:2], ['Training Losses', 'Validation Losses'])
    plt.xlabel('Epoch number')
    plt.ylabel('Loss [A.U.]')
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_svhn_nsc_test_losses.png'))
    plt.close()

    fig = plt.plot(test_error, 'r-s')
    plt.title('VGG8 SVHN Test Error')
    plt.legend(fig[:1], ['Test Error'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test Error [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_svhn_nsc_test_error.png'))
    plt.close()

    fig = plt.plot(test_acc, 'r-s')
    plt.title('VGG8 SVHN Accuracy')
    plt.legend(fig[:1], ['Test accuracy'])
    plt.xlabel('Epoch number')
    plt.ylabel('Test accuracy [%]')
    plt.ylim((0, 1e2))
    plt.grid(which='both', linestyle='--')
    plt.savefig(os.path.join(RESULTS, 'vgg8_svhn_nsc_test_accuracy.png'))
    plt.close()


### Main() - Super Convergence

In [None]:
# set seed
torch.manual_seed(SEED)

<torch._C.Generator at 0x7eff6452e870>

In [None]:
# Load datasets.
train_data, validation_data = load_images_svhn()

Normalization data: (tensor([0.4377, 0.4438, 0.4728]),tensor([0.1980, 0.2010, 0.1970]))
Using downloaded and verified file: ./vgg8_svhn_data/train_32x32.mat
Using downloaded and verified file: ./vgg8_svhn_data/test_32x32.mat


In [None]:
# Prepare the model.
model = VGG8()
model.to(device)
print(model)

Sequential(
  (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU()
  (14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (16): ReLU()
  (17): MaxPool2d(ker

In [None]:
# optimizer
optimizer = create_sgd_optimizer(model, LEARNING_RATE)

# super convergence scheduler
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, 
                                              base_lr = 0.001, 
                                              max_lr= 0.1, 
                                              step_size_up= math.floor(N_EPOCHS/2), 
                                              cycle_momentum=True, 
                                              base_momentum=0.95, 
                                              max_momentum=0.8)
# loss function
criterion = nn.CrossEntropyLoss()

In [None]:
print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
  f'Started Vgg8 Example')

# training loop
model, optimizer, _ = train_val_loop_sc(model, criterion, optimizer, 
                                    train_data, validation_data,
                                        N_EPOCHS, scheduler)

print(f'{datetime.now().time().replace(microsecond=0)} --- '
  f'Completed Vgg8 Example')


20:02:39 --- Started Vgg8 Example
20:03:35 --- Epoch: 0	Train loss: 2.1349	Valid loss: 1.9536	Training accuracy: 24.58%	Test error: 65.47%	Test accuracy: 34.53%	
20:04:31 --- Epoch: 1	Train loss: 0.8008	Valid loss: 0.5395	Training accuracy: 75.72%	Test error: 16.31%	Test accuracy: 83.69%	
20:05:28 --- Epoch: 2	Train loss: 0.4029	Valid loss: 0.4098	Training accuracy: 87.95%	Test error: 11.91%	Test accuracy: 88.09%	
20:06:24 --- Epoch: 3	Train loss: 0.2889	Valid loss: 0.3408	Training accuracy: 91.48%	Test error: 10.15%	Test accuracy: 89.85%	
20:07:20 --- Epoch: 4	Train loss: 0.2356	Valid loss: 0.2743	Training accuracy: 93.05%	Test error: 7.87%	Test accuracy: 92.13%	
20:08:17 --- Epoch: 5	Train loss: 0.2073	Valid loss: 0.2500	Training accuracy: 93.95%	Test error: 7.14%	Test accuracy: 92.86%	
20:09:13 --- Epoch: 6	Train loss: 0.1794	Valid loss: 0.3523	Training accuracy: 94.79%	Test error: 10.42%	Test accuracy: 89.58%	
20:10:09 --- Epoch: 7	Train loss: 0.1670	Valid loss: 0.2866	Training ac

### Main() - Non Super Convergence

In [None]:
# set seed
torch.manual_seed(SEED)

<torch._C.Generator at 0x7eff6452e870>

In [None]:
# Load datasets.
train_data, validation_data = load_images_svhn()

Normalization data: (tensor([0.4377, 0.4438, 0.4728]),tensor([0.1980, 0.2010, 0.1970]))
Using downloaded and verified file: ./vgg8_svhn_data/train_32x32.mat
Using downloaded and verified file: ./vgg8_svhn_data/test_32x32.mat


In [None]:
# Prepare the model.
model = VGG8()
model.to(device)
print(model)

Sequential(
  (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU()
  (14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (16): ReLU()
  (17): MaxPool2d(ker

In [None]:
# optimizer
optimizer = create_sgd_optimizer(model, LEARNING_RATE)

# loss function
criterion = nn.CrossEntropyLoss()

In [None]:
print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
  f'Started Vgg8 Example')

# training loop
model, optimizer, _ = train_val_loop_nsc(model, criterion, optimizer, 
                                    train_data, validation_data, N_EPOCHS)

print(f'{datetime.now().time().replace(microsecond=0)} --- '
  f'Completed Vgg8 Example')


20:28:05 --- Started Vgg8 Example
20:29:01 --- Epoch: 0	Train loss: 2.2317	Valid loss: 2.1885	Training accuracy: 18.78%	Test error: 78.92%	Test accuracy: 21.08%	
20:29:57 --- Epoch: 1	Train loss: 2.1679	Valid loss: 2.1285	Training accuracy: 21.90%	Test error: 75.52%	Test accuracy: 24.48%	
20:30:53 --- Epoch: 2	Train loss: 2.1058	Valid loss: 2.0580	Training accuracy: 26.04%	Test error: 70.34%	Test accuracy: 29.66%	
20:31:49 --- Epoch: 3	Train loss: 2.0221	Valid loss: 1.9607	Training accuracy: 31.82%	Test error: 66.04%	Test accuracy: 33.96%	
20:32:45 --- Epoch: 4	Train loss: 1.9037	Valid loss: 1.8232	Training accuracy: 38.91%	Test error: 57.73%	Test accuracy: 42.27%	
20:33:42 --- Epoch: 5	Train loss: 1.7494	Valid loss: 1.6552	Training accuracy: 46.79%	Test error: 48.67%	Test accuracy: 51.33%	
20:34:38 --- Epoch: 6	Train loss: 1.5761	Valid loss: 1.4839	Training accuracy: 54.19%	Test error: 40.52%	Test accuracy: 59.48%	
20:35:34 --- Epoch: 7	Train loss: 1.4084	Valid loss: 1.3318	Training 