In [1]:
import copy
import pickle
import numpy as np
import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms
from utils import quantize, generate_dataset, training_algo, adding_noise_model, testing
from models_utils import MLP, Linear_noisy, Noisy_Inference
from collections import OrderedDict

# Select the compute backend: Apple MPS if available, otherwise CUDA, otherwise CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# To force CPU execution while debugging, override with: device = 'cpu'
print(device)

mps


## Load the dataset (CIFAR-10)
---

In [2]:
# Hyper-parameters for the experiment.
batch_size = 128      # training batch size; the test loader below uses twice this value
# NOTE(review): the four values below are not referenced by the cells visible
# in this part of the notebook -- confirm they are consumed further down.
epochs = 120
max_lr = 1e-3
grad_clip = 1e-2
weight_decay = 1e-3

In [3]:
# Build the train/test loaders; the test loader uses a batch twice as large.
train_loader, test_loader = generate_dataset(
    task='cifar10',
    train_batch_size=batch_size,
    test_batch_size=2 * batch_size,
)

In [4]:
# Pull a single batch from the training loader for quick inspection.
train_batches = iter(train_loader)
x, y = next(train_batches)
del train_batches

## Loading the CNN model 
---
For the moment, only `mobilenet_v2` is supported (the `mobilenet_v2_noise` variant imported from `utils_mobilenet_v2`).

In [10]:
from utils_mobilenet_v2 import mobilenet_v2_noise as mobilenet_v2

# Instantiate the project's mobilenet_v2 variant, requesting the 'IMAGENET1K_V2'
# weights, then replace the last classifier layer with a 10-output linear head.
# (An earlier revision used torchvision's stock `mobilenet_v2` instead.)
model = mobilenet_v2(weights='IMAGENET1K_V2')
model.classifier[1] = torch.nn.Linear(
    in_features=model.classifier[1].in_features,
    out_features=10,
)

In [5]:
from torchvision.models import efficientnet_b0

# NOTE(review): this cell rebinds `model`. Its execution count (In [5]) is lower
# than the mobilenet_v2 cell above it (In [10]), so on a top-to-bottom run it
# replaces the mobilenet_v2 model, while the recorded outputs further down
# mention mobilenet_v2 -- confirm which model is intended.
model = efficientnet_b0(weights='IMAGENET1K_V1')

# The whole `classifier` attribute is replaced by a single 10-output linear
# layer, sized from the in_features of the existing classifier[1].
model.classifier = torch.nn.Linear(
    in_features=model.classifier[1].in_features,
    out_features=10,
)

In [11]:
# Count the scalar entries of every parameter tensor of the current model.
# NOTE(review): the label in the message is hard-coded and does not follow `model`.
tot_params = 0
for p in model.parameters():
    tot_params += p.numel()
print(f'mobilenet_v2 has {tot_params} params')

mobilenet_v2 has 2236682 params


## Training the CNN
---

In [12]:
def training_algo( training_type, model, data_loaders, device='cpu', lr=1e-3, clip_w=2.5, epochs=10, epochs_noise=2, 
                   noise_sd=1e-2, noise_every=100, levels=None, num_levels=15, print_every=1, verbose=False ):
    '''Train `model` with SGD, then evaluate it once on the test loader.

    training_type: 'qat' quantizes the weights after every optimizer step;
        'qat_noise' additionally adds Gaussian noise to the quantized weights.
        Any other value (e.g. 'normal') performs plain training.
    model: the torch module to train; it is moved to `device`.
    data_loaders: pair (train_loader, test_loader), each yielding (x, y) batches.
    device: device on which the model and the batches are placed.
    lr: learning rate of the SGD optimizer (momentum 0.9, weight decay 5e-4).
    clip_w: if not None, after every step each parameter tensor is clipped to
        +/- clip_w times its own standard deviation.
    epochs: number of passes over the training loader.
    epochs_noise, noise_every: currently unused; they belonged to a disabled
        'noise_fine_tuning' mode and are kept so existing calls keep working.
    noise_sd: standard deviation of the noise added in 'qat_noise' mode.
    levels, num_levels: forwarded unchanged to `quantize`.
    print_every: with verbose=True, print the epoch summary every `print_every` epochs.
    verbose: if True, print training progress and the final test metrics.

    Returns (model, [accs_train, losses_train], [acc_test, loss_test]), where the
    first pair holds one entry per epoch. The model is left in eval mode.
    '''
    train_loader, test_loader = data_loaders
    criterion = torch.nn.NLLLoss() # applied to log_softmax outputs below
    # Move the model first so that the optimizer is built on the final parameters.
    model = model.to(device)
    optimizer = torch.optim.SGD( model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4 )
    uses_qat = training_type in ('qat', 'qat_noise')

    losses_train, accs_train = [], []
    for e in range(epochs):
        # Explicitly (re-)enable training mode: the model may arrive in eval mode,
        # e.g. when it was returned by a previous call of this function.
        model.train()
        losses = []
        correct = 0
        tot_samples = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            x = x.to(device)
            y = y.to(device)
            yhat = model( x )
            y_soft = torch.nn.functional.log_softmax( yhat, dim=1 )
            loss = criterion( y_soft, y.long() )
            loss.backward()

            if uses_qat:
                # Restore the stored `hid` weights so the update is applied to them
                # rather than to the quantized copy used in the forward pass.
                # NOTE(review): `hid` is never created in this function; it is
                # presumably attached to the parameters elsewhere -- confirm.
                for p in list(model.parameters()):
                    if hasattr(p,'hid'):
                        p.data.copy_(p.hid)

            optimizer.step()
            losses.append( loss.item() )
            correct += torch.eq( torch.argmax(yhat, dim=1), y ).cpu().sum()
            tot_samples += len(y)

            if clip_w is not None:
                with torch.no_grad():
                    for p in model.parameters():
                        std_w = torch.std( p )
                        p.clip_( -std_w*clip_w, +std_w*clip_w )

            if uses_qat:
                for p in list(model.parameters()):
                    if hasattr(p,'hid'):
                        p.hid.copy_(p.data) # updating the hid attribute
                    p.data = quantize( parameters=p.data, levels=levels, num_levels=num_levels, device=device )
                    if training_type == 'qat_noise':
                        p.data.add_( torch.randn_like(p.data)*noise_sd )

        acc_train = correct/tot_samples
        loss_train = np.mean(losses)
        if verbose and e%print_every==0:
            print( f'Epoch {e}, Train accuracy {acc_train*100:.2f}% Train loss {loss_train:.4f}' )
        accs_train.append(acc_train); losses_train.append(loss_train)

    # Final evaluation on the test loader: eval mode, no autograd bookkeeping.
    losses = []
    correct = 0
    tot_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)
            yhat = model( x )
            y_soft = torch.nn.functional.log_softmax( yhat, dim=1 )
            loss = criterion( y_soft, y.long() )
            losses.append( loss.item() )
            correct += torch.eq( torch.argmax(yhat, dim=1), y ).cpu().sum()
            tot_samples += len(y)
    acc_test = correct/tot_samples
    loss_test = np.mean(losses)
    if verbose: print( f'-- Test accuracy {acc_test*100:.2f}% Test loss {loss_test:.4f}' )

    return model, [accs_train, losses_train], [acc_test, loss_test]


def testing( model, test_loader, device='cpu', verbose=True ):
    '''The function assessing the test classification accuracy of the model.
    model: model of choice
    test_loader: the test dataloader for the task of choice
    verbose: if True, makes the function output the test accuracy and loss'''
    model = model.to(device)
    losses = []
    correct, tot_samples = 0, 0
    criterion = torch.nn.NLLLoss() #torch.nn.CrossEntropyLoss()
    model.eval()
    for x, y in test_loader:
        x = x.to(device)
        y = y.to(device)
        yhat = model( x )
        y_soft = torch.nn.functional.log_softmax( yhat, dim=1 )
        loss = criterion( y_soft, y.long() )
        losses.append( loss.item() )
        correct += torch.eq( torch.argmax(yhat, dim=1), y ).cpu().sum()
        tot_samples += len(y)
    acc_test = correct/tot_samples
    loss_test = np.mean(losses)
    if verbose: print( f'-- Test accuracy {acc_test*100:.2f}% Test loss {loss_test:.4f}' )

    return acc_test, loss_test


In [13]:
# Train for a single epoch in 'normal' mode (no weight clipping) and collect
# the per-epoch training metrics together with the final test metrics.
data_loaders = [train_loader, test_loader]
model_trained, (accs_train, losses_train), (acc_test, loss_test) = training_algo(
    training_type='normal',
    model=model,
    data_loaders=data_loaders,
    device=device,
    lr=1e-2,
    clip_w=None,
    epochs=1,
    epochs_noise=2,
    print_every=1,
    verbose=True,
)

Epoch 0, Train accuracy 47.46% Test loss 1.4597
-- Test accuracy 63.67% Test loss 1.0362
