Train MNIST
-------------------------

In [1]:
from __future__ import print_function
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import os
# Use the GPU when torch can see one; otherwise fall back to the CPU
# (the original author notes that a GPU is not actually needed here).
cuda = torch.cuda.is_available()
if cuda:
    device = 'cuda'
else:
    device = 'cpu'
device  # bare expression: the notebook echoes the selected device as cell output

'cpu'

Import the code from the Git repository. We use `%%capture` here to suppress output that could disclose author information. If you run this cell yourself, you can remove the `%%capture` line to get feedback on whether the cloning worked.

In [2]:
%%capture
# Fetch the helper that downloads anonymized repositories, use it to pull the
# GaussNetworks code into gauss_nets/, and make that directory the working directory
# so that the `train` package can be imported by the next cell.
!git clone https://github.com/kynehc/clone_anonymous_github.git
%cd clone_anonymous_github
!python3 download.py --url https://anonymous.4open.science/r/GaussNetworks-A7E7/ --dir gauss_nets/
# NOTE(review): both %cd calls use relative paths, so this cell is presumably meant to be
# run once per kernel session; a second run would start from inside gauss_nets/ -- confirm.
%cd gauss_nets
!ls

In [3]:
import train
from train.models import *
from train import *
from train.layers import *
from train.loss import *
from train.optimization import Optimizer

Load the data

In [4]:
# Digit labels '0'..'9'; c is the number of classes handed to every classifier head below.
classes = tuple(str(digit) for digit in range(10))
c = len(classes)

# Data
print('==> Preparing data..')
# Convert images to tensors and normalize with mean 0.5 and std 1.0.
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1.0,)),
])

# Training split: shuffled mini-batches of 128 images.
train_data = datasets.MNIST(root='./data', train=True, download=True, transform=trans)
trainloader = DataLoader(train_data, batch_size=128, shuffle=True, num_workers=2)


# Test split: unshuffled, with a batch size of 10000.
testset = datasets.MNIST(root='./data', train=False, download=True, transform=trans)
testloader = DataLoader(testset, batch_size=10000, shuffle=False, num_workers=2)

==> Preparing data..


In [5]:
path = "checkpoint/"
def load_net(name,architecture):
    """Load saved weights into ``architecture`` and print the stored accuracy.

    Reads ``<path>/<name>.t7`` (``path`` is the module-level checkpoint
    directory). The file is expected to hold a dict with a ``'net'`` state
    dict and an ``'acc'`` entry, as written by the training cells of this
    notebook.

    Args:
        name: Checkpoint file name without the ``.t7`` extension.
        architecture: Module whose layout matches the saved state dict;
            its parameters are overwritten in place.

    Returns:
        The same ``architecture`` object, now holding the loaded parameters.
    """
    # os.path.join keeps the lookup correct even if `path` is set without a trailing separator.
    checkpoint_file = os.path.join(path, name + '.t7')
    # NOTE(review): torch.load relies on pickle; only load checkpoints from a trusted source.
    # Tensors are mapped to the CPU; callers move the model to their device afterwards.
    checkpoint = torch.load(checkpoint_file, map_location='cpu')
    architecture.load_state_dict(checkpoint['net'])
    print(name+' ACC:',checkpoint['acc'])
    return architecture

In [6]:
# d: embedding dimension passed to every LeNet / classifier head below.
# name: dataset tag used as the prefix of the checkpoint file names.
d, name = 10, "MNIST"

# Train Softmax

In [7]:
# Model
print('==> Building model..')
# Softmax baseline: LeNet with a d-dimensional embedding and a linear head with c outputs.
classifier = nn.Linear(d, c, bias=True)
net = LeNet(embedding_dim=d, classifier=classifier).to(device)

# The loss receives the same head object that the network uses.
criterion = CE_Loss(classifier, c, device)

# A bare parameter iterable gives SGD a single parameter group,
# which is what a one-entry list of {'params': ...} dicts produces as well.
sgd = optim.SGD(
    net.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)
optimizer = Optimizer(sgd, trainloader, device)

==> Building model..


In [8]:
# Two-stage step-size schedule, given as (learning rate, number of epochs) pairs.
epoch_offset = 0
for lr,max_epochs in [(0.1,40),(0.01,20)]:
    # Apply the stage's step size to every parameter group of the wrapped SGD optimizer.
    for param_group in optimizer.optimizer.param_groups:
        param_group['lr'] = lr
    print("===== Optimize with step size ",lr)
    # Epoch numbers continue across stages via epoch_offset.
    for epoch in range(epoch_offset, epoch_offset+max_epochs):
        print('\nEpoch: %d' % epoch)
        optimizer.train_epoch(net, criterion)
        # Evaluate on the test loader after every epoch; the last acc is stored in the checkpoint.
        (acc,conf) = optimizer.test_acc(net,criterion, testloader)
    epoch_offset+= max_epochs

print('Saving..')
state = {
    'net': net.state_dict(),
    'acc': acc
}
# torch.save does not create missing directories, so make sure the target folder exists first.
os.makedirs('./checkpoint', exist_ok=True)
# File name: <dataset tag><network class name><classifier class name>.t7
torch.save(state, './checkpoint/%s%s%s.t7'%(name,net.__class__.__name__,criterion.classifier.__class__.__name__))


===== Optimize with step size  0.1

Epoch: 0
Loss: 0.415 | Acc: 86.212% (51727/60000) | Conf 85.52 | time (s): 7.60
Loss: 0.081 | Acc: 97.720% (9772/10000) | Conf 97.57

Epoch: 1
Loss: 0.079 | Acc: 97.742% (58645/60000) | Conf 97.77 | time (s): 7.74
Loss: 0.063 | Acc: 97.800% (9780/10000) | Conf 98.12

Epoch: 2
Loss: 0.061 | Acc: 98.225% (58935/60000) | Conf 98.19 | time (s): 7.60
Loss: 0.064 | Acc: 98.030% (9803/10000) | Conf 97.70

Epoch: 3
Loss: 0.055 | Acc: 98.438% (59063/60000) | Conf 98.37 | time (s): 7.61
Loss: 0.043 | Acc: 98.620% (9862/10000) | Conf 98.49

Epoch: 4
Loss: 0.049 | Acc: 98.565% (59139/60000) | Conf 98.53 | time (s): 7.62
Loss: 0.049 | Acc: 98.450% (9845/10000) | Conf 98.62

Epoch: 5
Loss: 0.046 | Acc: 98.632% (59179/60000) | Conf 98.56 | time (s): 7.61
Loss: 0.045 | Acc: 98.520% (9852/10000) | Conf 98.51

Epoch: 6
Loss: 0.041 | Acc: 98.798% (59279/60000) | Conf 98.71 | time (s): 7.58
Loss: 0.049 | Acc: 98.520% (9852/10000) | Conf 98.65

Epoch: 7
Loss: 0.041 | Acc

# Train Gauss

Train Softmax Warmup Model if the warmup (base) model does not exist yet.

In [9]:
warmup, epoch_offset = 10, 0
# Checkpoint written by this cell; the file name is derived from the current
# network class and classifier class.
base_checkpoint = './checkpoint/%s_base%s%s.t7'%(name,net.__class__.__name__,criterion.classifier.__class__.__name__)
# Guard on the file this cell actually produces, so a missing warmup checkpoint
# is always regenerated before the centroid cell tries to load it.
if not os.path.exists(base_checkpoint):
    # NOTE(review): `net`, `criterion` and `optimizer` are whatever the preceding cells
    # left in the kernel; confirm that continuing from that state is intended.
    for lr,max_epochs in [(0.1,40),(0.01,20)]:
        optimizer.optimizer.param_groups[0]['lr'] = lr
        print("===== Optimize with step size ",lr)
        # Cap the epoch counter at `warmup`; a stage that starts beyond the cap runs zero epochs.
        for epoch in range(epoch_offset, min(epoch_offset+max_epochs, warmup)):
            print('\nEpoch: %d' % epoch)
            optimizer.train_epoch(net, criterion)
            (acc,conf) = optimizer.test_acc(net,criterion, testloader)
        epoch_offset+= max_epochs

    print('Saving..')
    state = {
        'net': net.state_dict(),
        'acc': acc
    }
    # torch.save does not create missing directories, so make sure the target folder exists first.
    os.makedirs('./checkpoint', exist_ok=True)
    torch.save(state, base_checkpoint)

Compute class centroids

In [10]:
# Optimizer wrapper without an SGD instance: in this cell it is only used for
# centroid computation and evaluation.
optimizer = Optimizer(None, trainloader, device)
net = LeNet(embedding_dim=d, classifier = nn.Linear(d, c,bias=True))
# Load the softmax warmup checkpoint; the file name uses the same `name` prefix
# as the cell that saves it, so changing `name` keeps both sides in sync.
net = load_net('%s_baseLeNetLinear'%(name),net)
# Swap the linear head for a Gauss head before moving the model to the device.
classifier = Gauss(in_features = d, out_features = c, gamma=0.5)
net.classifier = classifier
net = net.to(device)

criterion = CE_GALoss(classifier, c, device)
criterion = criterion.to(device)

# Set the class centroids of the Gauss head from the training data, then evaluate once.
optimizer.optimize_centroids(net)
print("Test accuracy after computing class centroids and predicting with Gauss confidence:")
(acc,conf) = optimizer.test_acc(net,criterion, testloader)

state = {
    'net': net.state_dict(),
    'acc': acc
}
# Stored as <name>_base<network class><classifier class>.t7, the file the Gauss training cell loads.
torch.save(state, './checkpoint/%s_base%s%s.t7'%(name,net.__class__.__name__,criterion.classifier.__class__.__name__))

MNIST_baseLeNetLinear ACC: tensor(98.9417)
Test accuracy after computing class centroids and predicting with Gauss confidence:
Loss: 6.552 | Acc: 98.600% (9860/10000) | Conf 11.41


Train the Gauss network

In [11]:
# Model
print('==> Building model..')
# Gauss head (gamma_min is presumably a lower bound on gamma -- confirm in train.layers).
classifier = Gauss(in_features=d, out_features=c, gamma=1., gamma_min=0.1)
# Start from the base checkpoint saved after the centroid computation.
net = load_net('MNIST_baseLeNetGauss', LeNet(embedding_dim=d, classifier=classifier)).to(device)

criterion = CE_GALoss(classifier, c, device).to(device)

# Per-group weight decay: the embedding uses the optimizer default (5e-4), the classifier
# weight and gamma are not decayed, and the loss parameter delta uses 5e-3.
sgd = optim.SGD(
    [
        {'params': net.embed.parameters()},
        {'params': criterion.classifier.weight, 'weight_decay': 0},
        {'params': criterion.classifier.gamma, 'weight_decay': 0},
        {'params': criterion.delta, 'weight_decay': 5e-3},
    ],
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)
optimizer = Optimizer(sgd, trainloader, device)

==> Building model..
MNIST_baseLeNetGauss ACC: tensor(98.6000)


In [12]:
# Train the Gauss network on the same (learning rate, epochs) schedule as the softmax model,
# but with epoch numbering starting at `warmup` and every step size scaled by 0.2.
warmup, epoch_offset = 10, 0
for lr, max_epochs in [(0.1,40),(0.01,20)]:
    # Scale the nominal step size of this stage by 0.2.
    lr*=0.2
    # The optimizer has several parameter groups, so the step size is set on each of them.
    for param_group in optimizer.optimizer.param_groups:
        param_group['lr']=lr
    print("==== Optimize with step size ",lr)
    # The first stage starts at epoch `warmup` instead of 0; later stages start at epoch_offset.
    for epoch in range(max(epoch_offset,warmup), epoch_offset+max_epochs):
        print('\nEpoch: %d' % epoch)
        optimizer.train_epoch(net, criterion)
        (acc,conf) = optimizer.test_acc(net,criterion, testloader)
        # Every fifth epoch, print summary statistics of the classifier margins, gamma and delta.
        if epoch%5==0:
            with torch.no_grad():
                margins = criterion.classifier.get_margins()
                gamma = criterion.classifier.gamma
                delta = criterion.delta
                print('Min margin: %.2f, mean margin: %.2f +- %.3f'% (torch.min(margins), torch.mean(margins), torch.std(margins)))
                print('gamma: %.2f +- %.3f'% (torch.mean(gamma), torch.std(gamma)))
                print('delta: %.2f +- %.3f'% (torch.mean(delta), torch.std(delta)))
    epoch_offset+=max_epochs
print('Saving..')
# The checkpoint stores the weights together with the accuracy of the last evaluation.
state = {
    'net': net.state_dict(),
    'acc': acc
}
# File name: <dataset tag><network class name><classifier class name>.t7
torch.save(state, './checkpoint/%s%s%s.t7'%(name,net.__class__.__name__,criterion.classifier.__class__.__name__))

==== Optimize with step size  0.020000000000000004

Epoch: 10
Loss: 0.451 | Acc: 99.148% (59489/60000) | Conf 64.88 | time (s): 7.79
Loss: 0.327 | Acc: 99.120% (9912/10000) | Conf 71.29
Min margin: 11.49, mean margin: 14.81 +- 1.863
gamma: 0.10 +- 0.000
delta: 0.38 +- 0.057

Epoch: 11
Loss: 0.269 | Acc: 99.365% (59619/60000) | Conf 74.27 | time (s): 7.84
Loss: 0.276 | Acc: 99.240% (9924/10000) | Conf 76.01

Epoch: 12
Loss: 0.227 | Acc: 99.465% (59679/60000) | Conf 77.85 | time (s): 7.86
Loss: 0.252 | Acc: 99.190% (9919/10000) | Conf 78.40

Epoch: 13
Loss: 0.200 | Acc: 99.540% (59724/60000) | Conf 80.36 | time (s): 7.90
Loss: 0.230 | Acc: 99.210% (9921/10000) | Conf 80.87

Epoch: 14
Loss: 0.180 | Acc: 99.587% (59752/60000) | Conf 82.16 | time (s): 7.84
Loss: 0.235 | Acc: 99.150% (9915/10000) | Conf 82.24

Epoch: 15
Loss: 0.170 | Acc: 99.622% (59773/60000) | Conf 83.07 | time (s): 7.96
Loss: 0.200 | Acc: 99.240% (9924/10000) | Conf 84.00
Min margin: 10.87, mean margin: 13.93 +- 1.717
gam

# Train DUQ model

In [13]:
# Model
print('==> Building model..')
# DUQ variant: the same LeNet embedding size with a Gauss_DUQ head.
classifier = Gauss_DUQ(in_features=d, out_features=c, gamma=1)
net = LeNet(embedding_dim=d, classifier=classifier).to(device)

criterion = BCE_DUQLoss(classifier, c, device).to(device)

# A bare parameter iterable gives SGD a single parameter group,
# which is what a one-entry list of {'params': ...} dicts produces as well.
sgd = optim.SGD(
    net.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)
optimizer = Optimizer(sgd, trainloader, device, update_centroids=True)

==> Building model..


In [14]:
# Three-stage step-size schedule, given as (learning rate, number of epochs) pairs.
epoch_offset =0

for lr, max_epochs in [(0.05,25),(0.01,25),(0.002,25)]:
    # Apply the stage's step size to every parameter group of the SGD optimizer.
    for param_group in sgd.param_groups:
        param_group['lr'] = lr
    print("===Optimize with step size ",lr)
    # Epoch numbers continue across stages via epoch_offset.
    for epoch in range(epoch_offset, epoch_offset+max_epochs):
        print('\nEpoch: %d' % epoch)
        optimizer.train_epoch(net, criterion, weight_gp_pred=1)
        # Evaluate on the test loader after every epoch; the last acc is stored in the checkpoint.
        (acc,conf) = optimizer.test_acc(net,criterion, testloader)
    epoch_offset+= max_epochs

print('Saving..')
state = {
    'net': net.state_dict(),
    'acc': acc
}
# torch.save does not create missing directories, so make sure the target folder exists first.
os.makedirs('./checkpoint', exist_ok=True)
# File name: <dataset tag><network class name><classifier class name>.t7
torch.save(state, './checkpoint/%s%s%s.t7'%(name,net.__class__.__name__,criterion.classifier.__class__.__name__))


===Optimize with step size  0.05

Epoch: 0
Loss: 0.384 | Acc: 19.910% (11946/60000) | Conf 19.49 | time (s): 17.26
Loss: 0.211 | Acc: 60.200% (6020/10000) | Conf 44.04

Epoch: 1
Loss: 0.141 | Acc: 82.363% (49418/60000) | Conf 64.12 | time (s): 17.54
Loss: 0.083 | Acc: 91.850% (9185/10000) | Conf 72.20

Epoch: 2
Loss: 0.092 | Acc: 93.257% (55954/60000) | Conf 73.47 | time (s): 17.42
Loss: 0.064 | Acc: 95.000% (9500/10000) | Conf 74.13

Epoch: 3
Loss: 0.074 | Acc: 95.642% (57385/60000) | Conf 77.31 | time (s): 17.34
Loss: 0.056 | Acc: 96.330% (9633/10000) | Conf 77.89

Epoch: 4
Loss: 0.065 | Acc: 96.390% (57834/60000) | Conf 79.71 | time (s): 18.73
Loss: 0.051 | Acc: 96.910% (9691/10000) | Conf 76.34

Epoch: 5
Loss: 0.060 | Acc: 96.875% (58125/60000) | Conf 81.10 | time (s): 19.44
Loss: 0.045 | Acc: 97.350% (9735/10000) | Conf 82.25

Epoch: 6
Loss: 0.057 | Acc: 97.172% (58303/60000) | Conf 81.91 | time (s): 17.61
Loss: 0.043 | Acc: 97.700% (9770/10000) | Conf 81.06

Epoch: 7
Loss: 0.053 

Loss: 0.032 | Acc: 98.380% (9838/10000) | Conf 86.06

Epoch: 60
Loss: 0.038 | Acc: 98.420% (59052/60000) | Conf 86.34 | time (s): 17.55
Loss: 0.032 | Acc: 98.410% (9841/10000) | Conf 86.78

Epoch: 61
Loss: 0.038 | Acc: 98.420% (59052/60000) | Conf 86.14 | time (s): 17.51
Loss: 0.032 | Acc: 98.380% (9838/10000) | Conf 88.48

Epoch: 62
Loss: 0.039 | Acc: 98.390% (59034/60000) | Conf 86.03 | time (s): 17.47
Loss: 0.034 | Acc: 98.350% (9835/10000) | Conf 84.31

Epoch: 63
Loss: 0.039 | Acc: 98.385% (59031/60000) | Conf 85.88 | time (s): 17.56
Loss: 0.032 | Acc: 98.380% (9838/10000) | Conf 87.59

Epoch: 64
Loss: 0.038 | Acc: 98.403% (59042/60000) | Conf 86.13 | time (s): 17.62
Loss: 0.032 | Acc: 98.350% (9835/10000) | Conf 85.70

Epoch: 65
Loss: 0.039 | Acc: 98.378% (59027/60000) | Conf 85.81 | time (s): 17.46
Loss: 0.033 | Acc: 98.340% (9834/10000) | Conf 86.03

Epoch: 66
Loss: 0.039 | Acc: 98.407% (59044/60000) | Conf 85.93 | time (s): 17.63
Loss: 0.032 | Acc: 98.390% (9839/10000) | Conf 8

# Load and Inspect Models

In [15]:
# Empty architectures for the three trained variants; their weights are loaded from checkpoints afterwards.
net_sm = LeNet(
    embedding_dim=d,
    classifier=nn.Linear(in_features=d, out_features=c),
)
net_duq = LeNet(
    embedding_dim=d,
    classifier=Gauss_DUQ(in_features=d, out_features=c, gamma=1),
)
net_ga = LeNet(
    embedding_dim=d,
    classifier=Gauss(in_features=d, out_features=c, gamma=10),
)

In [16]:
# Restore each trained model from its checkpoint (load_net prints the stored accuracy),
# then move it to the selected device.
net_sm = load_net('MNISTLeNetLinear', net_sm)
net_sm = net_sm.to(device)
net_duq = load_net('MNISTLeNetGauss_DUQ', net_duq)
net_duq = net_duq.to(device)
net_ga = load_net('MNISTLeNetGauss', net_ga)
net_ga = net_ga.to(device)

MNISTLeNetLinear ACC: tensor(99.2600)
MNISTLeNetGauss_DUQ ACC: tensor(98.3300)
MNISTLeNetGauss ACC: tensor(99.2700)


In [17]:
# One loss object per model, each bound to that model's classifier head and moved to the device.
criterion_sm = CE_Loss(net_sm.classifier, c, device)
criterion_sm = criterion_sm.to(device)
criterion_ga = CE_GALoss(net_ga.classifier, c, device)
criterion_ga = criterion_ga.to(device)
criterion_duq = BCE_DUQLoss(net_duq.classifier, c, device)
criterion_duq = criterion_duq.to(device)

In [18]:
# Evaluate with a dedicated wrapper instead of whichever training `optimizer` happens to be
# left in the kernel, so this section also runs when the training cells were skipped.
# An Optimizer built with None in place of the SGD instance is used for test_acc in the
# class-centroid cell as well.
evaluator = Optimizer(None, trainloader, device)
# Output order: softmax, DUQ, Gauss. acc/conf keep the values of the last (Gauss) evaluation.
(acc,conf) = evaluator.test_acc(net_sm,criterion_sm, testloader)
(acc,conf) = evaluator.test_acc(net_duq,criterion_duq, testloader)
(acc,conf) = evaluator.test_acc(net_ga,criterion_ga, testloader)

Loss: 0.021 | Acc: 99.260% (9926/10000) | Conf 99.34
Loss: 0.032 | Acc: 98.330% (9833/10000) | Conf 86.32
Loss: 0.186 | Acc: 99.270% (9927/10000) | Conf 92.48
