## Hyper-parameter

In [1]:
import sys
sys.path.append('..')
import utils
import mxnet as mx

num_epochs = 200
learning_rate = 0.3
weight_decay = 5e-4
lr_period = 30
lr_decay = 0.95
batch_size = 30
epoch_period = 10
theta = 0.1
ctx = utils.try_gpu()

In [2]:
clipping_norm = 0.1
eps = 20
delta = 1e-5

## Define Model

In [3]:
from mxnet import gluon
from mxnet import init
drop_prob1 = 0.1
drop_prob2 = 0.1
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(64, activation="relu"))
    net.add(gluon.nn.Dropout(drop_prob1))
    net.add(gluon.nn.Dense(32, activation="relu"))
    net.add(gluon.nn.Dropout(drop_prob2))
    net.add(gluon.nn.Dense(10))
net.initialize(ctx=ctx, init=init.Xavier())

## Loading data and define Loss and Trainer

In [4]:
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import image
import utils
import numpy as np


fmnist_root="~/.mxnet/datasets/fashion-mnist"
mnist_root = "~/.mxnet/datasets/mnist"

def easy_transform(data, label):
    return data.astype('float32')/255, label.astype('float32')

def transform_train(data, label):
    im = data.astype('float32') / 255
    auglist = image.CreateAugmenter(data_shape=(3, 32, 32), resize=0, 
                        rand_crop=False, rand_resize=False, rand_mirror=True,
                        mean=np.array([0.4914, 0.4822, 0.4465]), 
                        std=np.array([0.2023, 0.1994, 0.2010]), 
                        brightness=0, contrast=0, 
                        saturation=0, hue=0, 
                        pca_noise=0, rand_gray=0, inter_method=2)
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2,0,1))
    return (im, nd.array([label]).asscalar().astype('float32'))

def transform_test(data, label):
    im = data.astype('float32') / 255
    auglist = image.CreateAugmenter(data_shape=(3, 32, 32), 
                        mean=np.array([0.4914, 0.4822, 0.4465]), 
                        std=np.array([0.2023, 0.1994, 0.2010]))
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2,0,1))
    return (im, nd.array([label]).asscalar().astype('float32'))

def dataset_split(data, ratio):
    split_point = int(len(data) * ratio)
    data=data[:]
    X = data[0]
    y = nd.array(data[1])
    n = X.shape[0]
    idx = np.arange(n)
    np.random.shuffle(idx)
    X = nd.array(X.asnumpy()[idx])
    y = nd.array(y.asnumpy()[idx])
    X_train = X[:split_point]
    y_train = y[:split_point]
    X_valid = X[split_point:]
    y_valid = y[split_point:]
    train_dataset= gluon.data.ArrayDataset(X_train, y_train)
    test_dataset = gluon.data.ArrayDataset(X_valid, y_valid)
    return train_dataset, test_dataset

def evaluate_loss(loss_func, data_iterator, net, ctx=[mx.cpu()]):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    total_loss = 0.0
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for data, label in data_iterator:
        cur_loss = softmax_cross_entropy(net(data), label).copyto(mx.cpu())
        total_loss += nd.mean(cur_loss).asscalar()
    return total_loss

validation_ratio = 0.3

mnist_train_valid = gluon.data.vision.MNIST(train=True, transform=easy_transform)
mnist_test = gluon.data.vision.MNIST(train=False, transform=easy_transform)
mnist_train, mnist_valid = dataset_split(mnist_train_valid, validation_ratio)

# cifar_train_valid = gluon.data.vision.CIFAR10(train=True, transform=easy_transform)
# cifar_test = gluon.data.vision.CIFAR10(train=False, transform=easy_transform)
# cifar_train, cifar_valid = dataset_split(cifar_train_valid, validation_ratio)

In [5]:
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True, last_batch='keep')
valid_data = gluon.data.DataLoader(mnist_valid, batch_size, shuffle=True, last_batch='keep')
train_valid_data = gluon.data.DataLoader(mnist_train_valid, batch_size, shuffle=True, last_batch='keep')

# 
# train_data = gluon.data.DataLoader(cifar_train, batch_size, shuffle=True, last_batch='keep')
# valid_data = gluon.data.DataLoader(cifar_valid, batch_size, shuffle=True, last_batch='keep')
# train_valid_data = gluon.data.DataLoader(mnist_train_valid, batch_size, shuffle=True, last_batch='keep')
# 

test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False, last_batch='keep')
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

## Training and testing

In [6]:
import datetime

def train(net, train_data, valid_data, epochs, lr, wd, lr_period, 
          lr_decay, epoch_period, ctx, clipping_norm, eps, delta, noise=False, Tuning=False):
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    if Tuning == False:
        net.collect_params().initialize(force_reinit=True)
    cur_train_loss = [evaluate_loss(softmax_cross_entropy,train_data, net, ctx) / len(train_data)]
    if valid_data is not None:
        cur_valid_loss = [evaluate_loss(softmax_cross_entropy,valid_data, net, ctx) / len(valid_data)]
    params = None
    print("start")
    for epoch in range(1, num_epochs + 1):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
            lr *= lr_decay
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label) 
            if params == None:
                params_notation = net.collect_params()
                params = []
                for key in params_notation.keys():
                    params.append(params_notation[key].data())  
            loss.backward()
            if noise == True:
                utils.Noisy_SGD(params, lr / batch_size , clipping_norm, eps, delta, ctx)
            else:
#                 trainer.step(batch_size) 
                utils.SGD(params, lr / batch_size)
                
            train_loss += nd.mean(loss).asscalar()
            train_acc += utils.accuracy(output, label)
        
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        cur_train_loss.append(train_loss / len(train_data))
        
        if valid_data != None:
            valid_acc = utils.evaluate_accuracy(valid_data, net, ctx)   
            cur_valid_loss.append(evaluate_loss(softmax_cross_entropy,valid_data, net, ctx) / len(valid_data))
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
                            % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f "
                            % (epoch, train_loss / len(train_data), train_acc / len(train_data)))
        
        prev_time = cur_time
        if epoch % epoch_period == 0 or epoch == epochs - 1 or epoch == 1:
            print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
                         
    if valid_data != None:
        return cur_train_loss, cur_valid_loss
    else:
        return cur_train_loss

In [None]:
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt

In [None]:
eps = 20
delta = 1e-5

train_loss, valid_loss = train(net, train_data, valid_data, num_epochs, 
                               learning_rate, weight_decay, lr_period, 
                               lr_decay, epoch_period, ctx, clipping_norm, eps, delta, noise=True)
plt.plot(train_loss)
plt.plot(valid_loss)
plt.legend(['train','valid'])
plt.xlabel('Num_epoch')
plt.ylabel('Loss')
plt.title('Noise SGD eps = 20, delta = 1e-5')

start
Epoch 1. Loss: 2.298100, Train acc 0.143722, Valid acc 0.162595, Time 00:00:05, lr 0.3
Epoch 10. Loss: 1.565969, Train acc 0.536667, Valid acc 0.603119, Time 00:00:06, lr 0.3
Epoch 20. Loss: 1.046254, Train acc 0.660111, Valid acc 0.732929, Time 00:00:05, lr 0.3
Epoch 30. Loss: 0.825634, Train acc 0.738667, Valid acc 0.797452, Time 00:00:05, lr 0.285
Epoch 40. Loss: 0.722120, Train acc 0.773500, Valid acc 0.824881, Time 00:00:06, lr 0.285


In [None]:
eps = 4
delta = 1e-5

train_loss_1, valid_loss_2 = train(net, train_data, valid_data, num_epochs, 
                               learning_rate, weight_decay, lr_period, 
                               lr_decay, epoch_period, ctx, clipping_norm, eps, delta, noise=True)
plt.plot(train_loss)
plt.plot(valid_loss)
plt.legend(['train','test'])
plt.xlabel('Num_epoch')
plt.ylabel('Loss')
plt.title('Noise SGD eps = 10, delta = 1e-5')

In [None]:
train_loss, valid_loss = train(net, train_data, valid_data, num_epochs, 
                               learning_rate, weight_decay, lr_period, 
                               lr_decay, epoch_period, ctx, clipping_norm, eps, delta, noise=False)

plt.plot(train_loss)
plt.plot(valid_loss)
plt.legend(['train','test'])
plt.xlabel('Num_epoch')
plt.ylabel('Loss')
plt.title('Normal SGD')