In [1]:
# Load the autoreload extension and set it to mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from utilities.imports import *

In [3]:
# Standard MNIST setup: fetch the data, then build datasets and loaders.
# NOTE(review): the URL is plain HTTP and ends in .pkl; if get_data unpickles the
# download, that means loading pickle data from an unauthenticated source - confirm.
MNIST_URL = 'http://deeplearning.net/data/mnist/mnist.pkl'
x_train, y_train, x_valid, y_valid = get_data(MNIST_URL)
x_train, x_valid = normalize_to(x_train, x_valid)

# Hyperparameters and loss.
number_hidden = 50
batch_size = 512
number_fields = [8, 16, 32, 32]  # passed to get_cnn_model in the training cells
num_categories = y_train.max().item() + 1  # largest label value plus one
loss_function = F.cross_entropy

# Datasets and their loaders; the validation loader uses twice the batch size.
training_ds = Dataset(x_train, y_train)
validation_ds = Dataset(x_valid, y_valid)
train_dl = DataLoader(training_ds, batch_size, shuffle=True)  # random sampler
valid_dl = DataLoader(validation_ds, 2 * batch_size, shuffle=False)  # sequential sampler

## Training With Kaiming Normal Initialization

In [6]:
# Learning-rate schedule split 50/50 into two phases:
# an increasing cosine (warmup) followed by a decreasing cosine (cooldown).
schedule_phases = [cosine_scheduler(0.2, 1), cosine_scheduler(1., 0.1)]
lr_schedule = combine_schedules([0.5, 0.5], schedule_phases)

# Build the CNN and initialise it with init_cnn's default mode.
model = get_cnn_model(
    num_categories, number_fields, conv_layer,
    leak=0.1, sub_value=0.4, value_cuttoff=6.,
)
init_cnn(model)

# lr=1. is only the optimizer's starting value; a ParamScheduler on 'lr'
# is registered below (presumably it overrides this during training - confirm).
opt = optim.SGD(model.parameters(), lr=1.)

# Callback factories handed to the Runner, one per line for readability.
cb_functions = [
    Recorder,
    partial(AvgStatsCallback, accuracy),
    CudaCallback,
    partial(IndependentVarBatchTransformCallback, mnist_resize),
    partial(ParamScheduler, 'lr', lr_schedule),
]
runner = Runner(cb_funcs=cb_functions)

In [7]:
# Train the model set up in the previous cell for a fixed number of epochs.
num_epochs = 8
runner.fit(num_epochs, model, opt, loss_function, train_dl, valid_dl)

train: [0.9533771875, tensor(0.7174, device='cuda:0')]
valid: [0.3306894775390625, tensor(0.9043, device='cuda:0')]
train: [0.2955844140625, tensor(0.9103, device='cuda:0')]
valid: [0.472639013671875, tensor(0.8584, device='cuda:0')]
train: [0.2029867578125, tensor(0.9380, device='cuda:0')]
valid: [0.126493798828125, tensor(0.9623, device='cuda:0')]
train: [0.124003349609375, tensor(0.9622, device='cuda:0')]
valid: [0.1037725830078125, tensor(0.9687, device='cuda:0')]
train: [0.085284580078125, tensor(0.9737, device='cuda:0')]
valid: [0.08347034301757812, tensor(0.9757, device='cuda:0')]
train: [0.0645839208984375, tensor(0.9805, device='cuda:0')]
valid: [0.073340966796875, tensor(0.9783, device='cuda:0')]
train: [0.050076416015625, tensor(0.9853, device='cuda:0')]
valid: [0.06876021118164062, tensor(0.9801, device='cuda:0')]
train: [0.04373033203125, tensor(0.9876, device='cuda:0')]
valid: [0.06655730590820312, tensor(0.9805, device='cuda:0')]


## Training With Kaiming Uniform Initialization

In [9]:
# Start this experiment from a freshly constructed network instead of
# re-initialising the model trained in the previous section. init_cnn is defined
# outside this notebook, so it is not visible here whether it resets every
# parameter and buffer; anything it leaves untouched would otherwise carry
# trained state into this run and skew the normal-vs-uniform comparison.
model = get_cnn_model(
    num_categories, number_fields, conv_layer,
    leak=0.1, sub_value=0.4, value_cuttoff=6.,
)

# Second positional argument True: presumably selects the uniform variant,
# per this section's heading - confirm against init_cnn's signature.
init_cnn(model, True)

# New optimizer bound to the new model's parameters; lr=1. is only the
# starting value, with a ParamScheduler on 'lr' registered below.
opt = optim.SGD(model.parameters(), lr=1.)

# Same callback set and learning-rate schedule as the Kaiming-normal run,
# so the two experiments differ only in the initialisation.
cb_functions = [
    Recorder,
    partial(AvgStatsCallback, accuracy),
    CudaCallback,
    partial(IndependentVarBatchTransformCallback, mnist_resize),
    partial(ParamScheduler, 'lr', lr_schedule),
]
runner = Runner(cb_funcs=cb_functions)

In [10]:
# Train the model set up in the previous cell for a fixed number of epochs.
num_epochs = 8
runner.fit(num_epochs, model, opt, loss_function, train_dl, valid_dl)

train: [0.4697895703125, tensor(0.8572, device='cuda:0')]
valid: [0.19474404296875, tensor(0.9421, device='cuda:0')]
train: [0.18486501953125, tensor(0.9420, device='cuda:0')]
valid: [0.1250172607421875, tensor(0.9635, device='cuda:0')]
train: [0.21616162109375, tensor(0.9355, device='cuda:0')]
valid: [0.11903450927734376, tensor(0.9666, device='cuda:0')]
train: [0.093015712890625, tensor(0.9712, device='cuda:0')]
valid: [0.09177864379882812, tensor(0.9710, device='cuda:0')]
train: [0.0703732470703125, tensor(0.9783, device='cuda:0')]
valid: [0.07633209838867187, tensor(0.9752, device='cuda:0')]
train: [0.04990337890625, tensor(0.9849, device='cuda:0')]
valid: [0.064880078125, tensor(0.9797, device='cuda:0')]
train: [0.038726015625, tensor(0.9893, device='cuda:0')]
valid: [0.06179990844726563, tensor(0.9809, device='cuda:0')]
train: [0.0333815185546875, tensor(0.9910, device='cuda:0')]
valid: [0.06074375, tensor(0.9814, device='cuda:0')]
