In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
#export
from exp.nb_06 import *

### ConvNet

In [3]:
# Fetch the train/validation splits and pass both input sets through normalize_to.
x_train, y_train, x_valid, y_valid = get_data()
x_train, x_valid = normalize_to(x_train, x_valid)

train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

# bs is forwarded to get_dls below (presumably the batch size - confirm in exp.nb_06).
# nh and loss_func are not referenced again by the cells visible in this notebook.
nh = 50
bs = 512
# Largest training label + 1, i.e. the class count when labels run 0..c-1.
c = y_train.max().item() + 1
loss_func = F.cross_entropy

data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [9]:
# Callback factories shared by every get_learn_run call below.
# mnist_view comes from view_tfm(1, 28, 28) and is handed to BatchTransformXCallback
# (presumably it reshapes each input batch to 1x28x28 images - confirm in exp.nb_06).
mnist_view = view_tfm(1, 28, 28)
cbfs = [
    Recorder,
    partial(AvgStatsCallback, accuracy),
    #CudaCallback,
    partial(BatchTransformXCallback, mnist_view),
]

In [10]:
# Per-layer sizes passed as `nfs` to get_learn_run in the cells below
# (presumably the conv output-channel counts - confirm in get_cnn_model).
nfs = [8,16,32,64,64]

In [11]:
# Baseline run: when cells are executed top to bottom, get_learn_run and conv_layer
# here are the versions star-imported from exp.nb_06; the batchnorm-aware
# redefinitions only appear further down. 0.4 is presumably the learning rate
# (it is the `lr` positional slot in the later redefinition).
learn,run = get_learn_run(nfs, data, 0.4, conv_layer, cbs=cbfs)

In [12]:
# Train the baseline; the recorded output below shows two train/valid stat pairs.
run.fit(2, learn)

train: [0.992829921875, tensor(0.6856)]
valid: [0.19813443603515624, tensor(0.9415)]
train: [0.161064638671875, tensor(0.9508)]
valid: [0.1241411376953125, tensor(0.9646)]


### Batchnorm

Batch normalization was introduced in this [paper](https://arxiv.org/pdf/1502.03167.pdf) (Ioffe & Szegedy, 2015).

In [16]:
#export
def init_cnn_(m, f):
    """Recursively initialise every `nn.Conv2d` in `m` and its descendants, in place.

    m: module to walk; `m` itself is checked first, then each child in order.
    f: initialiser invoked as `f(conv.weight, a=0.1)` for each conv found.

    A conv's bias, when it has one, is zeroed. Modules of any other type are
    only traversed, never modified.
    """
    if isinstance(m, nn.Conv2d):
        f(m.weight, a=0.1)
        bias = getattr(m, 'bias', None)
        # Convs built with bias=False expose bias as None; nothing to zero then.
        if bias is not None:
            bias.data.zero_()
    for child in m.children():
        init_cnn_(child, f)

def init_cnn(m, uniform=False):
    """Run `init_cnn_` over `m` with a Kaiming initialiser.

    uniform: when true use `init.kaiming_uniform_`, otherwise `init.kaiming_normal_`.
    """
    if uniform:
        initializer = init.kaiming_uniform_
    else:
        initializer = init.kaiming_normal_
    init_cnn_(m, initializer)

def get_learn_run(nfs, data, lr, layer, cbs=None, opt_func=None, uniform=False, **kwargs):
    """Build a CNN, initialise it with `init_cnn`, and wrap it via `get_runner`.

    nfs, data, layer: forwarded to `get_cnn_model` together with any extra `**kwargs`.
    lr, cbs, opt_func: forwarded to `get_runner` as keyword arguments.
    uniform: forwarded to `init_cnn` to pick uniform vs normal Kaiming init.

    Returns whatever `get_runner` returns; callers in this notebook unpack it
    as `learn, run`.
    """
    cnn = get_cnn_model(data, nfs, layer, **kwargs)
    init_cnn(cnn, uniform=uniform)
    runner_args = dict(lr=lr, cbs=cbs, opt_func=opt_func)
    return get_runner(cnn, data, **runner_args)

In [17]:
#export
def conv_layer(ni, nf, ks=3, stride=2, bn=True, **kwargs):
    """Return an `nn.Sequential` of conv -> GeneralRelu [-> BatchNorm2d].

    ni, nf: input and output channel counts of the convolution.
    ks: kernel size; padding is `ks//2`.
    stride: convolution stride.
    bn: when true, append `nn.BatchNorm2d(nf)` after the activation and build
        the conv without a bias term.
    **kwargs: passed straight to `GeneralRelu`.

    The explicit `eps=1e-5, momentum=0.1` match PyTorch's documented defaults.

    NOTE(review): the bias is dropped whenever bn is on, yet the batchnorm
    sits after the activation rather than directly after the conv - confirm
    that this ordering is intended.
    """
    conv = nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride, bias=not bn)
    act = GeneralRelu(**kwargs)
    if bn:
        return nn.Sequential(conv, act, nn.BatchNorm2d(nf, eps=1e-5, momentum=0.1))
    return nn.Sequential(conv, act)

In [18]:
# Rebuild the model with the get_learn_run/conv_layer defined just above
# (batchnorm on by default), this time with lr=1.0 instead of the baseline's 0.4.
learn,run = get_learn_run(nfs, data, 1., conv_layer, cbs=cbfs)

In [19]:
# Train the batchnorm model; the recorded output below shows three train/valid stat pairs.
run.fit(3, learn)

train: [0.2163815234375, tensor(0.9321)]
valid: [0.1172519287109375, tensor(0.9634)]
train: [0.065672353515625, tensor(0.9794)]
valid: [0.07271557006835938, tensor(0.9781)]
train: [0.044022802734375, tensor(0.9866)]
valid: [0.061623419189453126, tensor(0.9818)]


### Add scheduler

In [20]:
# Two-phase schedule built from exp.nb_06 helpers. Presumably: linear 0.6 -> 2.0 over
# the first 30% of training, then linear 2.0 -> 0.1 over the remaining 70%
# (semantics of combine_scheds/sched_lin are not visible here - confirm).
sched = combine_scheds([0.3, 0.7], [sched_lin(0.6, 2.), sched_lin(2., 0.1)])

In [21]:
# Same setup as the batchnorm run, with a ParamScheduler factory bound to 'lr' and
# `sched` appended to the shared callbacks (cbfs itself is not mutated: `+` builds a new list).
# NOTE(review): the 0.9 passed as lr is presumably overridden by the scheduler - confirm.
learn,run = get_learn_run(nfs, data, 0.9, conv_layer, cbs=cbfs
                          +[partial(ParamScheduler,'lr', sched)])

In [22]:
# Train with the scheduled learning rate; the recorded output below shows eight train/valid stat pairs.
run.fit(8, learn)

train: [0.24476443359375, tensor(0.9256)]
valid: [0.10582935791015625, tensor(0.9703)]
train: [0.0818483935546875, tensor(0.9749)]
valid: [0.09318789672851563, tensor(0.9717)]
train: [0.0584621728515625, tensor(0.9811)]
valid: [0.0768798828125, tensor(0.9754)]
train: [0.0349121337890625, tensor(0.9886)]
valid: [0.05381375732421875, tensor(0.9836)]
train: [0.02244583984375, tensor(0.9929)]
valid: [0.07415642700195313, tensor(0.9771)]
train: [0.014370880126953124, tensor(0.9957)]
valid: [0.04048168029785156, tensor(0.9877)]
train: [0.007841992797851563, tensor(0.9983)]
valid: [0.03838638916015625, tensor(0.9888)]
train: [0.005215344848632813, tensor(0.9992)]
valid: [0.03741629333496094, tensor(0.9887)]


### Export

In [23]:
# Helper pulled in through the exp.nb_06 star import; presumably writes the cells
# marked `#export` out to a module - confirm against its definition.
nb_auto_export()

<IPython.core.display.Javascript object>