In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
from utilities.imports import *

In [3]:
# Typical example setup: load MNIST, build the data loaders and the callback list
# shared by every experiment below.
# NOTE(review): get_data / normalize_to come from the star import above; their exact
# behaviour (download, caching, which statistics are used to normalise) is not visible here.
MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'
x_train, y_train, x_valid, y_valid = get_data(MNIST_URL)
x_train, x_valid = normalize_to(x_train, x_valid)

number_hidden = 50  # not referenced by any cell visible in this notebook section
batch_size = 512
# Largest label + 1; assumes labels are 0-based and every class occurs in y_train — TODO confirm.
num_categories = y_train.max().item()+1

loss_function = F.cross_entropy

# Data: wrap the tensors in datasets and loaders.
training_ds = Dataset(x_train, y_train)
validation_ds = Dataset(x_valid, y_valid)
train_dl = DataLoader(training_ds, batch_size, shuffle = True) #random sampler
# Validation uses twice the training batch size and keeps the original order.
valid_dl = DataLoader(validation_ds, batch_size*2, shuffle = False) #sequential sampler
# Passed to get_cnn_model further down; presumably the output channels of each conv layer — verify.
number_features = [8,16,32,32]

# Callbacks
# Presumably reshapes each input batch to (batch, 1, 28, 28) for the conv layers — TODO confirm.
mnist_view = view_tfm(1,28,28)
# Two linear segments: 0.6 -> 2.0, then 2.0 -> 0.1. The [0.3, 0.7] list presumably gives
# the fraction of training spent in each segment — verify against combine_schedules.
sched = combine_schedules([0.3, 0.7], [linear_scheduler(0.6, 2.), linear_scheduler(2., 0.1)]) 
# Callback classes / partials handed to Runner(cb_funcs=...) in the cells below.
cbfs = [Recorder,
        partial(AvgStatsCallback, accuracy),
        CudaCallback,
        partial(IndependentVarBatchTransformCallback,mnist_view),
       partial(ParamScheduler, 'lr', sched)]

In [8]:
#for RNNs
class Layer_Normalization(nn.Module):
    """Layer normalization with one learnable scalar scale and one scalar shift.

    Each sample is normalized independently using the mean and variance taken
    over *all* of its non-batch dimensions, so the result does not depend on
    the other items in the batch. Works for any input of rank >= 2
    (e.g. ``(batch, features)``, ``(batch, seq, features)`` or
    ``(batch, channels, height, width)``); dimension 0 is treated as the batch.

    Args:
        epsilon: value added to the variance before the square root to avoid
            division by zero.

    Attributes:
        eps: the stored ``epsilon``.
        multiplier: scalar learnable scale, initialised to 1.
        add: scalar learnable shift, initialised to 0.
    """
    __constants__ = ['eps']

    def __init__(self, epsilon=1e-5):
        super().__init__()
        self.eps = epsilon
        self.multiplier = nn.Parameter(tensor(1.0))
        self.add = nn.Parameter(tensor(0.))

    def forward(self, inp):
        """Normalize ``inp`` per sample, then apply the scalar scale and shift.

        Raises:
            ValueError: if ``inp`` has fewer than two dimensions, i.e. there is
                nothing besides the batch dimension to normalize over.
        """
        if inp.dim() < 2:
            raise ValueError("Layer_Normalization expects a batch dimension plus at least one feature dimension")
        # Reduce over every dimension except the batch one; for 4-D input this
        # is exactly (1, 2, 3), the previously hard-coded value.
        dims = list(range(1, inp.dim()))
        mean = inp.mean(dims, keepdim=True)
        # Tensor.var defaults to the unbiased (n-1) estimator.
        variance = inp.var(dims, keepdim=True)
        inp = (inp - mean) / (variance + self.eps).sqrt()
        return inp * self.multiplier + self.add

In [9]:
def conv_ln(ni, nf, ks=3, stride=2, bn=True, **kwargs):
    """Build a conv -> GeneralReLU -> (optional) Layer_Normalization block.

    Args:
        ni: number of input channels for the convolution.
        nf: number of output channels for the convolution.
        ks: kernel size; padding is ``ks//2``.
        stride: convolution stride.
        bn: when truthy, append a ``Layer_Normalization`` after the activation
            (the flag keeps the ``bn`` name although the layer is a layer norm).
        **kwargs: forwarded unchanged to ``GeneralReLU``.

    Returns:
        An ``nn.Sequential`` holding the layers in the order listed above.
    """
    conv = nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride, bias=True)
    activation = GeneralReLU(**kwargs)
    # The normalization layer sits after the activation, and only when requested.
    norm = [Layer_Normalization()] if bn else []
    return nn.Sequential(conv, activation, *norm)

In [10]:
# Build the CNN using conv_ln (the layer-norm block) as the layer factory, then initialise it.
model = get_cnn_model(num_categories, number_features, conv_ln)
init_cnn(model)
# NOTE(review): cbfs contains a ParamScheduler on 'lr'; it presumably overrides this
# lr=0.8 once training starts — confirm against ParamScheduler.
opt = optim.SGD(model.parameters(), lr=0.8)
# New Runner built from the shared callback list.
runner = Runner(cb_funcs=cbfs)

In [11]:
# Train the layer-norm model; the recorded output below shows three train/valid
# stat pairs, reaching ~0.98 validation accuracy.
runner.fit(3, model, opt , loss_function, train_dl, valid_dl)

train: [0.589342734375, tensor(0.8167, device='cuda:0')]
valid: [0.14574256591796875, tensor(0.9557, device='cuda:0')]
train: [0.10581474609375, tensor(0.9670, device='cuda:0')]
valid: [0.07456190185546875, tensor(0.9777, device='cuda:0')]
train: [0.0579320458984375, tensor(0.9819, device='cuda:0')]
valid: [0.06474047241210938, tensor(0.9819, device='cuda:0')]


In [12]:
#Designed for Style Transfers, not classifiers
class InstanceNorm(nn.Module):
    """Instance normalization with a learnable per-channel scale and shift.

    Every ``(sample, channel)`` plane is normalized on its own, using the mean
    and variance taken over dimensions 2 and 3 of the input.

    Because each plane is centred before the affine step, the mean of the
    output over dimensions 2 and 3 always equals ``adds`` for that channel,
    whatever the input was. Any later layer that only looks at that per-plane
    mean (e.g. a global average pool — assumption about the surrounding model,
    verify) therefore receives no information about the input.

    Args:
        nf: number of channels; ``mults`` and ``adds`` have shape ``(nf, 1, 1)``.
        eps: value added to the variance before the square root.
            NOTE(review): the default is 1.0 (written ``1e-0``), far larger
            than the 1e-5 used by Layer_Normalization in this notebook —
            confirm this is intentional and not a typo.
    """
    __constants__ = ['eps']

    def __init__(self, nf, eps=1e-0):
        super().__init__()
        self.eps = eps
        # One scale and one shift per channel, broadcast over the spatial dims.
        param_shape = (nf, 1, 1)
        self.mults = nn.Parameter(torch.ones(param_shape))
        self.adds = nn.Parameter(torch.zeros(param_shape))

    def forward(self, x):
        """Normalize each plane of ``x`` over dims (2, 3), then scale and shift."""
        centered = x - x.mean((2, 3), keepdim=True)
        # Tensor.var defaults to the unbiased (n-1) estimator.
        std = (x.var((2, 3), keepdim=True) + self.eps).sqrt()
        normalized = centered / std
        return normalized * self.mults + self.adds

In [15]:
def conv_in(ni, nf, ks=3, stride=2, bn=True, **kwargs):
    """Build a conv -> GeneralReLU -> (optional) InstanceNorm block.

    Args:
        ni: number of input channels for the convolution.
        nf: number of output channels; also the channel count given to
            ``InstanceNorm``.
        ks: kernel size; padding is ``ks//2``.
        stride: convolution stride.
        bn: when truthy, append ``InstanceNorm(nf)`` after the activation
            (the flag keeps the ``bn`` name although the layer is an instance norm).
        **kwargs: forwarded unchanged to ``GeneralReLU``.

    Returns:
        An ``nn.Sequential`` holding the layers in the order listed above.
    """
    conv = nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride, bias=True)
    activation = GeneralReLU(**kwargs)
    # The normalization layer sits after the activation, and only when requested.
    norm = [InstanceNorm(nf)] if bn else []
    return nn.Sequential(conv, activation, *norm)

In [18]:
# Same architecture as before, but built from conv_in (the instance-norm block).
model = get_cnn_model(num_categories, number_features, conv_in)
init_cnn(model)
# NOTE(review): cbfs contains a ParamScheduler on 'lr'; it presumably overrides this
# lr=0.1 once training starts — confirm against ParamScheduler.
opt = optim.SGD(model.parameters(), lr=0.1)
# New Runner built from the shared callback list.
runner = Runner(cb_funcs=cbfs)

In [19]:
# Train the instance-norm model. The recorded output below stays at loss ~2.30 with
# ~0.11 accuracy and an unchanging validation accuracy, i.e. the model is not learning.
# NOTE(review): 2.30 is about ln(10), which would be chance level if there are 10 classes — confirm.
runner.fit(3, model, opt , loss_function, train_dl, valid_dl)

train: [2.30254265625, tensor(0.1094, device='cuda:0')]
valid: [2.302323046875, tensor(0.1064, device='cuda:0')]
train: [2.30212171875, tensor(0.1111, device='cuda:0')]
valid: [2.3025388671875, tensor(0.1064, device='cuda:0')]
train: [2.301438125, tensor(0.1136, device='cuda:0')]
valid: [2.3021568359375, tensor(0.1064, device='cuda:0')]
