In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
from utilties.imports import *
torch.set_num_threads(2)

In [3]:
#typical example setup
MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'
x_train, y_train, x_valid, y_valid = get_data(MNIST_URL)
x_train, x_valid = normalize_to(x_train, x_valid)
number_hidden = 50
batch_size = 512
num_categories = y_train.max().item()+1
loss_function = F.cross_entropy
training_ds = Dataset(x_train, y_train)
validation_ds = Dataset(x_valid, y_valid)
train_dl = DataLoader(training_ds, batch_size, shuffle = True) #random sampler
valid_dl = DataLoader(validation_ds, batch_size*2, shuffle = False) #sequential sampler
model, optimizer = get_model(train_dl)

In [4]:
#resize mnist image data -> 28x28
def mnist_resize(x): return x.view(-1, 1, 28, 28)

def flatten(x):
    return x.view(x.shape[0], -1) #removes 1,1 axis from result of AvgPool layer

In [5]:
print("Mean: %f\nSTD: %f"%(x_train.mean(),x_train.std()))

Mean: 0.000031
STD: 1.000000


### Lambda Class
- Lambda layers allow for Pytorch Models to be saved using pickle
- Lambda functions don't pickle due to lack of name
- examples:
    1. resizing images before putting them through layers
    2. use case example: mnist_resize

In [6]:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x): return self.func(x)

In [7]:
def get_cnn_model(num_categories):
    return nn.Sequential(
        Lambda(mnist_resize),
        nn.Conv2d( 1, 8, 5, padding=2,stride=2), nn.ReLU(), #14
        nn.Conv2d( 8,16, 3, padding=1,stride=2), nn.ReLU(), # 7
        nn.Conv2d(16,32, 3, padding=1,stride=2), nn.ReLU(), # 4
        nn.Conv2d(32,32, 3, padding=1,stride=2), nn.ReLU(), # 2
        nn.AdaptiveAvgPool2d(1),
        Lambda(flatten),
        nn.Linear(32,num_categories)
    )

In [None]:
model = get_cnn_model(num_categories)
model

In [9]:
callback_functions = [Recorder, partial(AvgStatsCallback, accuracy)]
opt = optim.SGD(model.parameters(), lr=0.4)
runner = Runner(cb_funcs=callback_functions)

In [10]:
%time runner.fit(1, model, opt, loss_function, train_dl, valid_dl)

train: [2.29546484375, tensor(0.1151)]
valid: [2.2284873046875, tensor(0.3473)]
Wall time: 11.9 s


# Throwing the model on the GPU
- Things needed to go on the GPU
   1. Parameters
   2. Inputs

In [11]:
# Somewhat more flexible way
device = torch.device('cuda',0)

In [12]:
class CudaCallback(Callback):
    def __init__(self,device): self.device=device
    def begin_fit(self): 
        self.model.to(self.device)
    def begin_batch(self):
        self.run.xb,self.run.yb = self.xb.to(self.device),self.yb.to(self.device)

In [13]:
# Somewhat less flexible, but quite convenient
torch.cuda.set_device(device)

In [14]:
#export
class CudaCallback(Callback):
    def begin_fit(self): self.model.cuda()
    def begin_batch(self): self.run.xb,self.run.yb = self.xb.cuda(),self.yb.cuda()

In [15]:
callback_functions.append(CudaCallback)

In [16]:
model = get_cnn_model(num_categories)

In [17]:
opt = optim.SGD(model.parameters(), lr=0.4)
run = Runner(cb_funcs=callback_functions)

In [18]:
%time run.fit(3, model, opt, loss_function, train_dl, valid_dl)

train: [2.0537396875, tensor(0.2718, device='cuda:0')]
valid: [0.984716796875, tensor(0.6936, device='cuda:0')]
train: [0.46585046875, tensor(0.8540, device='cuda:0')]
valid: [0.353442822265625, tensor(0.8915, device='cuda:0')]
train: [0.19904873046875, tensor(0.9405, device='cuda:0')]
valid: [0.18188040771484376, tensor(0.9416, device='cuda:0')]
Wall time: 5.37 s


# Refactoring - Creating Standard Layer Groups and Adding More Callbacks

### Convolutional 2-D Layer: Conv2D + ReLU

In [19]:
def conv2d(ni, nf, ks=3, stride=2):
    return nn.Sequential(
        nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride), nn.ReLU())

In [20]:
#applies a given transform to the independent variables (xs)
class IndependentVarBatchTransformCallback(Callback):
    _order=2
    def __init__(self, tfm): self.tfm = tfm
    def begin_batch(self): self.run.xb = self.tfm(self.xb)
            
#re-view an x variable at a specific size
def view_tfm(*size):
    def _inner(x): return x.view(*((-1,)+size))
    return _inner

In [21]:
mnist_view = view_tfm(1,28,28)
callback_functions.append(partial(IndependentVarBatchTransformCallback, mnist_view))

In [22]:
number_fields = [8,16,32,32]

In [23]:
def get_cnn_layers(num_categories, nfs):
    nfs = [1] + nfs
    return [
        conv2d(nfs[i], nfs[i+1], 5 if i==0 else 3) for i in range(len(nfs)-1)
    ] + [nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(nfs[-1], num_categories)]

In [24]:
def get_cnn_model(num_categories, nfs):
    return nn.Sequential(*get_cnn_layers(num_categories, nfs))

In [25]:
model = get_cnn_model(num_categories, number_fields)
run = Runner(cb_funcs=callback_functions)

In [26]:
model

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (4): AdaptiveAvgPool2d(output_size=1)
  (5): Lambda()
  (6): Linear(in_features=32, out_features=10, bias=True)
)

In [27]:
run.fit(3, model, opt, loss_function, train_dl, valid_dl)

train: [2.30709703125, tensor(0.1020, device='cuda:0')]
valid: [2.3072546875, tensor(0.1030, device='cuda:0')]
train: [2.30709703125, tensor(0.1020, device='cuda:0')]
valid: [2.3072546875, tensor(0.1030, device='cuda:0')]
train: [2.3070971875, tensor(0.1020, device='cuda:0')]
valid: [2.3072546875, tensor(0.1030, device='cuda:0')]
