**Imports**

----

In [1]:
from tqdm import *
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms

In [3]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

**Load data**

----

In [4]:
X = []
Y = []

for b in range(1, 6):
    D = unpickle('./cifar-10-batches-py/data_batch_%s' % b)
    X.append( D[b'data'].reshape((-1, 3, 32, 32)).astype('uint8') )
    Y.append( np.array(D[b'labels']))
    names = [x.decode('utf-8') for x in D]

X = np.vstack(X)
Y = np.hstack(Y).astype('int')

D = unpickle('./cifar-10-batches-py/test_batch')
Xt = D[b'data'].reshape((-1, 3, 32, 32)).astype('uint8')
Yt = np.array(D[b'labels']).astype('int')
Lt = D[b'filenames']

**Normalize**

----

**Augmentation**

----

**Create model**

----

In [39]:
class Net(nn.Module):  
    def __init__(self):
        super(Net, self).__init__()
        
        dropout_parameter = 0.3
        
        input_size = 3        
        hidden_layer1_size = 48
        hidden_layer2_size = 96
        self.hidden_fn_layer_size = 96 * 6 * 6
        output_size = 10
        
        
        self.input_norm = nn.BatchNorm2d(input_size)
        
        self.layer1 = nn.Sequential( \
                                    nn.Conv2d(in_channels=input_size, out_channels=hidden_layer1_size, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_layer1_size, out_channels=hidden_layer1_size, kernel_size=3, padding=0, stride=1),
                                    nn.Dropout2d(dropout_parameter),
                                    nn.BatchNorm2d(hidden_layer1_size),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, stride = 2)
                                   )
        
        self.layer2 = nn.Sequential( \
                                    nn.Conv2d(in_channels=hidden_layer1_size, out_channels=hidden_layer2_size, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_layer2_size, out_channels=hidden_layer2_size, kernel_size=3, padding=0, stride=1),
                                    nn.Dropout2d(dropout_parameter),
                                    nn.BatchNorm2d(hidden_layer2_size),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, stride = 2)
                                   )
        
       
        self.fn_layer = nn.Sequential( \
                                      nn.Linear(self.hidden_fn_layer_size, self.hidden_fn_layer_size // 2),
                                      nn.Dropout2d(0.2),
                                      nn.ReLU(),
                                      nn.Linear(self.hidden_fn_layer_size // 2, output_size)
                                     )

    def forward(self, x): 
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(-1, self.hidden_fn_layer_size)
        self.fn_layer(x)
        return x 

**Learn the model**

----

In [40]:
net = Net().cuda()
net

Net (
  (input_norm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (layer1): Sequential (
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1))
    (3): Dropout2d (p=0.3)
    (4): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (layer2): Sequential (
    (0): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1))
    (3): Dropout2d (p=0.3)
    (4): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (fn_layer): Sequential (
    (0): Linear (3456 -> 1728)
    (1): Dropout2d (p=0.2)
    (2): ReLU ()
    (3): Linear (1728 -> 10)
  )
)

In [41]:
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

train_epoch_loss_list = []
test_epoch_loss_list = []

In [42]:
%%time
n_epoch = 40
batch_size = 1000
for epoch in tqdm(range(n_epoch)):
    train_epoch_loss = 0
    test_epoch_loss = 0
    
    print ('\nEpoch %s/%s' %(epoch+1,n_epoch))
    Xperm = np.random.permutation(X.shape[0])
    net.train(True)
    for b in range(X.shape[0]//batch_size):
        batch_idxs = Xperm[b*batch_size:(b+1)*batch_size]
        
        x = Variable(torch.Tensor(X[batch_idxs].tolist())).cuda()
        y = Variable(torch.LongTensor(Y[batch_idxs].tolist())).cuda()
        
        y_hat = net(x)
        loss = criterion(y_hat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_epoch_loss += loss.data[0]

    '''learn test'''
    Xperm = np.random.permutation(Xt.shape[0])
    net.train(False)
    for b in range(Xt.shape[0]//batch_size):
        batch_idxs = Xperm[b*batch_size:(b+1)*batch_size]
        x = Variable(torch.Tensor(Xt[batch_idxs].tolist()),volatile = True).cuda()
        y = Variable(torch.LongTensor(Yt[batch_idxs]),volatile = True).cuda()

        y_hat = net(x)
        loss = criterion(y_hat, y)
        test_epoch_loss += loss.data[0]
    
    '''save loss for current epoch'''
    train_epoch_loss_list.append(train_epoch_loss)
    test_epoch_loss_list.append(test_epoch_loss)
    print ("Epoch loss: \ntrain: %s \ntest: %s\n" %(train_epoch_loss_list[-1], test_epoch_loss_list[-1]))


Epoch 1/40
Epoch loss: 
train: 391.5603790283203 
test: 77.25887203216553


Epoch 2/40
Epoch loss: 
train: 364.4393630027771 
test: 70.95874071121216


Epoch 3/40
Epoch loss: 
train: 352.1118640899658 
test: 69.05934000015259


Epoch 4/40


KeyboardInterrupt: 