**Imports**

----

In [1]:
from tqdm import *
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms

In [3]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

**Load data**

----

In [4]:
X = []
Y = []

for b in range(1, 6):
    D = unpickle('./cifar-10-batches-py/data_batch_%s' % b)
    X.append( D[b'data'].reshape((-1, 3, 32, 32)).astype('uint8') )
    Y.append( np.array(D[b'labels']))
    names = [x.decode('utf-8') for x in D]

X = np.vstack(X)
Y = np.hstack(Y).astype('int')

D = unpickle('./cifar-10-batches-py/test_batch')
Xt = D[b'data'].reshape((-1, 3, 32, 32)).astype('uint8')
Yt = np.array(D[b'labels']).astype('int')
Lt = D[b'filenames']

**Normalize**

----

**Augmentation**

----

**Create model**

----

In [5]:
class Net(nn.Module):  
    def __init__(self):
        super(Net, self).__init__()
        
        dropout_parameter = 0.3
        
        input_size = 3        
        hidden_layer1_size = 48
        hidden_layer2_size = 96
        self.hidden_fc_layer_size = 96 * 6 * 6
        output_size = 10
        
        
        self.input_norm = nn.BatchNorm2d(input_size)
        
        self.layer1 = nn.Sequential( \
                                    nn.Conv2d(in_channels=input_size, out_channels=hidden_layer1_size, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_layer1_size, out_channels=hidden_layer1_size, kernel_size=3, padding=0, stride=1),
                                    nn.Dropout2d(dropout_parameter),
                                    nn.BatchNorm2d(hidden_layer1_size),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, stride = 2)
                                   )
        
        self.layer2 = nn.Sequential( \
                                    nn.Conv2d(in_channels=hidden_layer1_size, out_channels=hidden_layer2_size, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_layer2_size, out_channels=hidden_layer2_size, kernel_size=3, padding=0, stride=1),
                                    nn.Dropout2d(dropout_parameter),
                                    nn.BatchNorm2d(hidden_layer2_size),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, stride = 2)
                                   )
        
       
        self.fc_layer = nn.Sequential( \
                                      nn.Linear(self.hidden_fc_layer_size, output_size * 100),
                                      nn.Dropout2d(),
                                      nn.ReLU(),
                                      nn.Linear(output_size * 100, output_size * 100),
                                      nn.Dropout2d(),
                                      nn.ReLU(),
                                      nn.Linear(output_size * 100, output_size)
                                     )

    def forward(self, x): 
        x = self.input_norm(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(-1, self.hidden_fc_layer_size)
        self.fc_layer(x)
        return x 

**Hyperparams**

----

In [6]:
class Hyperparams:
    def __init__(self):
        self.lr = 0.1
        self.lock = -4
        self.logic = False
        
    def update(self, epoch, loss=None):
        if epoch < 5:
            self.lr = 0.1
        elif epoch < 20:
            self.lr = 0.01
        elif epoch < 100:
            self.lr = 0.001
        else:
            self.lr = 0.0001
        
        if loss is not None:
            print (loss)
            loss = np.array(loss)
            '''make punch and let it rest for 5 iter'''
            print("Last %s loss std %s and mean %s" %(loss.shape,loss.std(),loss.mean()))
            if (loss.std() < 0.4) and (hp.lock + 4 < epoch):
                if self.logic: self.lr = 10 * self.lr
                else: self.lr = self.lr / 10
                self.lock = epoch
                print("PUNCH!")
                self.logic = not self.logic

**Learn the model**

----

In [7]:
net = Net().cuda()
net

Net (
  (input_norm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (layer1): Sequential (
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1))
    (3): Dropout2d (p=0.3)
    (4): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (layer2): Sequential (
    (0): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1))
    (3): Dropout2d (p=0.3)
    (4): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (fc_layer): Sequential (
    (0): Linear (3456 -> 1000)
    (1): Dropout2d (p=0.5)
    (2): ReLU ()
    (3): Linear (1000 -> 1000)
    (4): Dropout2d (p=0.5)
    (5): ReLU ()
    (6): Linear (1000 -> 10)
  )
)

In [None]:
hp = Hyperparams()

criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=hp.lr)

train_epoch_loss_list = []
test_epoch_loss_list = []

In [None]:
%%time
n_epoch = 160
batch_size = 1000
for epoch in tqdm(range(n_epoch)):
    if epoch < 5:
        hp.update(epoch)
    else:
        hp.update(epoch,loss=train_epoch_loss_list[-4:])
    train_epoch_loss = 0
    test_epoch_loss = 0
    running_corrects = 0
    
    print ('\nEpoch %s/%s' %(epoch+1,n_epoch))
    Xperm = np.random.permutation(X.shape[0])
    net.train(True)
    for b in range(X.shape[0]//batch_size):
        batch_idxs = Xperm[b*batch_size:(b+1)*batch_size]
        
        x = Variable(torch.Tensor(X[batch_idxs].tolist())).cuda()
        y = Variable(torch.LongTensor(Y[batch_idxs].tolist())).cuda()
        
        y_hat = net(x)
        loss = criterion(y_hat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_epoch_loss += loss.data[0]
        _, preds = torch.max(y_hat.data, 1)
        running_corrects += torch.sum(preds == y.data)
    print ("Epoch train accuracy %s" %(running_corrects/Y.shape[0]))
    running_corrects= 0
    for param_group in optimizer.param_groups:
        param_group['lr'] = hp.lr
    
    '''learn test'''
    Xperm = np.random.permutation(Xt.shape[0])
    net.train(False)
    for b in range(Xt.shape[0]//batch_size):
        batch_idxs = Xperm[b*batch_size:(b+1)*batch_size]
        x = Variable(torch.Tensor(Xt[batch_idxs].tolist()),volatile = True).cuda()
        y = Variable(torch.LongTensor(Yt[batch_idxs]),volatile = True).cuda()

        y_hat = net(x)
        loss = criterion(y_hat, y)
        test_epoch_loss += loss.data[0]
        running_corrects += torch.sum(preds == y.data)
    print ("Epoch test accuracy %s" %(running_corrects/Yt.shape[0]))
    
    '''save loss for current epoch'''
    train_epoch_loss_list.append(train_epoch_loss)
    test_epoch_loss_list.append(test_epoch_loss)
    print ("Epoch loss: train %s and test: %s\nlr %s\n" %(train_epoch_loss_list[-1], test_epoch_loss_list[-1], hp.lr))

  0%|          | 0/160 [00:00<?, ?it/s]


Epoch 1/160
Epoch train accuracy 0.06564


  1%|          | 1/160 [00:20<55:04, 20.78s/it]

Epoch test accuracy 0.1001
Epoch loss: train 262.0794870853424 and test: 34.86334276199341
lr 0.1


Epoch 2/160
Epoch train accuracy 0.10456


  1%|▏         | 2/160 [00:39<52:34, 19.96s/it]

Epoch test accuracy 0.0966
Epoch loss: train 164.0585629940033 and test: 30.44432306289673
lr 0.1


Epoch 3/160
Epoch train accuracy 0.10762


  2%|▏         | 3/160 [00:58<51:15, 19.59s/it]

Epoch test accuracy 0.0963
Epoch loss: train 160.54601526260376 and test: 30.710731029510498
lr 0.1


Epoch 4/160
Epoch train accuracy 0.10848


  2%|▎         | 4/160 [01:17<50:25, 19.39s/it]

Epoch test accuracy 0.0931
Epoch loss: train 159.17747020721436 and test: 30.489855527877808
lr 0.1


Epoch 5/160
Epoch train accuracy 0.12212


  3%|▎         | 5/160 [01:36<49:49, 19.28s/it]

Epoch test accuracy 0.0995
Epoch loss: train 157.64793920516968 and test: 29.83544397354126
lr 0.1

[164.0585629940033, 160.54601526260376, 159.17747020721436, 157.64793920516968]
Last (4,) loss std 2.36999975224 and mean 160.357496917

Epoch 6/160
Epoch train accuracy 0.13258


  4%|▍         | 6/160 [01:55<49:19, 19.22s/it]

Epoch test accuracy 0.1012
Epoch loss: train 155.9716100692749 and test: 29.226439952850342
lr 0.01

[160.54601526260376, 159.17747020721436, 157.64793920516968, 155.9716100692749]
Last (4,) loss std 1.70704472258 and mean 158.335758686

Epoch 7/160
Epoch train accuracy 0.13694


  4%|▍         | 7/160 [02:14<48:51, 19.16s/it]

Epoch test accuracy 0.0996
Epoch loss: train 154.69183254241943 and test: 29.204413890838623
lr 0.01

[159.17747020721436, 157.64793920516968, 155.9716100692749, 154.69183254241943]
Last (4,) loss std 1.69418907707 and mean 156.872213006

Epoch 8/160
Epoch train accuracy 0.13604


  5%|▌         | 8/160 [02:33<48:28, 19.13s/it]

Epoch test accuracy 0.1029
Epoch loss: train 154.49271821975708 and test: 29.043790102005005
lr 0.01

[157.64793920516968, 155.9716100692749, 154.69183254241943, 154.49271821975708]
Last (4,) loss std 1.2591828594 and mean 155.701025009

Epoch 9/160
Epoch train accuracy 0.13524


  6%|▌         | 9/160 [02:51<48:04, 19.10s/it]

Epoch test accuracy 0.0996
Epoch loss: train 154.06782698631287 and test: 28.884506940841675
lr 0.01

[155.9716100692749, 154.69183254241943, 154.49271821975708, 154.06782698631287]
Last (4,) loss std 0.709705205331 and mean 154.805996954

Epoch 10/160
Epoch train accuracy 0.1376


  6%|▋         | 10/160 [03:11<47:48, 19.12s/it]

Epoch test accuracy 0.0968
Epoch loss: train 154.1895046234131 and test: 28.92375349998474
lr 0.01

[154.69183254241943, 154.49271821975708, 154.06782698631287, 154.1895046234131]
Last (4,) loss std 0.246048684003 and mean 154.360470593
PUNCH!

Epoch 11/160
Epoch train accuracy 0.13858


  7%|▋         | 11/160 [03:30<47:27, 19.11s/it]

Epoch test accuracy 0.0936
Epoch loss: train 154.04535341262817 and test: 28.914010763168335
lr 0.001

[154.49271821975708, 154.06782698631287, 154.1895046234131, 154.04535341262817]
Last (4,) loss std 0.178307388444 and mean 154.198850811

Epoch 12/160
Epoch train accuracy 0.13598


  8%|▊         | 12/160 [03:49<47:06, 19.10s/it]

Epoch test accuracy 0.1011
Epoch loss: train 153.65977001190186 and test: 28.7800874710083
lr 0.01

[154.06782698631287, 154.1895046234131, 154.04535341262817, 153.65977001190186]
Last (4,) loss std 0.198729384523 and mean 153.990613759

Epoch 13/160
Epoch train accuracy 0.1368


  8%|▊         | 13/160 [04:07<46:42, 19.07s/it]

Epoch test accuracy 0.0986
Epoch loss: train 153.93299317359924 and test: 28.917562246322632
lr 0.01

[154.1895046234131, 154.04535341262817, 153.65977001190186, 153.93299317359924]
Last (4,) loss std 0.194156305011 and mean 153.956905305

Epoch 14/160
Epoch train accuracy 0.13428


  9%|▉         | 14/160 [04:26<46:21, 19.05s/it]

Epoch test accuracy 0.0961
Epoch loss: train 154.11234498023987 and test: 28.770448684692383
lr 0.01

[154.04535341262817, 153.65977001190186, 153.93299317359924, 154.11234498023987]
Last (4,) loss std 0.172740651151 and mean 153.937615395

Epoch 15/160
Epoch train accuracy 0.13828


  9%|▉         | 15/160 [04:45<45:56, 19.01s/it]

Epoch test accuracy 0.0978
Epoch loss: train 153.50401306152344 and test: 28.95422101020813
lr 0.01

[153.65977001190186, 153.93299317359924, 154.11234498023987, 153.50401306152344]
Last (4,) loss std 0.235848727482 and mean 153.802280307
PUNCH!

Epoch 16/160
Epoch train accuracy 0.13786


 10%|█         | 16/160 [05:04<45:41, 19.04s/it]

Epoch test accuracy 0.0983
Epoch loss: train 153.52433609962463 and test: 28.69005584716797
lr 0.1

[153.93299317359924, 154.11234498023987, 153.50401306152344, 153.52433609962463]
Last (4,) loss std 0.262133888035 and mean 153.768421829

Epoch 17/160
Epoch train accuracy 0.13634


 11%|█         | 17/160 [05:24<45:33, 19.11s/it]

Epoch test accuracy 0.1017
Epoch loss: train 154.64259338378906 and test: 28.998212575912476
lr 0.01

[154.11234498023987, 153.50401306152344, 153.52433609962463, 154.64259338378906]
Last (4,) loss std 0.470655332103 and mean 153.945821881

Epoch 18/160
Epoch train accuracy 0.13612


 11%|█▏        | 18/160 [05:44<45:21, 19.16s/it]

Epoch test accuracy 0.0951
Epoch loss: train 153.53926849365234 and test: 28.55550241470337
lr 0.01

[153.50401306152344, 153.52433609962463, 154.64259338378906, 153.53926849365234]
Last (4,) loss std 0.485159076025 and mean 153.80255276

Epoch 19/160


**Plot results**

----

In [None]:
print ("Min values and epoch\ntrain: %s\ntest: %s" \
       %(np.array(train_epoch_loss_list).min(), np.array(test_epoch_loss_list).min()) )
train_loss, = plt.plot(train_epoch_loss_list, 'g-',linewidth = 1, label='Train')
test_loss, = plt.plot(test_epoch_loss_list, 'b-',linewidth = 1, label = "Test")
plt.legend(handles=[train_loss, test_loss])
plt.grid()
plt.show()