**Imports**

----

In [27]:
from tqdm import *
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [28]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms

In [29]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

**Load data**

----

In [30]:
X = []
Y = []

for b in range(1, 6):
    D = unpickle('./cifar-10-batches-py/data_batch_%s' % b)
    X.append( D[b'data'].reshape((-1, 3, 32, 32)).astype('uint8') )
    Y.append( np.array(D[b'labels']))
    names = [x.decode('utf-8') for x in D]

X = np.vstack(X)
Y = np.hstack(Y).astype('int')

D = unpickle('./cifar-10-batches-py/test_batch')
Xt = D[b'data'].reshape((-1, 3, 32, 32)).astype('uint8')
Yt = np.array(D[b'labels']).astype('int')
Lt = D[b'filenames']

**Normalize**

----

**Augmentation**

----

**Create model**

----

In [97]:
class Net(nn.Module):  
    def __init__(self):
        super(Net, self).__init__()
        
        dropout_parameter = 0.3
        
        input_size = 3        
        hidden_layer1_size = 48
        hidden_layer2_size = 96
        self.hidden_fc_layer_size = 96 * 6 * 6
        output_size = 10
        
        
        self.input_norm = nn.BatchNorm2d(input_size)
        
        self.layer1 = nn.Sequential( \
                                    nn.Conv2d(in_channels=input_size, out_channels=hidden_layer1_size, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_layer1_size, out_channels=hidden_layer1_size, kernel_size=3, padding=0, stride=1),
                                    nn.Dropout2d(dropout_parameter),
                                    nn.BatchNorm2d(hidden_layer1_size),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, stride = 2)
                                   )
        
        self.layer2 = nn.Sequential( \
                                    nn.Conv2d(in_channels=hidden_layer1_size, out_channels=hidden_layer2_size, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_layer2_size, out_channels=hidden_layer2_size, kernel_size=3, padding=0, stride=1),
                                    nn.Dropout2d(dropout_parameter),
                                    nn.BatchNorm2d(hidden_layer2_size),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, stride = 2)
                                   )
        
       
        self.fc_layer = nn.Sequential( \
                                      nn.Linear(self.hidden_fc_layer_size, output_size * 100),
                                      nn.Dropout2d(),
                                      nn.ReLU(),
                                      nn.Linear(output_size * 100, output_size * 100),
                                      nn.Dropout2d(),
                                      nn.ReLU(),
                                      nn.Linear(output_size * 100, output_size)
                                     )

    def forward(self, x): 
        x = self.input_norm(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(-1, self.hidden_fc_layer_size)
        self.fc_layer(x)
        return x 

In [98]:
a = not True
a

False

**Hyperparams**

----

In [99]:
class Hyperparams:
    def __init__(self):
        self.lr = 0.1
        self.lock = -4
        self.logic = False
        
    def update(self, epoch, loss=None):
        if epoch < 5:
            self.lr = 0.1
        elif epoch < 20:
            self.lr = 0.01
        elif epoch < 100:
            self.lr = 0.001
        else:
            self.lr = 0.0001
        
        if loss is not None:
            print (loss)
            loss = np.array(loss)
            '''make punch and let it rest for 5 iter'''
            print("Last %s loss std %s and mean %s" %(loss.shape,loss.std(),loss.mean()))
            if (loss.std() < 0.4) and (hp.lock + 4 < epoch):
                if self.logic: self.lr = 10 * self.lr
                else: self.lr = self.lr / 10
                self.lock = epoch
                print("PUNCH!")
                self.logic = not self.logic

**Learn the model**

----

In [100]:
net = Net().cuda()
net

Net (
  (input_norm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (layer1): Sequential (
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1))
    (3): Dropout2d (p=0.3)
    (4): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (layer2): Sequential (
    (0): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1))
    (3): Dropout2d (p=0.3)
    (4): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (fn_layer): Sequential (
    (0): Linear (3456 -> 1000)
    (1): Dropout2d (p=0.5)
    (2): ReLU ()
    (3): Linear (1000 -> 1000)
    (4): Dropout2d (p=0.5)
    (5): ReLU ()
    (6): Linear (1000 -> 10)
  )
)

In [101]:
hp = Hyperparams()

criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=hp.lr)

train_epoch_loss_list = []
test_epoch_loss_list = []

In [None]:
%%time
n_epoch = 160
batch_size = 1000
for epoch in tqdm(range(n_epoch)):
    if epoch < 5:
        hp.update(epoch)
    else:
        hp.update(epoch,loss=train_epoch_loss_list[-4:])
    train_epoch_loss = 0
    test_epoch_loss = 0
    running_corrects = 0
    
    print ('\nEpoch %s/%s' %(epoch+1,n_epoch))
    Xperm = np.random.permutation(X.shape[0])
    net.train(True)
    for b in range(X.shape[0]//batch_size):
        batch_idxs = Xperm[b*batch_size:(b+1)*batch_size]
        
        x = Variable(torch.Tensor(X[batch_idxs].tolist())).cuda()
        y = Variable(torch.LongTensor(Y[batch_idxs].tolist())).cuda()
        
        y_hat = net(x)
        loss = criterion(y_hat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_epoch_loss += loss.data[0]
        _, preds = torch.max(y_hat.data, 1)
        running_corrects += torch.sum(preds == y.data)
    print ("Epoch train accuracy %s" %(running_corrects/Y.shape[0]))
    running_corrects= 0
    for param_group in optimizer.param_groups:
        param_group['lr'] = hp.lr
    
    '''learn test'''
    Xperm = np.random.permutation(Xt.shape[0])
    net.train(False)
    for b in range(Xt.shape[0]//batch_size):
        batch_idxs = Xperm[b*batch_size:(b+1)*batch_size]
        x = Variable(torch.Tensor(Xt[batch_idxs].tolist()),volatile = True).cuda()
        y = Variable(torch.LongTensor(Yt[batch_idxs]),volatile = True).cuda()

        y_hat = net(x)
        loss = criterion(y_hat, y)
        test_epoch_loss += loss.data[0]
        running_corrects += torch.sum(preds == y.data)
    print ("Epoch test accuracy %s" %(running_corrects/Yt.shape[0]))
    
    '''save loss for current epoch'''
    train_epoch_loss_list.append(train_epoch_loss)
    test_epoch_loss_list.append(test_epoch_loss)
    print ("Epoch loss: train %s and test: %s\nlr %s\n" %(train_epoch_loss_list[-1], test_epoch_loss_list[-1], hp.lr))

  0%|          | 0/160 [00:00<?, ?it/s]


Epoch 1/160
Epoch train accuracy 0.0968


  1%|          | 1/160 [00:18<49:46, 18.78s/it]

Epoch test accuracy 0.1004
Epoch loss: train 233.00094866752625 and test: 32.78002619743347
lr 0.1


Epoch 2/160
Epoch train accuracy 0.11924


  1%|▏         | 2/160 [00:37<50:00, 18.99s/it]

Epoch test accuracy 0.0972
Epoch loss: train 167.35193729400635 and test: 30.597297191619873
lr 0.1


Epoch 3/160
Epoch train accuracy 0.12976


  2%|▏         | 3/160 [00:57<50:32, 19.31s/it]

Epoch test accuracy 0.0995
Epoch loss: train 160.00437664985657 and test: 29.872230529785156
lr 0.1


Epoch 4/160
Epoch train accuracy 0.13694


  2%|▎         | 4/160 [01:18<50:58, 19.61s/it]

Epoch test accuracy 0.0938
Epoch loss: train 157.12176036834717 and test: 29.090797662734985
lr 0.1


Epoch 5/160
Epoch train accuracy 0.13836


  3%|▎         | 5/160 [01:37<50:31, 19.56s/it]

Epoch test accuracy 0.0989
Epoch loss: train 154.57818150520325 and test: 28.476508140563965
lr 0.1

[167.35193729400635, 160.00437664985657, 157.12176036834717, 154.57818150520325]
Last (4,) loss std 4.78300943516 and mean 159.764063954

Epoch 6/160
Epoch train accuracy 0.1387


  4%|▍         | 6/160 [01:56<49:56, 19.46s/it]

Epoch test accuracy 0.0974
Epoch loss: train 153.13293385505676 and test: 28.47804880142212
lr 0.01

[160.00437664985657, 157.12176036834717, 154.57818150520325, 153.13293385505676]
Last (4,) loss std 2.61532822525 and mean 156.209313095

Epoch 7/160
Epoch train accuracy 0.14224


  4%|▍         | 7/160 [02:15<49:26, 19.39s/it]

Epoch test accuracy 0.0955
Epoch loss: train 152.43752455711365 and test: 28.312418222427368
lr 0.01

[157.12176036834717, 154.57818150520325, 153.13293385505676, 152.43752455711365]
Last (4,) loss std 1.7936928136 and mean 154.317600071

Epoch 8/160
Epoch train accuracy 0.1435


  5%|▌         | 8/160 [02:35<49:09, 19.40s/it]

Epoch test accuracy 0.1037
Epoch loss: train 151.852605342865 and test: 28.16768169403076
lr 0.01

[154.57818150520325, 153.13293385505676, 152.43752455711365, 151.852605342865]
Last (4,) loss std 1.01749950965 and mean 153.000311315

Epoch 9/160
Epoch train accuracy 0.14384


  6%|▌         | 9/160 [02:54<48:42, 19.35s/it]

Epoch test accuracy 0.1
Epoch loss: train 151.63624620437622 and test: 28.123003721237183
lr 0.01

[153.13293385505676, 152.43752455711365, 151.852605342865, 151.63624620437622]
Last (4,) loss std 0.580619167197 and mean 152.26482749

Epoch 10/160
Epoch train accuracy 0.1449


  6%|▋         | 10/160 [03:13<48:19, 19.33s/it]

Epoch test accuracy 0.0966
Epoch loss: train 151.4037573337555 and test: 28.189973831176758
lr 0.01

[152.43752455711365, 151.852605342865, 151.63624620437622, 151.4037573337555]
Last (4,) loss std 0.383664815049 and mean 151.83253336
PUNCH!

Epoch 11/160
Epoch train accuracy 0.14372


  7%|▋         | 11/160 [03:31<47:50, 19.27s/it]

Epoch test accuracy 0.1027
Epoch loss: train 151.28410530090332 and test: 28.05645751953125
lr 0.001

[151.852605342865, 151.63624620437622, 151.4037573337555, 151.28410530090332]
Last (4,) loss std 0.218494709551 and mean 151.544178545

Epoch 12/160
Epoch train accuracy 0.1428


  8%|▊         | 12/160 [03:50<47:24, 19.22s/it]

Epoch test accuracy 0.0975
Epoch loss: train 150.97156858444214 and test: 27.9472873210907
lr 0.01

[151.63624620437622, 151.4037573337555, 151.28410530090332, 150.97156858444214]
Last (4,) loss std 0.239613432485 and mean 151.323919356

Epoch 13/160
Epoch train accuracy 0.1449


  8%|▊         | 13/160 [04:09<47:01, 19.20s/it]

Epoch test accuracy 0.0936
Epoch loss: train 150.97780203819275 and test: 27.888265132904053
lr 0.01

[151.4037573337555, 151.28410530090332, 150.97156858444214, 150.97780203819275]
Last (4,) loss std 0.189420395821 and mean 151.159308314

Epoch 14/160
Epoch train accuracy 0.1438


  9%|▉         | 14/160 [04:28<46:40, 19.18s/it]

Epoch test accuracy 0.0937
Epoch loss: train 150.56205677986145 and test: 27.881697416305542
lr 0.01

[151.28410530090332, 150.97156858444214, 150.97780203819275, 150.56205677986145]
Last (4,) loss std 0.25659280101 and mean 150.948883176

Epoch 15/160
Epoch train accuracy 0.14574


  9%|▉         | 15/160 [04:47<46:23, 19.20s/it]

Epoch test accuracy 0.102
Epoch loss: train 150.76653623580933 and test: 27.938265323638916
lr 0.01

[150.97156858444214, 150.97780203819275, 150.56205677986145, 150.76653623580933]
Last (4,) loss std 0.17122102766 and mean 150.81949091
PUNCH!

Epoch 16/160
Epoch train accuracy 0.1446


 10%|█         | 16/160 [05:06<46:00, 19.17s/it]

Epoch test accuracy 0.0942
Epoch loss: train 150.3684504032135 and test: 27.892476081848145
lr 0.1

[150.97780203819275, 150.56205677986145, 150.76653623580933, 150.3684504032135]
Last (4,) loss std 0.22728759057 and mean 150.668711364

Epoch 17/160
Epoch train accuracy 0.14022


 11%|█         | 17/160 [05:26<45:42, 19.18s/it]

Epoch test accuracy 0.0957
Epoch loss: train 152.2458257675171 and test: 27.80787682533264
lr 0.01

[150.56205677986145, 150.76653623580933, 150.3684504032135, 152.2458257675171]
Last (4,) loss std 0.74101625269 and mean 150.985717297

Epoch 18/160
Epoch train accuracy 0.14592


 11%|█▏        | 18/160 [05:44<45:19, 19.15s/it]

Epoch test accuracy 0.0977
Epoch loss: train 150.57952880859375 and test: 27.776866674423218
lr 0.01

[150.76653623580933, 150.3684504032135, 152.2458257675171, 150.57952880859375]
Last (4,) loss std 0.738553465126 and mean 150.990085304

Epoch 19/160
Epoch train accuracy 0.14778


 12%|█▏        | 19/160 [06:03<45:00, 19.16s/it]

Epoch test accuracy 0.1004
Epoch loss: train 149.73231720924377 and test: 27.71041440963745
lr 0.01

[150.3684504032135, 152.2458257675171, 150.57952880859375, 149.73231720924377]
Last (4,) loss std 0.928230713309 and mean 150.731530547

Epoch 20/160
Epoch train accuracy 0.14814


 12%|█▎        | 20/160 [06:22<44:38, 19.13s/it]

Epoch test accuracy 0.0922
Epoch loss: train 149.51189279556274 and test: 27.701629638671875
lr 0.01

[152.2458257675171, 150.57952880859375, 149.73231720924377, 149.51189279556274]
Last (4,) loss std 1.07455983417 and mean 150.517391145

Epoch 21/160
Epoch train accuracy 0.14832


 13%|█▎        | 21/160 [06:42<44:22, 19.15s/it]

Epoch test accuracy 0.0962
Epoch loss: train 149.6364941596985 and test: 27.57575011253357
lr 0.001

[150.57952880859375, 149.73231720924377, 149.51189279556274, 149.6364941596985]
Last (4,) loss std 0.419837987973 and mean 149.865058243

Epoch 22/160
Epoch train accuracy 0.14928


 14%|█▍        | 22/160 [07:01<44:05, 19.17s/it]

Epoch test accuracy 0.1018
Epoch loss: train 149.33742833137512 and test: 27.49079465866089
lr 0.001

[149.73231720924377, 149.51189279556274, 149.6364941596985, 149.33742833137512]
Last (4,) loss std 0.147713811921 and mean 149.554533124
PUNCH!

Epoch 23/160
Epoch train accuracy 0.15016


 14%|█▍        | 23/160 [07:20<43:45, 19.16s/it]

Epoch test accuracy 0.0979
Epoch loss: train 149.37393164634705 and test: 27.49830937385559
lr 0.0001

[149.51189279556274, 149.6364941596985, 149.33742833137512, 149.37393164634705]
Last (4,) loss std 0.118508586976 and mean 149.464936733

Epoch 24/160
Epoch train accuracy 0.15166


 15%|█▌        | 24/160 [07:39<43:26, 19.16s/it]

Epoch test accuracy 0.0977
Epoch loss: train 149.01072025299072 and test: 27.50736403465271
lr 0.001

[149.6364941596985, 149.33742833137512, 149.37393164634705, 149.01072025299072]
Last (4,) loss std 0.222200022805 and mean 149.339643598

Epoch 25/160
Epoch train accuracy 0.149


 16%|█▌        | 25/160 [07:58<43:06, 19.16s/it]

Epoch test accuracy 0.101
Epoch loss: train 149.35410404205322 and test: 27.533576250076294
lr 0.001

[149.33742833137512, 149.37393164634705, 149.01072025299072, 149.35410404205322]
Last (4,) loss std 0.149703209835 and mean 149.269046068

Epoch 26/160
Epoch train accuracy 0.15022


 16%|█▋        | 26/160 [08:17<42:46, 19.15s/it]

Epoch test accuracy 0.0929
Epoch loss: train 149.34345984458923 and test: 27.467495918273926
lr 0.001

[149.37393164634705, 149.01072025299072, 149.35410404205322, 149.34345984458923]
Last (4,) loss std 0.150413085827 and mean 149.270553946

Epoch 27/160
Epoch train accuracy 0.14942


 17%|█▋        | 27/160 [08:36<42:26, 19.15s/it]

Epoch test accuracy 0.0965
Epoch loss: train 149.28784084320068 and test: 27.486947536468506
lr 0.001

[149.01072025299072, 149.35410404205322, 149.34345984458923, 149.28784084320068]
Last (4,) loss std 0.139870803714 and mean 149.249031246
PUNCH!

Epoch 28/160
Epoch train accuracy 0.15112


 18%|█▊        | 28/160 [08:55<42:05, 19.13s/it]

Epoch test accuracy 0.0939
Epoch loss: train 149.04545402526855 and test: 27.473308801651
lr 0.01

[149.35410404205322, 149.34345984458923, 149.28784084320068, 149.04545402526855]
Last (4,) loss std 0.125105269297 and mean 149.257714689

Epoch 29/160
Epoch train accuracy 0.1488


 18%|█▊        | 29/160 [09:15<41:47, 19.14s/it]

Epoch test accuracy 0.0971
Epoch loss: train 149.23967671394348 and test: 27.610140562057495
lr 0.001

[149.34345984458923, 149.28784084320068, 149.04545402526855, 149.23967671394348]
Last (4,) loss std 0.11221226691 and mean 149.229107857

Epoch 30/160
Epoch train accuracy 0.15118


 19%|█▉        | 30/160 [09:34<41:28, 19.14s/it]

Epoch test accuracy 0.1
Epoch loss: train 148.78549599647522 and test: 27.470577716827393
lr 0.001

[149.28784084320068, 149.04545402526855, 149.23967671394348, 148.78549599647522]
Last (4,) loss std 0.197642709759 and mean 149.089616895

Epoch 31/160
Epoch train accuracy 0.1514


 19%|█▉        | 31/160 [09:53<41:10, 19.15s/it]

Epoch test accuracy 0.0934
Epoch loss: train 148.69850277900696 and test: 27.47256350517273
lr 0.001

[149.04545402526855, 149.23967671394348, 148.78549599647522, 148.69850277900696]
Last (4,) loss std 0.213949901944 and mean 148.942282379

Epoch 32/160
Epoch train accuracy 0.151


 20%|██        | 32/160 [10:13<40:52, 19.16s/it]

Epoch test accuracy 0.0946
Epoch loss: train 148.7878177165985 and test: 27.40722393989563
lr 0.001

[149.23967671394348, 148.78549599647522, 148.69850277900696, 148.7878177165985]
Last (4,) loss std 0.211966429212 and mean 148.877873302
PUNCH!

Epoch 33/160
Epoch train accuracy 0.15202


 21%|██        | 33/160 [10:32<40:33, 19.16s/it]

Epoch test accuracy 0.0939
Epoch loss: train 148.71213221549988 and test: 27.373539209365845
lr 0.0001

[148.78549599647522, 148.69850277900696, 148.7878177165985, 148.71213221549988]
Last (4,) loss std 0.040962383141 and mean 148.745987177

Epoch 34/160
Epoch train accuracy 0.15144


 21%|██▏       | 34/160 [10:51<40:12, 19.15s/it]

Epoch test accuracy 0.0986
Epoch loss: train 148.93247389793396 and test: 27.397733449935913
lr 0.001

[148.69850277900696, 148.7878177165985, 148.71213221549988, 148.93247389793396]
Last (4,) loss std 0.0929077388465 and mean 148.782731652

Epoch 35/160
Epoch train accuracy 0.14948


 22%|██▏       | 35/160 [11:10<39:53, 19.15s/it]

Epoch test accuracy 0.0962
Epoch loss: train 148.75130581855774 and test: 27.41916584968567
lr 0.001

[148.7878177165985, 148.71213221549988, 148.93247389793396, 148.75130581855774]
Last (4,) loss std 0.0832517739124 and mean 148.795932412

Epoch 36/160
Epoch train accuracy 0.1493


 22%|██▎       | 36/160 [11:28<39:33, 19.14s/it]

Epoch test accuracy 0.0951
Epoch loss: train 148.76656699180603 and test: 27.408029079437256
lr 0.001

[148.71213221549988, 148.93247389793396, 148.75130581855774, 148.76656699180603]
Last (4,) loss std 0.0842719003289 and mean 148.790619731

Epoch 37/160


**Plot results**

----

In [102]:
print ("Min values and epoch\ntrain: %s\ntest: %s" \
       %(np.array(train_epoch_loss_list).min(), np.array(test_epoch_loss_list).min()) )
train_loss, = plt.plot(train_epoch_loss_list, 'g-',linewidth = 1, label='Train')
test_loss, = plt.plot(test_epoch_loss_list, 'b-',linewidth = 1, label = "Test")
plt.legend(handles=[train_loss, test_loss])
plt.grid()
plt.show()

ValueError: zero-size array to reduction operation minimum which has no identity