In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import utils
import time

In [3]:
# Pick the compute device automatically instead of toggling commented-out lines:
# use the GPU when CUDA is available and fall back to the CPU otherwise, so the
# notebook runs unchanged on either kind of machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
from utils import check_cifar_dataset_exists

# Root directory returned by the helper; the tensors are read from its 'cifar/' subfolder.
data_path = check_cifar_dataset_exists()

# Load the four tensors in a fixed order: train images, train labels,
# test images, test labels.
train_data, train_label, test_data, test_label = (
    torch.load(data_path + relative_path)
    for relative_path in (
        'cifar/train_data.pt',
        'cifar/train_label.pt',
        'cifar/test_data.pt',
        'cifar/test_label.pt',
    )
)

# Report the shapes of the image tensors.
print(train_data.size())
print(test_data.size())

torch.Size([50000, 3, 32, 32])
torch.Size([10000, 3, 32, 32])


In [5]:
class Stem(nn.Module):
    """Stem of the reduced Inception-v4 style network.

    The block takes a 3-channel image and returns 192 feature maps. The shape
    comments below assume a 3 x 32 x 32 input (the CIFAR size used in this
    notebook), for which the output is 192 x 26 x 26. Every convolution is
    followed by a ReLU; the max-pooling branch is not.
    """

    def __init__(self):
        super(Stem, self).__init__()

        # Two plain convolutions:
        #   3 x 32 x 32 -> 16 x 30 x 30 (no padding) -> 32 x 30 x 30 (padding 1).
        self.conv1a = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=0)
        self.conv1b = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)

        # Filter concat 1: a pooling branch and a convolution branch, both unpadded.
        #   32 x 30 x 30 -> 32 x 28 x 28 (pool) and 48 x 28 x 28 (conv) => 80 x 28 x 28.
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=0)
        self.conv2a = nn.Conv2d(32, 48, kernel_size=3, stride=1, padding=0)

        # Filter concat 2, short branch: 1x1 bottleneck then an unpadded 3x3.
        #   80 x 28 x 28 -> 32 x 28 x 28 -> 48 x 26 x 26.
        self.conv3a = nn.Conv2d(80, 32, stride=1, kernel_size=1, padding=0)
        self.conv3b = nn.Conv2d(32, 48, stride=1, kernel_size=3, padding=0)

        # Filter concat 2, long branch: 1x1 bottleneck, factorised 7x7 (7x1 then 1x7,
        # both size-preserving), then an unpadded 3x3.
        #   80 x 28 x 28 -> 32 x 28 x 28 -> 32 x 28 x 28 -> 32 x 28 x 28 -> 48 x 26 x 26.
        self.conv4a = nn.Conv2d(80, 32, stride=1, kernel_size=1, padding=0)
        self.conv4b = nn.Conv2d(32, 32, stride=1, kernel_size=[7,1], padding=[3,0])
        self.conv4c = nn.Conv2d(32, 32, stride=1, kernel_size=[1,7], padding=[0,3])
        self.conv4d = nn.Conv2d(32, 48, stride=1, kernel_size=3, padding=0)

        # Final convolution on the 96-channel concatenation (the stride-2 pooling
        # branch of the full-size stem is not used here).
        #   96 x 26 x 26 -> 192 x 26 x 26 (padding 1).
        self.conv5a = nn.Conv2d(96, 192, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        # Plain convolutions.
        feat = F.relu(self.conv1a(x))
        feat = F.relu(self.conv1b(feat))

        # Filter concat 1: pooled maps next to convolved maps.
        pooled = self.pool1(feat)
        convolved = F.relu(self.conv2a(feat))
        concat1 = torch.cat((pooled, convolved), 1)

        # Filter concat 2, short branch.
        short_branch = F.relu(self.conv3a(concat1))
        short_branch = F.relu(self.conv3b(short_branch))

        # Filter concat 2, long branch. It uses its own conv4a..conv4d layers.
        # NOTE: the original author left open whether this branch should share
        # conv3a/conv3b with the short branch instead.
        long_branch = concat1
        for conv in (self.conv4a, self.conv4b, self.conv4c, self.conv4d):
            long_branch = F.relu(conv(long_branch))

        concat2 = torch.cat((short_branch, long_branch), 1)

        # Final convolution.
        return F.relu(self.conv5a(concat2))

In [6]:
class InceptionA(nn.Module):
    """Inception-A block: four parallel branches concatenated along the channel axis.

    All layers use stride 1 with size-preserving padding, so the spatial size is
    unchanged. The input must have 192 channels and the output has
    4 * 48 = 192 channels (192 x 26 x 26 in this network for 32 x 32 images).
    Every convolution is followed by a ReLU.
    """

    def __init__(self):
        super(InceptionA, self).__init__()

        # Branch 1: 3x3 average pooling, then 1x1 convolution (192 -> 48).
        self.pool1 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
        self.conv1a = nn.Conv2d(192, 48, kernel_size=1, padding=0)

        # Branch 2: single 1x1 convolution (192 -> 48).
        self.conv2a = nn.Conv2d(192, 48, kernel_size=1, padding=0)

        # Branch 3: 1x1 bottleneck (192 -> 32), then 3x3 (32 -> 48).
        self.conv3a = nn.Conv2d(192, 32, kernel_size=1, padding=0)
        self.conv3b = nn.Conv2d(32, 48, kernel_size=3, padding=1)

        # Branch 4: 1x1 bottleneck (192 -> 32), then two 3x3 (32 -> 48 -> 48).
        self.conv4a = nn.Conv2d(192, 32, kernel_size=1, padding=0)
        self.conv4b = nn.Conv2d(32, 48, kernel_size=3, padding=1)
        self.conv4c = nn.Conv2d(48, 48, kernel_size=3, padding=1)

    def forward(self, x):
        # Branch 1: pool, project, activate.
        branch_pool = F.relu(self.conv1a(self.pool1(x)))

        # Branch 2.
        branch_1x1 = F.relu(self.conv2a(x))

        # Branch 3.
        branch_3x3 = F.relu(self.conv3a(x))
        branch_3x3 = F.relu(self.conv3b(branch_3x3))

        # Branch 4: three conv + ReLU stages applied in sequence.
        branch_double_3x3 = x
        for conv in (self.conv4a, self.conv4b, self.conv4c):
            branch_double_3x3 = F.relu(conv(branch_double_3x3))

        return torch.cat((branch_pool, branch_1x1, branch_3x3, branch_double_3x3), 1)

In [7]:
class InceptionB(nn.Module):
    """Inception-B block built from factorised 7x7 convolutions.

    All layers use stride 1 with size-preserving padding, so the spatial size is
    unchanged. The input must have 512 channels and the output has
    64 + 192 + 128 + 128 = 512 channels (512 x 12 x 12 in this network for
    32 x 32 images). Every convolution is followed by a ReLU.
    """

    def __init__(self):
        super(InceptionB, self).__init__()

        # Branch 1: 3x3 average pooling, then 1x1 convolution (512 -> 64).
        self.pool1 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
        self.conv1a = nn.Conv2d(512, 64, kernel_size=1, padding=0)

        # Branch 2: single 1x1 convolution (512 -> 192).
        self.conv2a = nn.Conv2d(512, 192, kernel_size=1, padding=0)

        # Branch 3: 1x1 (512 -> 96), 7x1 (96 -> 112), 1x7 (112 -> 128).
        self.conv3a = nn.Conv2d(512, 96, kernel_size=1, padding=0)
        self.conv3b = nn.Conv2d(96, 112, kernel_size=[7,1], padding=[3,0])
        self.conv3c = nn.Conv2d(112, 128, kernel_size=[1,7], padding=[0,3])

        # Branch 4: 1x1 (512 -> 96), then alternating 1x7 / 7x1 convolutions
        # (96 -> 96 -> 112 -> 112 -> 128).
        self.conv4a = nn.Conv2d(512, 96, kernel_size=1, padding=0)
        self.conv4b = nn.Conv2d(96, 96, kernel_size=[1,7], padding=[0,3])
        self.conv4c = nn.Conv2d(96, 112, kernel_size=[7,1], padding=[3,0])
        self.conv4d = nn.Conv2d(112, 112, kernel_size=[1,7], padding=[0,3])
        self.conv4e = nn.Conv2d(112, 128, kernel_size=[7,1], padding=[3,0])

    def forward(self, x):
        # Branch 1: pool, project, activate.
        branch_pool = F.relu(self.conv1a(self.pool1(x)))

        # Branch 2.
        branch_1x1 = F.relu(self.conv2a(x))

        # Branch 3: three conv + ReLU stages applied in sequence.
        branch_7x7 = x
        for conv in (self.conv3a, self.conv3b, self.conv3c):
            branch_7x7 = F.relu(conv(branch_7x7))

        # Branch 4: five conv + ReLU stages applied in sequence.
        branch_double_7x7 = x
        for conv in (self.conv4a, self.conv4b, self.conv4c, self.conv4d, self.conv4e):
            branch_double_7x7 = F.relu(conv(branch_double_7x7))

        return torch.cat((branch_pool, branch_1x1, branch_7x7, branch_double_7x7), 1)

In [8]:
class InceptionC(nn.Module):
    """Inception-C block with branches that fork into 1x3 and 3x1 convolutions.

    All layers use stride 1 with size-preserving padding, so the spatial size is
    unchanged. The input must have 768 channels and the output has
    6 * 128 = 768 channels (768 x 5 x 5 in this network for 32 x 32 images).
    Every convolution is followed by a ReLU.
    """

    def __init__(self):
        super(InceptionC, self).__init__()

        # Branch 1: 3x3 average pooling, then 1x1 convolution (768 -> 128).
        self.pool1 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
        self.conv1a = nn.Conv2d(768, 128, kernel_size=1, padding=0)

        # Branch 2: single 1x1 convolution (768 -> 128).
        self.conv2a = nn.Conv2d(768, 128, kernel_size=1, padding=0)

        # Branch 3: 1x1 (768 -> 192), then a fork: 1x3 and 3x1 both read the
        # same 192-channel tensor and each emit 128 channels.
        self.conv3a = nn.Conv2d(768, 192, kernel_size=1, padding=0)
        self.conv3b = nn.Conv2d(192, 128, kernel_size=[1,3], padding=[0,1])
        self.conv3c = nn.Conv2d(192, 128, kernel_size=[3,1], padding=[1,0])

        # Branch 4: 1x1 (768 -> 192), 1x3 (192 -> 224), 3x1 (224 -> 256), then a
        # fork: 3x1 and 1x3 both read the same 256-channel tensor and each emit
        # 128 channels.
        self.conv4a = nn.Conv2d(768, 192, kernel_size=1, padding=0)
        self.conv4b = nn.Conv2d(192, 224, kernel_size=[1,3], padding=[0,1])
        self.conv4c = nn.Conv2d(224, 256, kernel_size=[3,1], padding=[1,0])
        self.conv4d = nn.Conv2d(256, 128, kernel_size=[3,1], padding=[1,0])
        self.conv4e = nn.Conv2d(256, 128, kernel_size=[1,3], padding=[0,1])

    def forward(self, x):
        # Branch 1: pool, project, activate.
        branch_pool = F.relu(self.conv1a(self.pool1(x)))

        # Branch 2.
        branch_1x1 = F.relu(self.conv2a(x))

        # Branch 3: shared trunk, then the two fork outputs.
        trunk3 = F.relu(self.conv3a(x))
        fork3_1x3 = F.relu(self.conv3b(trunk3))
        fork3_3x1 = F.relu(self.conv3c(trunk3))

        # Branch 4: shared three-stage trunk, then the two fork outputs.
        trunk4 = x
        for conv in (self.conv4a, self.conv4b, self.conv4c):
            trunk4 = F.relu(conv(trunk4))
        fork4_3x1 = F.relu(self.conv4d(trunk4))
        fork4_1x3 = F.relu(self.conv4e(trunk4))

        return torch.cat(
            (branch_pool, branch_1x1, fork3_1x3, fork3_3x1, fork4_3x1, fork4_1x3), 1
        )

In [9]:
class ReductionA(nn.Module):
    """Reduction-A block: downsamples spatially while widening the channels.

    Three parallel branches each end in an unpadded stride-2 operation with a
    3x3 window, and their outputs are concatenated along the channel axis. The
    input must have 192 channels and the output has 192 + 192 + 128 = 512
    channels (192 x 26 x 26 -> 512 x 12 x 12 in this network for 32 x 32
    images). Convolutions are followed by a ReLU; the pooling branch is not.
    """

    def __init__(self):
        super(ReductionA, self).__init__()

        # Branch 1: 3x3 max pooling, stride 2, no padding (channels unchanged).
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        # Branch 2: 3x3 convolution, stride 2, no padding (192 -> 192).
        self.conv1a = nn.Conv2d(192, 192, kernel_size=3, stride=2, padding=0)

        # Branch 3: 1x1 (192 -> 96), size-preserving 3x3 (96 -> 112),
        # then 3x3 with stride 2 and no padding (112 -> 128).
        self.conv2a = nn.Conv2d(192, 96, kernel_size=1, padding=0)
        self.conv2b = nn.Conv2d(96, 112, kernel_size=3, padding=1)
        self.conv2c = nn.Conv2d(112, 128, kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        # Branch 1: pooling only.
        branch_pool = self.pool1(x)

        # Branch 2: strided convolution.
        branch_conv = F.relu(self.conv1a(x))

        # Branch 3: three conv + ReLU stages applied in sequence.
        branch_deep = x
        for conv in (self.conv2a, self.conv2b, self.conv2c):
            branch_deep = F.relu(conv(branch_deep))

        return torch.cat((branch_pool, branch_conv, branch_deep), 1)

In [10]:
class ReductionB(nn.Module):
    """Reduction-B block: downsamples spatially while widening the channels.

    Three parallel branches each end in an unpadded stride-2 operation with a
    3x3 window, and their outputs are concatenated along the channel axis. The
    input must have 512 channels and the output has 512 + 96 + 160 = 768
    channels (512 x 12 x 12 -> 768 x 5 x 5 in this network for 32 x 32 images).
    Convolutions are followed by a ReLU; the pooling branch is not.
    """

    def __init__(self):
        super(ReductionB, self).__init__()

        # Branch 1: 3x3 max pooling, stride 2, no padding (channels unchanged).
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        # Branch 2: 1x1 (512 -> 96), then 3x3 with stride 2 and no padding (96 -> 96).
        self.conv1a = nn.Conv2d(512, 96, kernel_size=1, padding=0)
        self.conv1b = nn.Conv2d(96, 96, kernel_size=3, stride=2, padding=0)

        # Branch 3: 1x1 (512 -> 128), size-preserving 1x7 (128 -> 128) and
        # 7x1 (128 -> 160), then 3x3 with stride 2 and no padding (160 -> 160).
        self.conv2a = nn.Conv2d(512, 128, kernel_size=1, padding=0)
        self.conv2b = nn.Conv2d(128, 128, kernel_size=[1, 7], padding=[0,3])
        self.conv2c = nn.Conv2d(128, 160, kernel_size=[7, 1], padding=[3,0])
        self.conv2d = nn.Conv2d(160, 160, kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        # Branch 1: pooling only.
        branch_pool = self.pool1(x)

        # Branch 2: bottleneck followed by the strided convolution.
        branch_3x3 = F.relu(self.conv1a(x))
        branch_3x3 = F.relu(self.conv1b(branch_3x3))

        # Branch 3: four conv + ReLU stages applied in sequence.
        branch_7x7 = x
        for conv in (self.conv2a, self.conv2b, self.conv2c, self.conv2d):
            branch_7x7 = F.relu(conv(branch_7x7))

        return torch.cat((branch_pool, branch_3x3, branch_7x7), 1)

In [11]:
class Inception_v4_convnet(nn.Module):
    """Reduced Inception-v4 style classifier for 3 x 32 x 32 images and 10 classes.

    The feature extractor chains one instance of each block:
    Stem -> InceptionA -> ReductionA -> InceptionB -> ReductionB -> InceptionC.
    (The commented-out repeats in the original cell suggest the full-size
    layout stacks several Inception blocks per stage; only one of each is used
    here.) For a 32 x 32 input the extractor yields 768 x 5 x 5 feature maps,
    which are average-pooled to 768 x 1 x 1, passed through dropout, flattened
    and mapped to 10 class scores by a linear layer.
    """

    def __init__(self):
        super(Inception_v4_convnet, self).__init__()

        # Feature extractor: one block per stage.
        self.features = nn.Sequential(
            Stem(),
            InceptionA(),
            ReductionA(),
            InceptionB(),
            ReductionB(),
            InceptionC(),
        )

        # 5x5 average pooling collapses the 768 x 5 x 5 maps to 768 x 1 x 1.
        self.pool = nn.AvgPool2d(kernel_size=5, stride=1,  padding=0 )
        self.linear = nn.Linear(768, 10)

        # nn.Dropout takes the probability of ZEROING an element. The
        # Inception-v4 head is specified with a keep probability of 0.8, which
        # is p=0.2 here; the previous value of 0.8 discarded 80% of the pooled
        # features on every training step.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, input):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.features(input)

        x = self.pool(x)
        x = self.dropout(x)
        # Flatten (batch, 768, 1, 1) to (batch, 768) for the linear classifier.
        x = x.view(x.size(0), -1)
        x = self.linear(x)

        return x

In [12]:
# Instantiate the network.
net=Inception_v4_convnet()

# Print the module tree, then pass the model to utils.display_num_param
# (presumably reports the number of parameters -- confirm in utils).
print(net)
utils.display_num_param(net)

Inception_v4_convnet(
  (features): Sequential(
    (0): Stem(
      (conv1a): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
      (conv1b): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (pool1): MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
      (conv2a): Conv2d(32, 48, kernel_size=(3, 3), stride=(1, 1))
      (conv3a): Conv2d(80, 32, kernel_size=(1, 1), stride=(1, 1))
      (conv3b): Conv2d(32, 48, kernel_size=(3, 3), stride=(1, 1))
      (conv4a): Conv2d(80, 32, kernel_size=(1, 1), stride=(1, 1))
      (conv4b): Conv2d(32, 32, kernel_size=[7, 1], stride=(1, 1), padding=[3, 0])
      (conv4c): Conv2d(32, 32, kernel_size=[1, 7], stride=(1, 1), padding=[0, 3])
      (conv4d): Conv2d(32, 48, kernel_size=(3, 3), stride=(1, 1))
      (conv5a): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (1): InceptionA(
      (pool1): AvgPool2d(kernel_size=3, stride=1, padding=1)
      (conv1a): Conv2d(192, 48, ker

In [13]:
# Move the model to the selected device; the mini-batches are sent to the
# same device before being fed to the network.
net = net.to(device)

In [14]:
# Mini-batch size; used as the step when slicing the training and test sets.
bs = 100

# Initial learning rate handed to the optimizer; the training loop decays it.
my_lr = 0.045

# Loss applied to the raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

In [15]:
def eval_on_test_set():
    """Print the average classification error of ``net`` on the test set.

    Reads the notebook globals ``net``, ``test_data``, ``test_label``, ``bs``
    and ``device``. The test set is visited in consecutive mini-batches of
    ``bs`` samples; the per-batch error from ``utils.get_error`` is averaged
    over the batches and printed as a percentage.

    The network is switched to evaluation mode for the duration of the call
    (so dropout is disabled) and gradient tracking is turned off (so no
    autograd graph is kept, which saves memory). The previous training/eval
    mode is restored before returning, even if an error occurs.
    """

    running_error=0
    num_batches=0

    # Remember the current mode so the caller's training loop is not affected.
    was_training = net.training
    net.eval()

    try:
        with torch.no_grad():
            # Iterate over the actual number of test samples rather than a
            # hard-coded 10000.
            for i in range(0, test_data.size(0), bs):

                minibatch_data =  test_data[i:i+bs]
                minibatch_label= test_label[i:i+bs]

                minibatch_data=minibatch_data.to(device)
                minibatch_label=minibatch_label.to(device)

                inputs = minibatch_data

                scores=net( inputs )

                error = utils.get_error( scores , minibatch_label)

                running_error += error.item()

                num_batches+=1
    finally:
        net.train(was_training)

    total_error = running_error/num_batches
    print( 'error rate on test set =', total_error*100 ,'percent')

In [16]:
start=time.time()

# Build the optimizer ONCE. Adam keeps running estimates of the gradient
# moments per parameter; re-creating it every epoch would throw that state
# away. The learning rate is updated in place through param_groups instead.
optimizer=torch.optim.Adam( net.parameters() , lr=my_lr )

# Number of training samples, taken from the data rather than hard-coded.
num_train = train_data.size(0)

for epoch in range(1,20):

    # divide the learning rate by 2.5 on every even epoch
    if (epoch%2 == 0):
        my_lr = my_lr / 2.5
        for param_group in optimizer.param_groups:
            param_group['lr'] = my_lr

    # make sure dropout is active while training, whatever mode the
    # network was left in by earlier cells
    net.train()

    # set the running quantities to zero at the beginning of the epoch
    running_loss=0
    running_error=0
    num_batches=0

    # set the order in which to visit the images of the training set
    shuffled_indices=torch.randperm(num_train)

    for count in range(0,num_train,bs):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # create a minibatch
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices]
        minibatch_label=  train_label[indices]

        # send them to the device
        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)

        # The inputs are fed as they are (no normalization). Gradients are
        # needed only for the network parameters, which autograd tracks
        # already, so the inputs are NOT marked with requires_grad_().
        inputs = minibatch_data

        # forward the minibatch through the net
        scores=net( inputs )

        # Compute the average of the losses of the data points in the minibatch
        loss =  criterion( scores , minibatch_label)

        # backward pass to compute the gradient of the loss w.r.t. the parameters
        loss.backward()

        # do one optimizer step
        optimizer.step()


        # START COMPUTING STATS

        # add the loss of this batch to the running loss
        running_loss += loss.item()

        # compute the error made on this batch and add it to the running error
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()

        num_batches+=1


    # compute stats for the full training set
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = (time.time()-start)/60


    print('epoch=',epoch, '\t time=', elapsed,'min','\t lr=', my_lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set()
    print(' ')
    

RuntimeError: CUDA error: out of memory

In [None]:
# choose a picture at random from the test set
idx=randint(0, test_data.size(0)-1)
im=test_data[idx]

# display the picture
utils.show(im)

# send to device and view as a batch of 1
im = im.to(device)
#im= (im-mean) / std
im=im.view(1,3,32,32)

# feed it to the net and display the confidence scores.
# Inference is done in evaluation mode (dropout disabled) and without gradient
# tracking; the network's previous mode is restored afterwards.
was_training = net.training
net.eval()
with torch.no_grad():
    scores =  net(im)
    probs= F.softmax(scores, dim=1)
net.train(was_training)

utils.show_prob_cifar(probs.cpu())