In [1]:
import torch
import torch.nn as nn
import time

  from .autonotebook import tqdm as notebook_tqdm


## Load data

In [2]:
data_path = '../'
full_data = torch.load(data_path + 'unaug_data_small.pt').float().contiguous()
full_label = torch.load(data_path + 'unaug_label.pt')
full_label = torch.tensor(full_label)

full_label = full_label - 1


In [3]:
# TODO: split into training and testing 80%-20%
train_data = full_data
train_label = full_label

In [4]:
# show size
print("Training data, ", train_data.size())
print("Training label,", train_label.size())


Training data,  torch.Size([6112, 3, 32, 32])
Training label, torch.Size([6112])


In [5]:
train_data_size = train_data.size(0)
print(train_data_size)
print(train_data.is_contiguous())

6112
True


## Utility Functions
- display num param
- get error

In [6]:
# Util func
def display_num_param(net):
    nb_param = 0
    for param in net.parameters():
        nb_param += param.numel()
    print('There are {} ({:.2f} million) parameters in this neural network'.format(
        nb_param, nb_param/1e6)
         )

In [7]:
def get_error( scores , labels ):
    bs=scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_matches=indicator.sum()
    
    return 1-num_matches.float()/bs   

## Build MLP
A 3-layer MLP

In [8]:
class three_layer_net(nn.Module):

    def __init__(self, input_size, hidden_size1, hidden_size2,  output_size):
        super(three_layer_net , self).__init__()

        self.layer1 = nn.Linear(input_size, hidden_size1)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.layer3 = nn.Linear(hidden_size2, output_size)   
        
    def forward(self, x):
        y       = self.layer1(x)
        y_hat   = torch.relu(y)
        z       = self.layer2(y_hat)
        z_hat   = torch.relu(z)
        scores  = self.layer3(z_hat)
        
        return scores

In [9]:
net=three_layer_net(3072,100,100,10)
print(net)
display_num_param(net)

three_layer_net(
  (layer1): Linear(in_features=3072, out_features=100, bias=True)
  (layer2): Linear(in_features=100, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
)
There are 318410 (0.32 million) parameters in this neural network


## Define Hyper-parameters

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.SGD( net.parameters() , lr=0.01 )
bs= 100

## Training

In [12]:
start=time.time()

for epoch in range(10):
    
    running_loss=0
    running_error=0
    num_batches=0
    
    shuffled_indices=torch.randperm(train_data_size)
    print(shuffled_indices)
 
    for count in range(0, (train_data_size//bs)*bs ,bs):
    
        # Set the gradients to zeros
        optimizer.zero_grad()
        
        # create a minibatch       
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices]
        minibatch_label=  train_label[indices]
        
        # reshape the minibatch
        inputs = minibatch_data.view(bs,3072)  # to continuous tensor

        # tell Pytorch to start tracking all operations that will be done on "inputs"
        inputs.requires_grad_()

        # forward the minibatch through the net 
        scores=net( inputs ) 

        # Compute the average of the losses of the data points in the minibatch
        loss =  criterion( scores , minibatch_label) 
        
        # backward pass to compute dL/dU, dL/dV and dL/dW   
        loss.backward()

        # do one step of stochastic gradient descent: U=U-lr(dL/dU), V=V-lr(dL/dU), ...
        optimizer.step()
        

        # START COMPUTING STATS
        
        # add the loss of this batch to the running loss
        running_loss += loss.detach().item()
        
        # compute the error made on this batch and add it to the running error       
        error = get_error( scores.detach() , minibatch_label)
        running_error += error.item()
        
        num_batches+=1        
    
    
    # compute stats for the full training set
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = time.time()-start
    
    if epoch%1 == 0:
        print('epoch=',epoch, '\t time=', elapsed, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
        #eval_on_test_set() 
        print(' ')
    
    
        
        

tensor([1965, 3309, 5946,  ...,  968, 5295, 4519])
epoch= 0 	 time= 0.2573399543762207 	 loss= 2.237499209701038 	 error= 83.6393446218772 percent
 
tensor([4777, 3972, 4668,  ..., 5290, 1246, 2796])
epoch= 1 	 time= 0.45740509033203125 	 loss= 2.2345253170513715 	 error= 83.68852500055657 percent
 
tensor([4971, 4917,  901,  ..., 1197, 2419,  357])
epoch= 2 	 time= 0.6969962120056152 	 loss= 2.2314928672352776 	 error= 83.6393449150148 percent
 
tensor([  45, 3488, 4823,  ..., 5236, 2808, 2006])
epoch= 3 	 time= 0.8927221298217773 	 loss= 2.2288295831836638 	 error= 83.62295099946319 percent
 
tensor([2538, 1432, 2045,  ..., 4496, 4940, 4205])
epoch= 4 	 time= 1.1034281253814697 	 loss= 2.2266235546987563 	 error= 83.60655776789932 percent
 
tensor([5227,   94, 5983,  ..., 3024,  601, 1852])
epoch= 5 	 time= 1.288559913635254 	 loss= 2.2248845295827895 	 error= 83.65573795115363 percent
 
tensor([ 894, 2631, 3232,  ...,  570, 1389, 3924])
epoch= 6 	 time= 1.5014359951019287 	 loss= 2.