In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import utils
import torchvision.datasets as dsets
from torch.autograd import Variable


How do you make sure that you are not overfitting in both of these cases?
One of the most important decisions that you have to make when training a network is what loss function you need, given that this is the a classifier what loss function should you use? Even if you know the answer try out different loss functions and prove to yourself that certain loss functions do much better than others, if you can derive why you should use this loss fucntion

In [9]:
def bn_drop_lin(n_in: int, n_out: int, bn: bool = True, p: float = 0., actn: torch.nn.Module = None, sequential : bool = False):
    """
    Utility function that adds batch norm, dropout and linear layer 
    
    Arguments : 
        n_in : Number of input neurons
        n_out : Number of output neurons
        bn : If there is a batch norm layer
        p : Bathc norm dropout rate
        act : Activation for the linear layer
    
    Returns : 
        List of batch norm, dropout and linear layer
    
    """
    layers = [torch.nn.BatchNorm1d(n_in)] if bn else []
    if p != 0:
        layers.append(torch.nn.Dropout(p))
    layers.append(torch.nn.Linear(n_in, n_out))
    if actn is not None:
        layers.append(actn)
    if sequential :
        return torch.nn.Sequential(layers)
    else :
        return layers


In [30]:
#layer = bn_drop_lin(n_in = 100, n_out = 500, bn = True, p = 0.5, actn = nn.ReLU())

[BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 Dropout(p=0.5),
 Linear(in_features=100, out_features=500, bias=True),
 ReLU()]

## STEP 1: LOADING DATASET

In [10]:
train_dataset = dsets.CIFAR10(root='./data', train=True, transform=transforms.ToTensor(),download=True)

test_dataset = dsets.CIFAR10(root='./data', train=False, transform=transforms.ToTensor())

Files already downloaded and verified


## STEP 2: MAKING DATASET ITERABLE

In [11]:
batch_size = 100
n_iters = 3000
num_epochs = n_iters/(len(train_dataset)/batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)

## STEP 3: CREATE MODEL CLASS

In [12]:
n_in = 100
n_out = 500
bn = True 
p = 0.5
actn = nn.ReLU()

input_dim = 32*32*3
hidden_dims = [100,500,1000,100]
num_classes = 10
bn = True
drop = 0.5


In [13]:
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim : int , hidden_dims : list, num_classes : int,bn : bool, drop: float):
        """
        Simple Feedforward Net that accpest variable amount of layers
        
        Arguments : 
            input_dim : Size of the input dimension
            hidden_dims : List containing size of all hidden layers
            num_classes : Number of classes
            bn : If there is a batch norm layer
            drop : dropout rate
        """
        super(FeedforwardNeuralNetModel, self).__init__()
        
        layer_size = [input_dim] +hidden_dims +[num_classes]
        
        layers = []
        for n_in, n_out in zip(layer_size[:-1], layer_size[1:]):
            if n_out != hidden_dims :
                #add ReLU for every layer except last one
                layers += bn_drop_lin(n_in, n_out, bn, drop, actn)
            else : 
                #don't add ReLU to last layer
                layers += bn_drop_lin(n_in, n_out, bn, drop, None)
        self.layers = nn.Sequential(layers)
        
        
    
    def forward(self, x):
        # Linear function 1
        out = self.layers(x)
        return out

## STEP 4: INSTANTIATE MODEL CLASS

In [14]:
#model = FeedforwardNeuralNetModel(input_dim, hidden_dims,num_classes)
#  USE GPU FOR MODEL  

#if torch.cuda.is_available():
 #   model.cuda()
model = FeedforwardNeuralNetModel(input_dim, hidden_dims, num_classes, bn, drop)

TypeError: list is not a Module subclass

## STEP 5: INSTANTIATE LOSS CLASS

In [6]:
criterion = nn.CrossEntropyLoss()

## STEP 6: INSTANTIATE OPTIMIZER CLASS

In [7]:
learning_rate = 0.1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## STEP 7: TRAIN THE MODEL

In [8]:

iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        #  USE GPU FOR MODEL  #
        if torch.cuda.is_available():
            images = Variable(images.view(-1, 3*32*32).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1, 3*32*32))
            labels = Variable(labels)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                images = Variable(images.view(-1,3*32*32).cuda())
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                # Total correct predictions
                correct += (predicted.cpu() == labels.cpu()).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data[0], accuracy))

RuntimeError: cuda runtime error (48) : no kernel image is available for execution on the device at C:/ProgramData/Miniconda3/conda-bld/pytorch_1532509700152/work/aten/src/THC/THCTensorCopy.cu:206