In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as func
import torch.nn.init as torch_init
import torch.optim as optim

class LSTM(nn.Module):
    """A basic LSTM sequence model.

    Architecture:

    input -> nn.LSTM (``no_layers`` stacked layers) -> nn.Linear (outputs)

    Only the LSTM output of the final time step is fed to the linear layer.
    """

    def __init__(self, in_dim, out_dim, hid_dim, batch_size, no_layers=1):
        """Build the LSTM and the linear output layer.

        Params:
        -------
        - in_dim: (int) Number of features per time step
        - out_dim: (int) Number of output units of the linear layer
        - hid_dim: (int) Size of the LSTM hidden state
        - batch_size: (int) Number of sequences per batch; ``forward`` and
          ``init_hidden`` both rely on this fixed value
        - no_layers: (int) Number of stacked LSTM layers (default 1)
        """
        super(LSTM, self).__init__()
        # specify the input dimensions
        self.in_dim = in_dim
        # specify the output dimensions
        self.out_dim = out_dim
        # specify the batch size
        self.batch_size = batch_size
        # specify hidden layer dimensions
        self.hid_dim = hid_dim
        # specify the number of layers
        self.no_layers = no_layers

        # initialise the LSTM (default batch_first=False, i.e. sequence-first input)
        self.model = nn.LSTM(self.in_dim, self.hid_dim, self.no_layers)
        # linear projection from the hidden state to the outputs; it produces
        # raw scores, no softmax is applied inside the model
        self.outputs = nn.Linear(self.hid_dim, self.out_dim)

    def forward(self, batch):
        """Run the batch through the LSTM and project the last time step.

        Note that this function *needs* to be called "forward" for PyTorch to
        automagically perform the forward pass.

        Params:
        -------
        - batch: (Tensor) Input sequences. Dimension 0 is used as the number
          of time steps; the tensor is reshaped to
          [len(batch), batch_size, -1] before being fed to the LSTM.

        Returns:
        --------
        - logits: (Tensor) Flattened 1-D tensor holding the batch_size * out_dim
          raw (un-normalised) outputs for the final time step.
        """
        # NOTE(review): an earlier comment described `batch` as
        # batch_size x chunk_size x dictionary_size. `view` does not transpose,
        # so a batch-first tensor would have to be permuted to sequence-first
        # by the caller -- confirm the layout the data loader produces.
        #
        # shape of lstm_out: [seq_len, batch_size, hid_dim]
        # shape of self.hidden: (h, c), each [no_layers, batch_size, hid_dim]
        lstm_out, self.hidden = self.model(batch.view(len(batch), self.batch_size, -1))

        # Only take the output from the final timestep.
        # Can pass on the entirety of lstm_out to the next layer if it is a
        # seq2seq prediction.
        y_pred = self.outputs(lstm_out[-1].view(self.batch_size, -1))
        return y_pred.view(-1)

    def init_hidden(self):
        """Return a fresh all-zero (hidden state, cell state) pair.

        Returns:
        --------
        - (h0, c0): tuple of Tensors, each of shape
          [no_layers, batch_size, hid_dim]
        """
        return (torch.zeros(self.no_layers, self.batch_size, self.hid_dim),
                torch.zeros(self.no_layers, self.batch_size, self.hid_dim))

        

In [None]:

# Setup: initialize the hyperparameters/variables
num_epochs = 100           # Number of full passes through the dataset
batch_size = 16            # Number of samples in each minibatch

# Input preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalise each of the three channels with the given mean/std.
# NOTE(review): `transforms` is not imported in the visible cells -- presumably
# `torchvision.transforms`, imported elsewhere in the notebook; confirm.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics used for transfer learning -- confirm they suit this dataset.
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                     std=[0.229, 0.224, 0.225])])


# Check if your system supports CUDA (query once and reuse the result)
use_cuda = torch.cuda.is_available()
print(use_cuda)

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else:  # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    # Empty dict instead of False: still falsy for `if extras:` checks, but the
    # same type as on the CUDA path, so it can also be unpacked as `**extras`.
    extras = {}
    print("CUDA NOT supported")
                

In [None]:
# Loss criterion (loss criteria are defined in the torch.nn package).
# Alternatives kept for reference:
#   criterion = nn.BCEWithLogitsLoss()
#   criterion = wl.weighted_loss_custom(vgg16)
# NOTE(review): `wl` and `vgg16` are not defined in the visible cells --
# confirm they exist earlier in the notebook and that `vgg16` is the model
# meant to be trained here.
criterion = wl.using_pytorch_weight_loss(vgg16)

# Gradient descent optimizer (optimizers are defined in the torch.optim
# package): Adam over every parameter of `vgg16`, learning rate 1e-5.
# Alternative kept for reference:
#   optimizer = optim.SGD(vgg16.parameters(), lr = 5e-04, momentum=0.9)
optimizer = optim.Adam(
    params=vgg16.parameters(),
    lr=1e-5,
)