In [1]:
from data_loader import *
# Hyperparameters shared by the cells below
num_epochs = 1          # how many complete passes over the training data
batch_size = 16         # how many samples go into one minibatch
learning_rate = 0.001   # value later handed to the optimizer as `lr`

# Build the three data loaders (shuffle enabled, show_sample disabled)
train_loader, val_loader, test_loader = create_split_loaders(
    batch_size,
    shuffle=True,
    show_sample=False,
)

# Quick sanity check: print the shape of the training set's input array
print(train_loader.dataset.input_data.shape)

[27 81 82 ... 67 76 66]
[81 82 63 ... 76 66 29]
[27 81 82 ... 67 76 66]
[81 82 63 ... 76 66 29]
[27 81 82 ... 67 76 66]
[81 82 63 ... 76 66 29]
(3799, 100)


In [8]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as func
import torch.nn.init as torch_init
import torch.optim as optim

class LSTM(nn.Module):
    """An LSTM sequence encoder followed by a linear read-out layer.

    Architecture:

    nn.LSTM (no_layers stacked layers) -> last time step -> nn.Linear (outputs)

    Params:
    -------
    - in_dim: (int) Size of the feature vector at each time step
    - out_dim: (int) Number of scores produced per sequence
    - hid_dim: (int) Size of the LSTM hidden state
    - batch_size: (int) Nominal minibatch size. Only init_hidden uses it;
      forward infers the real batch size from its input.
    - no_layers: (int) Number of stacked LSTM layers
    """

    def __init__(self, in_dim, out_dim, hid_dim, batch_size, no_layers=1):
        super(LSTM, self).__init__()
        # Remember the configuration so forward / init_hidden can use it
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.batch_size = batch_size
        self.hid_dim = hid_dim
        self.no_layers = no_layers

        # Recurrent core. batch_first is left at its default (False), so the
        # layer consumes tensors laid out as (seq_len, batch, in_dim).
        self.model = nn.LSTM(self.in_dim, self.hid_dim, self.no_layers)
        # Linear read-out that maps a hidden state to out_dim raw scores
        self.outputs = nn.Linear(self.hid_dim, self.out_dim)

    def forward(self, batch):
        """Run a batch of sequences through the LSTM and score the last step.

        Note that this function *needs* to be called "forward" for PyTorch to
        automagically perform the forward pass.

        Params:
        -------
        - batch: (Tensor) Input sequences with time on the first axis. It is
          reshaped to (len(batch), -1, in_dim), so the batch size is inferred
          from the data instead of being fixed to self.batch_size.

        Returns:
        --------
        - logits: (Tensor) Unnormalised scores of shape (batch, out_dim) for
          the final time step. No softmax is applied here because
          nn.CrossEntropyLoss applies log-softmax itself; call
          torch.softmax(logits, dim=1) when probabilities are needed.

        Side effects:
        -------------
        - self.hidden is overwritten with the (h_n, c_n) pair returned by the
          LSTM for this batch.
        """
        # reshape (rather than view) also accepts the non-contiguous tensors
        # produced by permute(); the -1 lets a short final batch through.
        sequence = batch.reshape(len(batch), -1, self.in_dim)

        # lstm_out: (seq_len, batch, hid_dim)
        # self.hidden: (h_n, c_n), each (no_layers, batch, hid_dim)
        lstm_out, self.hidden = self.model(sequence)

        # Only the output of the final time step is scored. Pass all of
        # lstm_out to the read-out layer instead for seq2seq prediction.
        logits = self.outputs(lstm_out[-1])
        return logits

    def init_hidden(self):
        """Create an all-zero initial state for the LSTM.

        Returns:
        --------
        - (h_0, c_0): tuple of two zero Tensors, each of shape
          (no_layers, batch_size, hid_dim), placed on the same device as the
          model's parameters so it can be passed straight to self.model.
        """
        device = self.outputs.weight.device
        state_shape = (self.no_layers, self.batch_size, self.hid_dim)
        return (torch.zeros(state_shape, device=device),
                torch.zeros(state_shape, device=device))
# Build one network with every dimension set to 100 and show its layer summary
model = LSTM(
    in_dim=100,
    out_dim=100,
    hid_dim=100,
    batch_size=16,
    no_layers=1,
)
print(model)
        

LSTM(
  (model): LSTM(100, 100)
  (outputs): Linear(in_features=100, out_features=100, bias=True)
)


In [9]:
# Setup: initialize the hyperparameters/variables
num_epochs = 1           # Number of full passes through the dataset
batch_size = 16          # Number of samples in each minibatch
learning_rate = 0.001    # Passed to the Adam optimizer below as `lr`

# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    # NOTE(review): a falsy value is passed on to create_split_loaders here;
    # confirm in data_loader that False (rather than {}) is what it expects.
    extras = False
    print("CUDA NOT supported")

# Setup the training, validation, and testing dataloaders
# NOTE(review): shuffle=False here, whereas the first cell of this notebook
# uses shuffle=True - confirm which one is intended for training.
train_loader, val_loader, test_loader = create_split_loaders(batch_size,shuffle=False, show_sample=False,extras=extras)

# Instantiate the LSTM to run on the GPU or CPU based on CUDA support.
# The model's batch size comes from the hyperparameter above so the network
# and the data loaders cannot drift apart when batch_size is changed.
model = LSTM(in_dim=100, out_dim=100, hid_dim=100, batch_size=batch_size, no_layers=1)
model = model.to(computing_device)
print("Model on CUDA?", next(model.parameters()).is_cuda)

# Loss criterion (loss criteria are defined in the torch.nn package)
criterion = torch.nn.CrossEntropyLoss()

# Gradient descent optimizer: Adam with its default parameters except the learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

CUDA is supported
[27 81 82 ... 67 76 66]
[81 82 63 ... 76 66 29]
[27 81 82 ... 67 76 66]
[81 82 63 ... 76 66 29]
[27 81 82 ... 67 76 66]
[81 82 63 ... 76 66 29]
Model on CUDA? True


In [10]:
# Track the loss across training
total_loss = []            # loss of every minibatch, stored as Python floats
avg_minibatch_loss = []    # mean loss of each completed window of N minibatches

N = 50                     # size of the reporting window, in minibatches

# Begin training procedure
for epoch in range(num_epochs):

    # Running sum of the losses in the current window (plain float)
    N_minibatch_loss = 0.0

    # Get the next minibatch of inputs and labels for training
    for minibatch_count, (images, labels) in enumerate(train_loader, 0):

        # Swap the first two axes. Presumably the loader yields
        # (batch, chunk, features) and the LSTM wants time first - TODO confirm.
        images = images.permute(1, 0, 2)
        labels = labels.permute(1, 0, 2)

        # Put the minibatch data in CUDA Tensors and run on the GPU if supported
        images, labels = images.to(computing_device), labels.to(computing_device)

        # Zero out the stored gradient (buffer) from the previous iteration
        optimizer.zero_grad()

        # Perform the forward pass through the network and compute the loss
        outputs = model(images)
        # NOTE(review): the model scores only the final time step while
        # `labels` is still 3-D after the permute above - confirm that the
        # criterion receives the target layout it expects.
        loss = criterion(outputs, labels)

        # Automagically compute the gradients and backpropagate the loss through the network
        loss.backward()

        # Update the weights
        optimizer.step()

        # Record this iteration's loss as a float: accumulating the tensor
        # itself would keep autograd history alive and leave (possibly CUDA)
        # tensors inside the tracking lists.
        batch_loss = loss.item()
        total_loss.append(batch_loss)
        N_minibatch_loss += batch_loss

        #TODO: Implement cross-validation

        # Report once N minibatches have been accumulated. Testing
        # (count + 1) avoids firing at minibatch 0, where a single loss
        # would be divided by N.
        if (minibatch_count + 1) % N == 0:

            # Print the loss averaged over the last N mini-batches
            N_minibatch_loss /= N
            print('Epoch %d, average minibatch %d loss: %.3f' %
                (epoch + 1, minibatch_count + 1, N_minibatch_loss))

            # Add the averaged loss over N minibatches and reset the counter
            avg_minibatch_loss.append(N_minibatch_loss)
            N_minibatch_loss = 0.0

    print("Finished", epoch + 1, "epochs of training")
# Report the configured epoch count: the loop variable is 0-indexed and is
# undefined when num_epochs is 0.
print("Training complete after", num_epochs, "epochs")

TypeError: forward() takes from 2 to 3 positional arguments but 4 were given