In [1]:
# For tips on running notebooks in Google Colab, see
# https://pytorch.org/tutorials/beginner/colab
%matplotlib inline

LSTMs in Pytorch
----------------

Before getting to the example, note a few things. Pytorch\'s LSTM
expects all of its inputs to be 3D tensors. The semantics of the axes of
these tensors is important. The first axis is the sequence itself, the
second indexes instances in the mini-batch, and the third indexes
elements of the input. We haven\'t discussed mini-batching, so let\'s
just ignore that and assume we will always have just 1 dimension on the
second axis. If we want to run the sequence model over the sentence
\"The cow jumped\", our input should look like

$$\begin{aligned}
\begin{bmatrix}
\overbrace{q_\text{The}}^\text{row vector} \\
q_\text{cow} \\
q_\text{jumped}
\end{bmatrix}
\end{aligned}$$

Except remember there is an additional 2nd dimension with size 1.

In addition, you could go through the sequence one at a time, in which
case the 1st axis will have size 1 also.

Let\'s see a quick example.


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x11378f470>

In [3]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward0>)
(tensor([[[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward0>), tensor([[[-0.9825,  0.4715, -0.0633]]], grad_fn=<StackBackward0>))


In [4]:
import pandas as pd
#make sure to upload 'imdb_small.csv' to the local directory
imdb_dataset = pd.read_csv("imdb_small.csv")
#take a look at the data
imdb_dataset.head()

Unnamed: 0.1,Unnamed: 0,review,sentiment
0,0,One of the other reviewers has mentioned that ...,positive
1,1,A wonderful little production. <br /><br />The...,positive
2,2,I thought this was a wonderful way to spend ti...,positive
3,4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
4,5,"Probably my all-time favorite movie, a story o...",positive


In [5]:
#Most students don't have access to GPUs so create a tiny version of the dataset that can fit on a CPU
imdb_dataset = pd.concat([imdb_dataset[imdb_dataset.sentiment=='positive'].head(n=20),
                          imdb_dataset[imdb_dataset.sentiment=='negative'].head(n=20)])
imdb_dataset = imdb_dataset.sample(frac=1).reset_index(drop=True)
imdb_dataset

Unnamed: 0.1,Unnamed: 0,review,sentiment
0,23,"First of all, let's get a few things straight ...",negative
1,3,Basically there's a family where a little boy ...,negative
2,14,This a fantastic movie of three prisoners who ...,positive
3,24,This was the worst movie I saw at WorldFest an...,negative
4,20,After the success of Die Hard and it's sequels...,positive
5,34,"I watched this film not really expecting much,...",negative
6,27,This film tried to be too many things all at o...,negative
7,19,An awful film! It must have been up against so...,negative
8,29,'War movie' is a Hollywood genre that has been...,positive
9,25,The Karen Carpenter Story shows a little more ...,positive


In [6]:
import torch
from torch.utils.data import Dataset, DataLoader

class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        return text, label

### Preprocessing
 Remove Punctuation and get all the words from review dataset. Count all the words and sort it based on counts



In [7]:
texts = ["This is a positive sentence.", "This is a negative sentence."]
labels = [1, 0]

dataset = TextDataset(texts, labels)
dataloader = DataLoader(dataset, batch_size=2)

for batch in dataloader:
    texts_batch, labels_batch = batch
    print(texts_batch)
    print(labels_batch)

('This is a positive sentence.', 'This is a negative sentence.')
tensor([1, 0])


In [8]:
from string import punctuation
from collections import Counter
all_reviews=list()
for text in imdb_dataset.review.to_list():
  text = text.lower()
  text = "".join([ch for ch in text if ch not in punctuation])
  all_reviews.append(text)
all_text = " ".join(all_reviews)
all_words = all_text.split()

# Count all the words using Counter Method
count_words = Counter(all_words)
total_words=len(all_words)
sorted_words=count_words.most_common(total_words)
print(f"Top ten occuring words : {sorted_words[:10]}")


Top ten occuring words : [('the', 541), ('a', 260), ('of', 253), ('to', 200), ('and', 197), ('is', 164), ('in', 132), ('br', 114), ('i', 108), ('it', 106)]


### Tokenization
 Create a dictionary to convert words to Integers based on the number of occurrence of the word

In [9]:
'''
we will start creating dictionary with index 1 because 0 is reserved for padding
'''

vocab_to_int={w:i+1 for i,(w,c) in enumerate(sorted_words)}
print(vocab_to_int)

{'the': 1, 'a': 2, 'of': 3, 'to': 4, 'and': 5, 'is': 6, 'in': 7, 'br': 8, 'i': 9, 'it': 10, 'this': 11, 'that': 12, 'was': 13, 'movie': 14, 'but': 15, 'with': 16, 'as': 17, 'for': 18, 'film': 19, 'not': 20, 'on': 21, 'its': 22, 'one': 23, 'you': 24, 'all': 25, 'at': 26, 'are': 27, 'by': 28, 'be': 29, 'have': 30, 'his': 31, 'so': 32, 'like': 33, 'from': 34, 'or': 35, 'just': 36, 'an': 37, 'what': 38, 'if': 39, 'who': 40, 'even': 41, 'some': 42, 'about': 43, 'out': 44, 'only': 45, 'he': 46, 'no': 47, 'has': 48, 'when': 49, 'my': 50, 'more': 51, 'they': 52, 'first': 53, 'very': 54, 'me': 55, 'see': 56, 'there': 57, 'we': 58, 'story': 59, 'been': 60, 'than': 61, 'much': 62, 'up': 63, 'would': 64, 'time': 65, 'which': 66, 'most': 67, 'into': 68, 'because': 69, 'bad': 70, 'way': 71, 'will': 72, 'good': 73, 'never': 74, 'go': 75, 'how': 76, 'far': 77, 'war': 78, 'show': 79, 'then': 80, 'little': 81, 'say': 82, 'do': 83, 'scenes': 84, 'another': 85, 'where': 86, 'them': 87, 'least': 88, 'reall

In [10]:
encoded_reviews=list()
for review in all_reviews:
  encoded_review=list()
  for word in review.split():
    if word not in vocab_to_int.keys():
      #if word is not available in vocab_to_int put 0 in that place
      encoded_review.append(0)
    else:
      encoded_review.append(vocab_to_int[word])
  encoded_reviews.append(encoded_review)

In [11]:
import numpy as np
'''
this step will Return features of review_ints, where each review is padded with 0's or truncated to the input seq_length.
'''
sequence_length=250
features=np.zeros((len(encoded_reviews), sequence_length), dtype=int)
for i, review in enumerate(encoded_reviews):
  review_len=len(review)
  if (review_len<=sequence_length):
    zeros=list(np.zeros(sequence_length-review_len))
    new=zeros+review
  else:
    new=review[:sequence_length]
      
  features[i,:]=np.array(new)

In [12]:
#Our dataset has ‘positive’ and ‘negative’ as a label, it will be easy if we have 1 and 0, instead of ‘positive’ and ‘negative’
labels=[1 if label.strip()=='positive' else 0 for label in imdb_dataset.sentiment.to_list()]

### Train, validation, and test set splits

In [13]:
#split_dataset into 80% training , 10% test and 10% Validation Dataset
train_x=features[:int(0.6*len(features))]
train_y=labels[:int(0.6*len(features))]
valid_x=features[int(0.6*len(features)):int(0.8*len(features))]
valid_y=labels[int(0.6*len(features)):int(0.8*len(features))]
test_x=features[int(0.8*len(features)):]
test_y=labels[int(0.8*len(features)):]
print(len(train_y), len(valid_y), len(test_y))

24 8 8


In [14]:
import torch
from torch.utils.data import DataLoader, TensorDataset

#create Tensor Dataset
train_data=TensorDataset(torch.LongTensor(train_x), torch.FloatTensor(train_y)) #Changed to LongTensor
valid_data=TensorDataset(torch.LongTensor(valid_x), torch.FloatTensor(valid_y)) #Changed to LongTensor
test_data=TensorDataset(torch.LongTensor(test_x), torch.FloatTensor(test_y)) #Changed to LongTensor

#dataloader
batch_size=24
train_loader=DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader=DataLoader(valid_data, batch_size=batch_size, shuffle=True)
test_loader=DataLoader(test_data, batch_size=batch_size, shuffle=True)

### LSTM model specification


In [15]:
import torch.nn as nn

class SentimentLSTM(nn.Module):
    """
    Basic implementation of an LSTM for binary sentiment classification.
    """
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers
        """
        super().__init__()
        self.output_size=output_size
        self.n_layers=n_layers
        self.hidden_dim=hidden_dim

        #Embedding and LSTM layers
        self.embedding=nn.Embedding(vocab_size, embedding_dim)
        self.lstm=nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)

        #dropout layer
        self.dropout=nn.Dropout(0.3)

        #Linear and sigmoid layer
        self.fc1=nn.Linear(hidden_dim, 64)
        self.fc2=nn.Linear(64, 16)
        self.fc3=nn.Linear(16,output_size)
        self.sigmoid=nn.Sigmoid()

    def forward(self, x, hidden):
        """
        Perform a forward pass of our model on some input and hidden state.
        """
        batch_size=x.size()

        #Embadding and LSTM output
        embedd=self.embedding(x)
        lstm_out, hidden=self.lstm(embedd, hidden)

        #stack up the lstm output
        lstm_out=lstm_out.contiguous().view(-1, self.hidden_dim)

        #dropout and fully connected layers
        out=self.dropout(lstm_out)
        out=self.fc1(out)
        out=self.dropout(out)
        out=self.fc2(out)
        out=self.dropout(out)
        out=self.fc3(out)
        sig_out=self.sigmoid(out)

        sig_out=sig_out.view(batch_size, -1)
        sig_out=sig_out[:, -1]

        return sig_out, hidden

    def init_hidden(self, batch_size):
        """Initialize Hidden STATE"""
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                      weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

        return hidden

### Instantiate the model with hyperparameters

In [16]:
vocab_size = len(vocab_to_int)+1 # +1 for the 0 padding
output_size = 1
embedding_dim = 400
hidden_dim = 256
n_layers = 2
drop_prob = 0.5

net = SentimentLSTM(vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob)
print(net)


SentimentLSTM(
  (embedding): Embedding(2550, 400)
  (lstm): LSTM(400, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=256, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


### Training

In [27]:
import torch
import torch.nn as nn

lr = 0.01

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# check if CUDA is available
train_on_gpu = False#torch.cuda.is_available()

# training params
epochs = 

counter = 0
print_every = 10
clip = 5 # gradient clipping

# move model to GPU, if available
if(train_on_gpu):
    net.cuda()

net.train()
# train for some number of epochs
for e in range(epochs):
    print(f"epoch {e}")
    # initialize hidden state
    h = net.init_hidden(batch_size)

    # batch loop
    for inputs, labels in train_loader:
        counter += 1
        if(train_on_gpu):
            inputs=inputs.cuda()
            labels=labels.cuda()
        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])

        # zero out accumulated gradients
        #net.zero_grad()
        #get the output from the model
        output, h = net(inputs, h)

        # calculate the loss and perform backprop
        loss = criterion(output.squeeze(), labels.float()) # Changed labels.long() to labels.float()
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()

        # loss stats
        if counter % print_every == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()
            for inputs, labels in valid_loader:

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                #inputs, labels = inputs.cuda(), labels.cuda()
                output, val_h = net(inputs, val_h)
                val_loss = criterion(output.squeeze(), labels.float())

                val_losses.append(val_loss.item())

            net.train()
            print("Epoch: {}/{}...".format(e+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))

TypeError: 'float' object cannot be interpreted as an integer

### Evaluation


In [25]:
test_losses = [] # track loss
num_correct = 0

# init hidden state
h = net.init_hidden(8)
blah = True
net.eval()
# iterate over test data
for inputs, labels in test_loader:
  # Creating new variables for the hidden state, otherwise
  # we'd backprop through the entire training history
  h = tuple([each.data for each in h])

  #inputs, labels = inputs.cuda(), labels.cuda()
  output, h = net(inputs, h)
  # calculate loss
  test_loss = criterion(output.squeeze(), labels.float())
  test_losses.append(test_loss.item())

  # convert output probabilities to predicted class (0 or 1)
  pred = torch.round(output.squeeze())  # rounds to the nearest integer

  # compare predictions to true label
  correct_tensor = pred.eq(labels.float().view_as(pred))
  correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
  num_correct += np.sum(correct)

  # avg test loss
  print("Test loss: {:.3f}".format(np.mean(test_losses)))
  # accuracy over all test data
  test_acc = num_correct/len(test_loader.dataset)
  print("Test accuracy: {:.3f}".format(test_acc))

Test loss: 19.024
Test accuracy: 0.625
