# Settings

In this section, we set the random seed for reproducibility, define the text and label fields, and load the training, validation, and test sets.

In [3]:
import torch
from torchtext import data
from torchtext import datasets
import random

SEED = 1234

# Seed PyTorch (CPU and CUDA) so weight initialisation and dropout are repeatable
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# TEXT tokenises the reviews with spaCy; LABEL stores the target as a float tensor
TEXT = data.Field(tokenize='spacy')
LABEL = data.LabelField(tensor_type=torch.FloatTensor)

# Load the IMDB train and test sets
train, test = datasets.IMDB.splits(TEXT, LABEL)

# Carve a validation set out of the training data.
# `random.seed()` returns None, so the previous `random_state=random.seed(SEED)`
# passed None and was reproducible only through the call's side effect.
# Seed explicitly and hand over the RNG state that `split` is documented to take.
random.seed(SEED)
train, valid = train.split(random_state=random.getstate())


In [7]:
# Vocabulary settings, named so they are easy to find and tune
MAX_VOCAB_SIZE = 25000                 # passed to build_vocab as max_size
PRETRAINED_VECTORS = "glove.6B.100d"   # pre-trained vectors attached to the vocabulary

TEXT.build_vocab(
    train,
    max_size=MAX_VOCAB_SIZE,
    vectors=PRETRAINED_VECTORS,
)
LABEL.build_vocab(train)

In [8]:
BATCH_SIZE = 16  # kept small: the notebook could not run with too large a batch size


def _text_length(example):
    """Sort key handed to BucketIterator: the length of an example's `text`."""
    return len(example.text)


# One iterator per split, in the same order as the datasets tuple
split_iterators = data.BucketIterator.splits(
    (train, valid, test),
    batch_size=BATCH_SIZE,
    sort_key=_text_length,
    repeat=False,
)
train_iterator, valid_iterator, test_iterator = split_iterators

# The RNN-GRU setup

In this section we define the RNN-GRU model, as a comparison to the RNN-LSTM model, and replace the model's initial word embeddings with the pre-trained GloVe embeddings we just downloaded.

In [9]:
#  the setup of RNN class

import torch.nn as nn

class RNN_GRU(nn.Module):
    """GRU sentence classifier: embedding -> (bi)GRU -> dropout -> linear.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each embedding vector (the GRU input size).
        hidden_dim: GRU hidden size per direction.
        output_dim: number of outputs of the final linear layer.
        n_layers: number of stacked GRU layers.
        bidirectional: whether the GRU reads the sequence in both directions.
        dropout: dropout probability used on the embeddings, between stacked
            GRU layers, and on the final hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()

        # The classifier input width depends on how many directions are concatenated
        num_directions = 2 if bidirectional else 1

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            bidirectional=bidirectional,
            # recurrent dropout only acts between stacked layers; passing a
            # non-zero value with a single layer just triggers a warning
            dropout=dropout if n_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map token indices `x` of shape [sent len, batch size] to logits
        of shape [batch size, output_dim]."""

        # x = [sent len, batch size]
        embedded = self.dropout(self.embedding(x))
        # embedded = [sent len, batch size, emb dim]

        # A GRU returns (output, hidden); unlike an LSTM there is no cell state
        _, hidden = self.rnn(embedded)
        # hidden = [num layers * num directions, batch size, hid dim]

        if self.rnn.bidirectional:
            # last two slices are the top layer's forward and backward states
            final_hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            # single direction: only the top layer's final state
            final_hidden = hidden[-1, :, :]

        final_hidden = self.dropout(final_hidden)
        # final_hidden = [batch size, hid dim * num directions]

        # No squeeze(0) here: it would drop the batch dimension when batch size is 1
        return self.fc(final_hidden)

In [10]:
# Model hyper-parameters
INPUT_DIM = len(TEXT.vocab)  # one embedding row per vocabulary entry
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5

model = RNN_GRU(
    vocab_size=INPUT_DIM,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    n_layers=N_LAYERS,
    bidirectional=BIDIRECTIONAL,
    dropout=DROPOUT,
)

# Retrieve the pre-trained vectors attached to the vocabulary; they are copied
# into the model's embedding layer in the next cell
pretrained_embeddings = TEXT.vocab.vectors


In [11]:
# Overwrite the embedding layer's weights in place with `pretrained_embeddings`.
# The tensor returned by copy_ is the last expression, so the cell displays it.
model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.1468,  0.3804,  0.4931,  ...,  1.8147,  0.1932,  0.6185],
        [ 0.2095,  0.2334,  0.4693,  ..., -0.5408, -1.0424,  0.1241],
        [ 0.9397, -0.6000, -0.4878,  ...,  0.1300,  0.5543,  0.6509]])

# Training

In this section we define the training function for our model (run on the GPU when one is available) and a function that calculates the accuracy, just as we did for the RNN-LSTM model.

In [13]:
# basic setup
import torch.optim  as optim
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to its final device *before* building the optimizer, as the
# torch.optim documentation recommends
model = model.to(device)

optimizer = optim.Adam(model.parameters())

criterion = nn.BCEWithLogitsLoss()  #  the loss function; takes raw logits
criterion = criterion.to(device)

The accuracy function:

In [14]:
# the accuracy function
import torch.nn.functional as F

def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8

    Args:
        preds: raw model outputs (logits), one per example.
        y: target labels, same shape as `preds`.

    Returns:
        A 0-dim tensor holding the fraction of rounded predictions equal to `y`.
    """

    # squash logits with a sigmoid, then round to the closest integer.
    # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()  # convert into float for division
    acc = correct.sum() / len(correct)
    return acc

In [15]:
# the training function
def train(model, iterator, optimizer, criterion):
    """Run one training epoch over `iterator` and update `model` in place.

    Each batch must expose `.text` (model input) and `.label` (target).

    Returns:
        (mean loss per batch, mean accuracy per batch) as Python numbers.

    Note: defining this function rebinds the notebook-level name `train`,
    which earlier cells use for the training dataset split. Re-run the
    settings cell before re-running the vocabulary or iterator cells.
    """
    model.train()  # switch on training-mode behaviour

    batch_losses = []
    batch_accs = []

    for minibatch in iterator:
        optimizer.zero_grad()

        # forward pass: drop the trailing singleton dimension of the logits
        logits = model(minibatch.text).squeeze(1)
        loss = criterion(logits, minibatch.label)
        acc = binary_accuracy(logits, minibatch.label)

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
        batch_accs.append(acc.item())

    n_batches = len(iterator)
    return sum(batch_losses) / n_batches, sum(batch_accs) / n_batches

# Evaluation

In this section we define an evaluation function and then evaluate the trained model in terms of the accuracy we defined above.

In [16]:
# the evaluation function
def evaluate(model, iterator, criterion):
    """Score `model` on every batch of `iterator` without updating it.

    Each batch must expose `.text` (model input) and `.label` (target).

    Returns:
        (mean loss per batch, mean accuracy per batch) as Python numbers.
    """
    model.eval()  # switch to evaluation-mode behaviour

    batch_losses, batch_accs = [], []

    # no gradients are needed for scoring
    with torch.no_grad():
        for minibatch in iterator:
            logits = model(minibatch.text).squeeze(1)
            batch_losses.append(criterion(logits, minibatch.label).item())
            batch_accs.append(binary_accuracy(logits, minibatch.label).item())

    n_batches = len(iterator)
    return sum(batch_losses) / n_batches, sum(batch_accs) / n_batches

In [17]:
# the trained results of our trained model with validation set
N_EPOCHS = 5

for epoch in range(N_EPOCHS):
    # one pass over the training iterator, then score the validation iterator
    train_metrics = train(model, train_iterator, optimizer, criterion)
    valid_metrics = evaluate(model, valid_iterator, criterion)

    train_loss, train_acc = train_metrics
    valid_loss, valid_acc = valid_metrics

    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc*100:.2f}%, Val. Loss: {valid_loss:.3f}, Val. Acc: {valid_acc*100:.2f}%')

  return Variable(arr, volatile=not train)


Epoch: 01, Train Loss: 0.547, Train Acc: 68.94%, Val. Loss: 0.288, Val. Acc: 88.59%


KeyboardInterrupt: 

Note: Because running multiple epochs takes too long, I stopped the process after obtaining the result of the first epoch. The model is therefore not fully trained, but the idea behind it is clear.

In [20]:
# then on the test set:

# Score the model on the held-out test iterator
test_metrics = evaluate(model, test_iterator, criterion)
test_loss, test_acc = test_metrics

print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc*100:.2f}%')

  return Variable(arr, volatile=not train)


Test Loss: 0.283, Test Acc: 88.56%


# Conclusion

As we can see from the result on the test set, the accuracy of the RNN-GRU (88.56%) is slightly higher than that of the RNN-LSTM provided in the sample (~86%). 

# Usage of the model

In this section, we use our trained model to predict sentiment. We first define a prediction function that applies the model, and then run it on some sample sentences.

In [21]:
import spacy
nlp = spacy.load('en')

#  the prediction function
def predict_sentiment(sentence):
    """Return the model's probability that `sentence` has positive sentiment.

    Args:
        sentence: raw text, tokenised here with `nlp.tokenizer`.

    Returns:
        float: the sigmoid of the model's output for this sentence.

    Raises:
        ValueError: if `sentence` yields no tokens.
    """
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    if not tokenized:
        # fail early with a clear message instead of an opaque error from the GRU
        raise ValueError("cannot predict the sentiment of an empty sentence")

    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    # shape [sent len, 1]: a batch holding this single sentence
    tensor = torch.LongTensor(indexed).to(device).unsqueeze(1)

    # Inference must not depend on whichever mode the model was left in:
    # switch to eval mode (turns dropout off) and skip gradient tracking.
    model.eval()
    with torch.no_grad():
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid
        prediction = torch.sigmoid(model(tensor))
    return prediction.item()

In [27]:
# Cases where the model works well
for review in ("this film sucks", "this film is great"):
    print("the probability of positive sentiment:", predict_sentiment(review))

the probability of positive sentiment: 0.19117307662963867
the probability of positive sentiment: 0.9527433514595032




In [29]:
# A case where the model does not work well
hard_review = "I prefer to die rather than watch this film"
print("the probability of positive sentiment:", predict_sentiment(hard_review))

the probability of positive sentiment: 0.6042129993438721


