# Evaluation of RNN

In [8]:
import torch
import torch.nn as nn
import re
from collections import Counter
from datasets import load_dataset

In [9]:
dataset = load_dataset("imdb")
train = dataset['train']
test = dataset['test'].select(range(2500))
# Data Preprocessing
vocab_size = 10000
counter = Counter()

def tokenize(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text.lower().split()

for example in train:
    counter.update(tokenize(example['text']))

vocab = {word: idx+2 for idx, (word, _) in enumerate(counter.most_common(vocab_size))}
vocab['<PAD>'] = 0
vocab['<UNK>'] = 1


def encode_sentence(text):
    return [vocab.get(word, vocab['<UNK>']) for word in tokenize(text)]

vocab

{'the': 2,
 'and': 3,
 'a': 4,
 'of': 5,
 'to': 6,
 'is': 7,
 'in': 8,
 'it': 9,
 'i': 10,
 'this': 11,
 'that': 12,
 'br': 13,
 'was': 14,
 'as': 15,
 'for': 16,
 'with': 17,
 'movie': 18,
 'but': 19,
 'film': 20,
 'on': 21,
 'not': 22,
 'you': 23,
 'are': 24,
 'his': 25,
 'have': 26,
 'he': 27,
 'be': 28,
 'one': 29,
 'its': 30,
 'at': 31,
 'all': 32,
 'by': 33,
 'an': 34,
 'they': 35,
 'from': 36,
 'who': 37,
 'so': 38,
 'like': 39,
 'her': 40,
 'just': 41,
 'or': 42,
 'about': 43,
 'has': 44,
 'if': 45,
 'out': 46,
 'some': 47,
 'there': 48,
 'what': 49,
 'good': 50,
 'when': 51,
 'more': 52,
 'very': 53,
 'even': 54,
 'she': 55,
 'my': 56,
 'no': 57,
 'up': 58,
 'would': 59,
 'which': 60,
 'only': 61,
 'time': 62,
 'really': 63,
 'story': 64,
 'their': 65,
 'were': 66,
 'had': 67,
 'see': 68,
 'can': 69,
 'me': 70,
 'than': 71,
 'we': 72,
 'much': 73,
 'well': 74,
 'been': 75,
 'get': 76,
 'will': 77,
 'into': 78,
 'also': 79,
 'because': 80,
 'other': 81,
 'do': 82,
 'people': 83

In [10]:
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=vocab['<PAD>'])
        self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, text):
        embedded = self.embedding(text)
        output, hidden = self.rnn(embedded)
        return self.fc(hidden.squeeze(0))

In [11]:
model = RNNClassifier(vocab_size=len(vocab), embed_dim=128, hidden_dim=128, output_dim=2)
model.load_state_dict(torch.load("../models/rnn"))
model.eval()

RNNClassifier(
  (embedding): Embedding(10002, 128, padding_idx=0)
  (rnn): RNN(128, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

In [12]:
def predict_sentiment(text):
    encoded = torch.tensor(encode_sentence(text), dtype=torch.long).unsqueeze(0)  # add batch dim
    with torch.no_grad():
        output = model(encoded)
        prediction = torch.argmax(output, dim=1).item()
        return "Positive" if prediction == 1 else "Negative"

In [18]:
test_text = "Wow what a great movie!"
print(f"Sentence: '{test_text}'")
print("Predicted sentiment:", predict_sentiment(test_text))


Sentence: 'Wow what a great movie!'
Predicted sentiment: Positive


In [21]:
test_text = "That was waste of my time"
print(f"Sentence: '{test_text}'")
print("Predicted sentiment:", predict_sentiment(test_text))

Sentence: 'That was waste of my time'
Predicted sentiment: Negative
