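## RNN for sentiment classification

A minimal PyTorch example: a single-layer RNN over learned word embeddings is trained on ten short movie reviews to predict positive (1) or negative (0) sentiment.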

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Toy sentiment corpus: the first five reviews are positive, the last five negative.
sentences = [
    # positive examples
    "I love this movie",
    "This film is great",
    "Amazing experience",
    "I enjoyed it",
    "Best movie ever",
    # negative examples
    "I hate this movie",
    "This film is terrible",
    "Awful experience",
    "I disliked it",
    "Worst movie ever",
]
# One target per sentence, in the same order (1 = positive, 0 = negative).
# Stored as float32 because the targets are fed to a binary cross-entropy loss.
labels = torch.tensor([1] * 5 + [0] * 5, dtype=torch.float32)

In [2]:
# Build the word -> id vocabulary and encode every sentence as a tensor of ids.
# Id 0 is reserved for "<unk>"; each new lower-cased word gets the next free id.
vocab = {"<unk>": 0}
tokenized_sentences = []

for sentence in sentences:
    # setdefault returns the existing id, or registers the word under
    # len(vocab) (evaluated before insertion, i.e. the next unused id).
    token_ids = [vocab.setdefault(token, len(vocab)) for token in sentence.lower().split()]
    tokenized_sentences.append(torch.tensor(token_ids))

In [3]:
# Peek at the encoded form of the first five sentences.
tokenized_sentences[:5]

[tensor([1, 2, 3, 4]),
 tensor([3, 5, 6, 7]),
 tensor([8, 9]),
 tensor([ 1, 10, 11]),
 tensor([12,  4, 13])]

In [4]:
# Hyperparameters for the sentiment classifier.
# Architecture: embedding width, RNN hidden-state width, number of stacked RNN layers.
embedding_dim, hidden_dim, num_layers = 10, 8, 1
# Optimisation: optimizer learning rate and number of passes over the training sentences.
learning_rate, num_epochs = 0.01, 100
# One embedding row per vocabulary entry (this count includes the "<unk>" slot).
vocab_size = len(vocab)

In [None]:
# Seed the RNG before creating any layer so that weight initialisation (and
# therefore the whole training run) is reproducible under Restart & Run All.
torch.manual_seed(0)

# Model components, applied in this order:
# token ids -> embedding -> RNN -> linear -> sigmoid (probability of "positive").
embedding = nn.Embedding(vocab_size, embedding_dim)
rnn = nn.RNN(
    input_size=embedding_dim,
    hidden_size=hidden_dim,
    num_layers=num_layers,  # honour the configured hyperparameter instead of a hard-coded 1
    batch_first=True,       # inputs are (batch, seq_len, embedding_dim)
)
fc = nn.Linear(in_features=hidden_dim, out_features=1)
sigmoid = nn.Sigmoid()

# Training setup: a single Adam optimizer over the parameters of all three
# trainable modules, and binary cross-entropy on the sigmoid output.
optimizer = optim.Adam(
    list(embedding.parameters()) + list(rnn.parameters()) + list(fc.parameters()),
    lr=learning_rate,
)
criterion = nn.BCELoss()

In [6]:
# Sanity check: look up the (still untrained) embedding vectors for the first
# tokenized sentence — one row of embedding_dim values per token id.
embedding(tokenized_sentences[0])

tensor([[ 0.0734, -1.0112,  0.1437,  1.3157, -1.4879, -0.0434, -0.7689, -1.3871,
         -0.8758,  1.1015],
        [ 0.5250, -0.7598,  1.1881, -0.6249, -1.1482,  0.2297,  1.3443,  0.1197,
          0.3731, -0.2501],
        [-1.3194,  1.0482,  1.2214,  1.5471,  1.1125, -0.1039, -0.3671, -0.1031,
         -0.4327,  0.0018],
        [ 0.1261, -0.3564, -1.2284,  1.7846,  0.5955,  0.4956,  1.0887, -0.3518,
         -0.5098, -1.4278]], grad_fn=<EmbeddingBackward0>)

In [None]:
# Training loop: one optimizer step per sentence (batch size 1), with the
# average per-sentence loss reported every 10 epochs.
for epoch in range(num_epochs):
    total_loss = 0
    for sentence, label in zip(tokenized_sentences, labels):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Add a leading batch dimension: (seq_len,) -> (1, seq_len).
        batch = sentence.unsqueeze(0)
        # BCELoss needs the target to have the same (1, 1) shape as the prediction.
        target = label.reshape(1, 1)

        # Forward pass: embed, run the RNN, average its outputs over the
        # time dimension, then map to a probability.
        hidden_states, _ = rnn(embedding(batch))
        prediction = sigmoid(fc(hidden_states.mean(dim=1)))

        # Backward pass and parameter update.
        loss = criterion(prediction, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(tokenized_sentences):.4f}')



Epoch 10/100, Loss: 0.2670
Epoch 20/100, Loss: 0.0999
Epoch 30/100, Loss: 0.0551
Epoch 40/100, Loss: 0.0385
Epoch 50/100, Loss: 0.0302
Epoch 60/100, Loss: 0.0246
Epoch 70/100, Loss: 0.0206
Epoch 80/100, Loss: 0.0176
Epoch 90/100, Loss: 0.0152
Epoch 100/100, Loss: 0.0132


In [None]:
def get_sentiment(sentence, verbose=True):
    """Classify a sentence as "Positive" or "Negative" with the trained RNN.

    The sentence is lower-cased and split on whitespace; words missing from
    ``vocab`` are mapped to the ``"<unk>"`` id. The token ids are passed
    through ``embedding`` -> ``rnn`` -> mean over time steps -> ``fc`` ->
    ``sigmoid``, and the resulting probability is thresholded at 0.5.

    Args:
        sentence: Text to classify. Must contain at least one word.
        verbose: When True (the default), print the token-id tensor, the
            predicted probability and the thresholded label.

    Returns:
        "Positive" if the predicted probability is >= 0.5, otherwise "Negative".

    Raises:
        ValueError: If ``sentence`` is empty or contains only whitespace.
    """
    words = sentence.lower().split()
    if not words:
        # An empty id list would become a float tensor, which nn.Embedding
        # rejects with an opaque dtype error; fail early with a clear message.
        raise ValueError("sentence must contain at least one word")

    unk_id = vocab["<unk>"]
    token_ids = torch.tensor(
        [vocab.get(word, unk_id) for word in words], dtype=torch.long
    ).unsqueeze(0)  # add the batch dimension: (seq_len,) -> (1, seq_len)
    if verbose:
        print(token_ids)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output, _ = rnn(embedding(token_ids))
        prediction = sigmoid(fc(output.mean(dim=1)))

    predicted_label = (prediction >= 0.5).float().item()
    if verbose:
        print(prediction)
        print(predicted_label)

    return "Positive" if predicted_label == 1.0 else "Negative"

In [10]:
# "good" never appears in the training sentences, so it is looked up as "<unk>" (id 0).
get_sentiment('this movie is good')

tensor([[3, 4, 6, 0]])
tensor([[0.9818]])
1.0


'Positive'

In [13]:
# "course" is out-of-vocabulary and becomes "<unk>" (id 0); "Amazing" is
# lower-cased before lookup, so it matches the training word "amazing".
get_sentiment('this course is Amazing')

tensor([[3, 0, 6, 8]])
tensor([[0.9963]])
1.0


'Positive'

In [17]:
# Caveat: both words are out-of-vocabulary, so the input is just [<unk>, <unk>].
# No training sentence contains the "<unk>" id, so this prediction rests on an
# embedding row that training never exercised — treat the result as arbitrary,
# not as something the model learned.
get_sentiment('not bad')

tensor([[0, 0]])
tensor([[0.9330]])
1.0


'Positive'