INSTALL DEPENDENCIES (run once: uncomment the cell below, or run `pip install torch numpy` in a terminal)

In [2]:
#!pip install torch numpy


IMPORT LIBRARIES

In [3]:
import torch
import torch.nn as nn
import numpy as np
import json
import re


LOAD SAVED MODEL & VOCAB

In [4]:
# NOTE(review): torch.load deserializes with pickle unless weights_only mode is
# in effect (its default depends on the installed PyTorch version), so only
# open checkpoint files that come from a trusted source.
# map_location="cpu" remaps the stored tensors to the CPU while loading.
checkpoint = torch.load("best_gru_model.pt", map_location="cpu")

# Besides the weights, the checkpoint carries the vocabulary and the layer
# sizes that are needed to rebuild the network.
vocab = checkpoint["vocab"]
EMBED_DIM, HIDDEN_DIM = checkpoint["embed_dim"], checkpoint["hidden_dim"]

# The number of vocabulary entries determines the size of the embedding table.
vocab_size = len(vocab)

print("Model & vocabulary loaded")
print("Vocabulary size:", vocab_size)


Model & vocabulary loaded
Vocabulary size: 81589


In [5]:
# Run inference on the GPU when PyTorch reports one as available; otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cpu


MODEL DEFINITION

In [8]:
class SentimentGRU(nn.Module):
    """Two-class sequence classifier: embedding -> single GRU -> linear head.

    The attribute names (``embedding``, ``recurrent``, ``fc``) become the
    prefixes of the ``state_dict`` keys, so they have to stay in sync with any
    checkpoint that is loaded into this module.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        """Build the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            embed_dim: size of each token embedding (also the GRU input size).
            hidden_dim: size of the GRU hidden state.
        """
        super().__init__()
        # Index 0 is declared as the padding index of the embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
            padding_idx=0,
        )
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.recurrent = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        # Two output scores, one per class.
        self.fc = nn.Linear(in_features=hidden_dim, out_features=2)

    def forward(self, x, lengths):
        """Return one row of two unnormalized class scores per sequence.

        Args:
            x: integer tensor of token ids, laid out as (batch, sequence).
            lengths: tensor holding the number of real (non-padding) tokens in
                each sequence of the batch; every entry must be at least 1,
                because ``pack_padded_sequence`` rejects non-positive lengths.

        Returns:
            The output of the linear head applied to the GRU's final hidden
            state, i.e. a tensor with 2 values per sequence.
        """
        embedded = self.embedding(x)

        # Packing makes the GRU stop at each sequence's true length, so the
        # final hidden state is not affected by padding positions.
        # The lengths are moved to the CPU for packing, and
        # enforce_sorted=False lifts the requirement that the batch be ordered
        # by length.
        packed_batch = nn.utils.rnn.pack_padded_sequence(
            embedded,
            lengths.cpu(),
            batch_first=True,
            enforce_sorted=False,
        )

        # Only the final hidden state is needed; per-step outputs are dropped.
        _, final_hidden = self.recurrent(packed_batch)
        last_layer_state = final_hidden[-1]
        return self.fc(last_layer_state)



LOAD MODEL WEIGHTS

In [9]:
# Rebuild the architecture with the sizes read from the checkpoint, restore the
# trained parameters, then place the network on the selected device.
model = SentimentGRU(vocab_size, EMBED_DIM, HIDDEN_DIM)
model.load_state_dict(checkpoint["model_state_dict"])
model.to(device)

# Switch the module to evaluation mode before it is used for predictions.
model.eval()

print("GRU model ready for inference")


GRU model ready for inference


TEXT PREPROCESSING

In [10]:
# Number of token ids per encoded review: predict_sentiment passes this value
# to encode_and_pad, which truncates longer inputs and pads shorter ones to it.
# NOTE(review): presumably has to match the sequence length used when the
# model was trained — confirm against the training notebook.
MAX_LEN = 500

def clean_text(text):
    """Normalize raw review text to lowercase words separated by single spaces.

    Steps, in order: lowercase the text, delete ``<...>`` spans (HTML-like
    tags), delete every character that is not ``a``-``z`` or whitespace,
    collapse whitespace runs to one space, and strip the ends.

    Tags are deleted without inserting a space, so words that are separated
    only by a tag end up joined ("great<br />movie" -> "greatmovie").

    NOTE(review): presumably mirrors the preprocessing used at training time —
    confirm before changing any of the patterns.

    Args:
        text: the raw review string.

    Returns:
        The cleaned string; empty if no letters survive.
    """
    # (pattern, replacement) pairs, applied one after another.
    substitutions = (
        (r"<.*?>", ""),       # shortest span from "<" to ">" on a single line
        (r"[^a-z\s]", ""),    # digits, punctuation, non-ASCII letters, ...
        (r"\s+", " "),        # any whitespace run becomes one space
    )

    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def tokenize(text):
    """Split text into a list of tokens on runs of whitespace.

    Args:
        text: the string to split.

    Returns:
        A list of the whitespace-separated pieces; an empty list when the
        string is empty or contains only whitespace.
    """
    words = text.split()
    return words

def encode_and_pad(tokens, vocab, max_len):
    """Map tokens to vocabulary ids and force the result to ``max_len`` entries.

    Tokens missing from ``vocab`` are replaced by the id stored under
    ``"<UNK>"``. Sequences shorter than ``max_len`` are right-padded with the
    id stored under ``"<PAD>"``; longer ones are cut off after ``max_len``
    tokens.

    Args:
        tokens: list of token strings.
        vocab: mapping from token to integer id; must contain ``"<UNK>"`` and
            ``"<PAD>"`` whenever those ids are actually needed.
        max_len: target length of the returned id list.

    Returns:
        A tuple ``(ids, real_length)`` where ``ids`` has ``max_len`` entries
        and ``real_length`` is the number of non-padding tokens kept, i.e.
        ``min(len(tokens), max_len)`` (0 for an empty token list).
    """
    ids = [vocab.get(token, vocab["<UNK>"]) for token in tokens]
    real_length = min(len(ids), max_len)

    missing = max_len - len(ids)
    if missing > 0:
        # Too short: fill the remainder with the padding id.
        ids = ids + [vocab["<PAD>"]] * missing
    else:
        # Long enough (or too long): keep only the first max_len ids.
        ids = ids[:max_len]

    return ids, real_length


PREDICTION FUNCTION

In [11]:
def predict_sentiment(text):
    """Classify a single review as "Positive" or "Negative".

    The text is cleaned, tokenized and encoded to ``MAX_LEN`` ids, then run
    through the globally loaded ``model`` with gradient tracking disabled.

    Args:
        text: the raw review string.

    Returns:
        A tuple ``(sentiment, confidence)``: ``sentiment`` is "Positive" when
        the model's highest-scoring class is 1 and "Negative" otherwise;
        ``confidence`` is the softmax probability of that class as a float.

    Raises:
        ValueError: if no tokens are left after cleaning (for example an empty
            string, or text made up only of digits, punctuation or non-Latin
            characters), because there is nothing for the model to score.
    """
    tokens = tokenize(clean_text(text))
    if not tokens:
        # A zero-length sequence would be rejected inside the model by
        # pack_padded_sequence with an opaque RuntimeError, so fail early with
        # a message that names the real problem.
        raise ValueError(
            "Cannot predict sentiment: the text contains no usable words "
            "after cleaning."
        )

    encoded, length = encode_and_pad(tokens, vocab, MAX_LEN)

    # Batch of one sequence, created directly on the model's device.
    x = torch.tensor([encoded], dtype=torch.long, device=device)
    # The model moves the lengths to the CPU before packing, so they are kept
    # on the CPU here instead of being copied to the device and back.
    lengths = torch.tensor([length])

    with torch.no_grad():
        outputs = model(x, lengths)
        # Turn the two class scores into probabilities and pick the larger.
        probs = torch.softmax(outputs, dim=1)
        confidence, pred = torch.max(probs, dim=1)

    sentiment = "Positive" if pred.item() == 1 else "Negative"
    return sentiment, confidence.item()


TEST WITH SAMPLE REVIEWS

In [12]:
# Hand-written reviews used as a quick smoke test of the prediction pipeline.
samples = [
    "This movie was absolutely fantastic. Brilliant acting and a great story.",
    "The film was boring, slow, and a complete waste of time.",
    "It had some good moments but overall it failed to impress me."
]

# Score the reviews in order and print each one next to the model's verdict;
# the trailing "\n" leaves a blank line between entries.
for review in samples:
    sentiment, confidence = predict_sentiment(review)
    print(
        f"Review: {review}",
        f"Prediction: {sentiment} (confidence: {confidence:.3f})\n",
        sep="\n",
    )


Review: This movie was absolutely fantastic. Brilliant acting and a great story.
Prediction: Positive (confidence: 0.998)

Review: The film was boring, slow, and a complete waste of time.
Prediction: Negative (confidence: 0.999)

Review: It had some good moments but overall it failed to impress me.
Prediction: Negative (confidence: 0.837)

