In [1]:
from rnn import RNNModel
from lstm import LSTMModel
from transformer import DecoderOnlyLM
import torch
import torch.nn as nn

In [2]:
model_path = 'best_model_RNNModel.pth'
TOKENIZER_PATH = "bpe_tokenizer.model"
TRAIN_FILE = "data/train.jsonl"
VAL_FILE = "data/test.jsonl"
device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


In [3]:
rnn_model = RNNModel(
    vocab_size=10000,
    embedding_dim=128,
    hidden_dim=256,
    num_layers=2,
    dropout=0.2
)

lstm_model = LSTMModel(
    vocab_size=10000,
    embedding_dim=256,
    hidden_dim=256,
    num_layers=2,
    dropout=0.2,
)

transformer_model = DecoderOnlyLM(
    vocab_size=10000,
    d_model=256,
    nhead=4,
    num_layers=4,
    dropout=0.2
)

In [4]:
models = [rnn_model, lstm_model, transformer_model]

for model in models:
    model.load_state_dict(torch.load(f"best_models/pth/best_model_{model._get_name()}.pth", map_location='cpu'))
    print(f"Loaded {model._get_name()}")
    model.to(device)
    model.eval()



Loaded RNNModel
Loaded LSTMModel
Loaded DecoderOnlyLM


In [5]:
prompt = "Which do you prefer? Dogs or cats?"
# Tokenize the prompt
import sentencepiece as spm
tokenizer = spm.SentencePieceProcessor(model_file=TOKENIZER_PATH)

In [6]:
for model in models:
    print(f"Generating text with {model._get_name()}")
    response = model.generate(tokenizer, prompt, device='mps')
    print(f"Response: {response}")
    print("--------------------------------")

Generating text with RNNModel
Response: Which do you prefer? Dogs or cats? said the young man, pointing to hisgeneral. The even catarnt in the hall of our old horses were asleep and ran from her sighing with a curious moriting deep, and when we ⁇  were going to be rater and sang
--------------------------------
Generating text with LSTMModel
Response: Yes: In the: Do said the sinking? We had knocking through a leg and beating yesterday.  ⁇ uite the letter said I am that was not mistaken, but I have the continual thought of one criminal. However, I know that I
--------------------------------
Generating text with DecoderOnlyLM
Response: No. There is no shirt. You are right? ones are not killed, sir. And Mademoiselle Gillenormand raised his hand towards his old womans bed, and let them speak to his feet in a low tone which had uttered her; still,
--------------------------------


In [13]:
from dataset import TextDataset
from utils import collate_fn
from torch.utils.data import DataLoader
from tqdm import tqdm
MAX_SEQ_LEN = 128
BATCH_SIZE = 32

val_dataset = TextDataset(VAL_FILE, tokenizer, MAX_SEQ_LEN)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

In [15]:
def evaluate_ppl(model, data_loader, criterion, vocab_size, device):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, targets in tqdm(data_loader, desc="Evaluating Perplexity"):
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs, _ = model(inputs)
            outputs = outputs.view(-1, vocab_size)
            targets = targets.view(-1)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    avg_loss = total_loss / len(data_loader)
    ppl = torch.exp(torch.tensor(avg_loss))
    return ppl.item()

evaluate_ppl(models[1], val_loader, nn.CrossEntropyLoss(), 10000, device)

Evaluating Perplexity: 100%|██████████| 310/310 [00:05<00:00, 51.90it/s]


22989.0703125

In [18]:
import math
def evaluate_ppl(model, data_loader, criterion, vocab_size, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in tqdm(data_loader, desc="Evaluating Perplexity"):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs, _ = model(inputs)
            outputs = outputs.view(-1, vocab_size)
            targets = targets.view(-1)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    avg_loss = total_loss / len(data_loader)
    ppl = math.exp(avg_loss)
    return ppl
evaluate_ppl(models[1], val_loader, nn.CrossEntropyLoss(), 10000, device)

Evaluating Perplexity: 100%|██████████| 310/310 [00:05<00:00, 51.91it/s]


22989.07386572504

In [None]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def evaluate_bleu(model, dataset, sp, num_samples=50):
    model.eval()
    smooth = SmoothingFunction().method4
    scores = []

    for i in range(min(len(dataset), num_samples)):
        