In [1]:
import torch
from torch.utils.data import DataLoader
import pandas as pd

from src.utils import (
    read_glove_embeddings,
    build_tokenizer,
    compute_metrics,
    evaluate_model,
    load_checkpoint_weights,
)
from src.data import get_dataset, CustomCollator
from src.models import LSTMEmbedder, BiLSTMEmbedder, BiLSTMPooledEmbedder, SentenceClassificationModel

  from .autonotebook import tqdm as notebook_tqdm
[2024-04-19 17:08:17,952] [INFO] [datasets:config.py:58] PyTorch version 2.2.2 available.


### Evaluating the models on NLI

In [2]:
# Pick the best available device. The notebook was originally run on Apple's
# MPS backend; falling back to CUDA / CPU keeps it runnable on other machines.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

batch_size = 64
mlp_hidden_dim = 512  # passed to SentenceClassificationModel in the evaluation loop

# Embedder architectures to evaluate, keyed by a short display name.
EMBEDDERS = {
    "lstm": LSTMEmbedder,
    "bi-lstm": BiLSTMEmbedder,
    "bi-lstm-pool": BiLSTMPooledEmbedder,
}
# Checkpoint file name (looked up under models/) for each embedder.
# Keys must match those of EMBEDDERS.
CHECKPOINTS = {
    "lstm": "lstm_2024_04_17_13_47.pt",
    "bi-lstm": "bi-lstm_2024_04_17_13_53.pt",
    "bi-lstm-pool": "bi-lstm-pool_2024_04_17_14_52.pt",
}

dataset = get_dataset()
words, vectors = read_glove_embeddings()

tokenizer = build_tokenizer(words)

# Evaluation loaders: shuffling is unnecessary for computing metrics, and a
# fixed order makes repeated runs iterate over identical batches.
val_dataloader = DataLoader(
    dataset['validation'],
    collate_fn=CustomCollator(tokenizer, device),
    batch_size=batch_size,
    shuffle=False,
)
test_dataloader = DataLoader(
    dataset['test'],
    collate_fn=CustomCollator(tokenizer, device),
    batch_size=batch_size,
    shuffle=False,
)

[2024-04-19 17:08:26,995] [INFO] [torchtext.vocab.vectors:vectors.py:172] Loading vectors from .vector_cache/glove.840B.300d.txt.pt


The OrderedVocab you are attempting to save contains holes for indices [138702], your vocabulary could be corrupted !


In [6]:
# Evaluate every embedder variant on the validation and test splits and
# collect one row of accuracies per variant.
all_results = []
for embedder_name in EMBEDDERS:

    # Build the classifier around a fresh embedder
    # (the trailing 3 is presumably the number of NLI classes -- TODO confirm)
    embedder = EMBEDDERS[embedder_name](vectors)
    model = SentenceClassificationModel(embedder, mlp_hidden_dim, 3)
    model = model.to(device)

    # Restore the trained weights for this variant
    checkpoint_file = f"models/{CHECKPOINTS[embedder_name]}"
    load_checkpoint_weights(model, checkpoint_file, device, skip_glove=True)

    # Score the model: validation split first, then test split
    val_metrics = evaluate_model(model, val_dataloader)
    test_metrics = evaluate_model(model, test_dataloader)

    # Record plain Python floats so the table holds no tensors
    all_results.append(
        dict(
            model=embedder_name,
            val_accuracy=val_metrics['accuracy'].item(),
            test_accuracy=test_metrics['accuracy'].item(),
        )
    )

results_df = pd.DataFrame(all_results)
results_df

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Unnamed: 0,model,val_accuracy,test_accuracy
0,lstm,0.810678,0.805296
1,bi-lstm,0.805459,0.803166
2,bi-lstm-pool,0.848669,0.844663


### Testing models with a custom premise and hypothesis

In [36]:
def predict_label(model, premise, hypothesis):
    """Predict the class index for a single premise/hypothesis pair.

    Both sentences are tokenized with the notebook-level ``tokenizer`` and
    moved to the notebook-level ``device`` before being fed to ``model``.

    Args:
        model: A ``torch.nn.Module`` called as ``model(premise_batch,
            hypothesis_batch)`` that returns per-class scores with the class
            dimension at ``dim=1``.
        premise: Premise sentence as a string.
        hypothesis: Hypothesis sentence as a string.

    Returns:
        A CPU tensor holding the arg-max class index for the pair. The mapping
        from index to label name is defined by the training data, not here.
    """

    def _encode(sentence):
        # Tokenize one sentence and attach its token count under 'length'.
        encoded = tokenizer(sentence, return_tensors="pt").to(device)
        # The length is kept on the CPU, as in the original cell (presumably
        # because the embedders pack sequences, which needs CPU lengths --
        # TODO confirm against src.models).
        encoded['length'] = encoded['attention_mask'].sum(dim=1).cpu()
        return encoded

    tokenized_premise = _encode(premise)
    tokenized_hypothesis = _encode(hypothesis)

    # Inference must not run in training mode (dropout etc.) and needs no
    # autograd graph. The caller's train/eval mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            model_output = model(tokenized_premise, tokenized_hypothesis)
    finally:
        model.train(was_training)

    prediction = model_output.argmax(dim=1)
    return prediction.cpu()

In [37]:
# Hand-written example pair. `model` is whatever the evaluation loop assigned
# last (bi-lstm-pool when the cells are run top to bottom).
premise, hypothesis = (
    "Two men sitting in the sun",
    "Nobody is sitting in the shade",
)

predict_label(model, premise, hypothesis)

tensor([2])

In [38]:
# Second hand-written example pair, scored with the same leftover `model`
# from the evaluation loop.
premise, hypothesis = (
    "A man is walking a dog",
    "No cat is outside",
)

predict_label(model, premise, hypothesis)

tensor([2])

In [None]:
# Export only the LSTM encoder weights of the uni-directional LSTM model.
#
# The model is rebuilt from its own checkpoint here rather than reusing the
# notebook-level `model`: that variable is whatever the evaluation loop
# assigned last (bi-lstm-pool when run top to bottom), so saving from it
# would write another architecture's weights into "lstm_embedder.pt".
checkpoint_path = "./models/lstm_2024_04_17_13_47.pt"
embedding_model_file = "models/lstm_embedder.pt"

lstm_model = SentenceClassificationModel(
    LSTMEmbedder(vectors), mlp_hidden_dim, 3
).to(device)
load_checkpoint_weights(lstm_model, checkpoint_path, device, skip_glove=True)

# Assumes LSTMEmbedder exposes its recurrent layer as `.lstm`, as the original
# cell did via `model.embedder.lstm` -- TODO confirm against src.models.
torch.save(lstm_model.embedder.lstm.state_dict(), embedding_model_file)

In [28]:
# embedder.lstm.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('mps')))