# Results of the Experiments

This notebook contains the results of the models, followed by a section to test the models yourself.

## SNLI Results

This section contains the accuracy scores of the pre-trained models on the SNLI task.

In [1]:
# import required packages
import torch
import pytorch_lightning as pl
import spacy
# spaCy English pipeline, kept under the notebook-level name `tokenizer`
# (the `tokenize` helper further down calls it to split raw sentences).
# NOTE(review): "en" is a shortcut model name; spaCy 3 dropped shortcut names
# in favour of full package names — confirm which spaCy version is pinned.
tokenizer = spacy.load("en")

# import required code files
# NOTE(review): these wildcard imports hide where names come from; the cells
# below rely on at least `load_snli` and `FullModel` being provided by them.
from dataset.LoadData import *
from main import *

In [2]:
# fetch what the evaluation cells need from the SNLI loader: the token
# vocabulary, the label vocabulary and the test iterator (the third and
# fourth return values are not used in this notebook and are discarded)
vocab, label_vocab, _, _, test_iter = load_snli(
    device=None,
    batch_size=64,
    return_label_vocab=True,
)

### AWE Model 

This subsection contains the results of the AWE model on the SNLI task.

In [6]:
# evaluation-only Lightning trainer: no logger, no checkpointing, a single
# GPU when CUDA is available, progress bar refreshed on every step
trainer = pl.Trainer(
    logger=False,
    checkpoint_callback=False,
    gpus=int(torch.cuda.is_available()),
    progress_bar_refresh_rate=1,
)

# restore the pre-trained AWE model from its checkpoint file
checkpoint_path = 'pl_logs/lightning_logs/awe/checkpoints/epoch=10.ckpt'
model = FullModel.load_from_checkpoint(checkpoint_path)

# freeze the model, then run it over the SNLI test iterator
model.freeze()
test_result = trainer.test(model, test_dataloaders=test_iter, verbose=True)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(HTML(value='Testing'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': tensor(0.6793, device='cuda:0'),
 'test_loss': tensor(0.7365, device='cuda:0')}
--------------------------------------------------------------------------------



### UniLSTM Model 

This subsection contains the results of the unidirectional LSTM on the SNLI task.

In [7]:
# evaluation-only Lightning trainer: no logger, no checkpointing, a single
# GPU when CUDA is available, progress bar refreshed on every step
trainer = pl.Trainer(
    logger=False,
    checkpoint_callback=False,
    gpus=int(torch.cuda.is_available()),
    progress_bar_refresh_rate=1,
)

# restore the pre-trained UniLSTM model from its checkpoint file
checkpoint_path = 'pl_logs/lightning_logs/unilstm/checkpoints/epoch=11.ckpt'
model = FullModel.load_from_checkpoint(checkpoint_path)

# freeze the model, then run it over the SNLI test iterator
model.freeze()
test_result = trainer.test(model, test_dataloaders=test_iter, verbose=True)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(HTML(value='Testing'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': tensor(0.7971, device='cuda:0'),
 'test_loss': tensor(0.5171, device='cuda:0')}
--------------------------------------------------------------------------------



### BiLSTM Model 

This subsection contains the results of the bidirectional LSTM on the SNLI task.

In [None]:
# evaluation-only Lightning trainer: no logger, no checkpointing, a single
# GPU when CUDA is available, progress bar refreshed on every step
trainer = pl.Trainer(
    logger=False,
    checkpoint_callback=False,
    gpus=int(torch.cuda.is_available()),
    progress_bar_refresh_rate=1,
)

# restore the pre-trained BiLSTM model from its checkpoint file
# NOTE(review): the `?` in the file name looks like a placeholder for the
# epoch number (this cell has no recorded execution) — fill it in before running.
checkpoint_path = 'pl_logs/lightning_logs/bilstm/checkpoints/epoch=?.ckpt'
model = FullModel.load_from_checkpoint(checkpoint_path)

# freeze the model, then run it over the SNLI test iterator
model.freeze()
test_result = trainer.test(model, test_dataloaders=test_iter, verbose=True)

### BiLSTMMax Model 

This subsection contains the results of the bidirectional LSTM with max pooling on the SNLI task.

In [None]:
# evaluation-only Lightning trainer: no logger, no checkpointing, a single
# GPU when CUDA is available, progress bar refreshed on every step
trainer = pl.Trainer(
    logger=False,
    checkpoint_callback=False,
    gpus=int(torch.cuda.is_available()),
    progress_bar_refresh_rate=1,
)

# restore the pre-trained BiLSTMMax model from its checkpoint file
# NOTE(review): the `?` in the file name looks like a placeholder for the
# epoch number (this cell has no recorded execution) — fill it in before running.
checkpoint_path = 'pl_logs/lightning_logs/bilstmmax/checkpoints/epoch=?.ckpt'
model = FullModel.load_from_checkpoint(checkpoint_path)

# freeze the model, then run it over the SNLI test iterator
model.freeze()
test_result = trainer.test(model, test_dataloaders=test_iter, verbose=True)

## SentEval Results

This section contains the scores of the pre-trained models on the SentEval tasks. **TODO:** these results have not been added yet.

## Test Yourself

This section lets you enter your own premise and hypothesis, run them through one of the models, and see the relation the model predicts.

In [3]:
# function to tokenize the input string
def tokenize(vocab, sentence, nlp=None):
    """Convert a raw sentence into a 1-D tensor of vocabulary indices.

    Args:
        vocab: mapping from lower-cased token text to an integer index. It is
            looked up with ``vocab[token]``, so the handling of unknown tokens
            is whatever the given mapping does on a missing key (the notebook
            passes ``vocab.stoi``).
        sentence: the raw input string.
        nlp: optional callable that splits ``sentence`` into objects exposing
            a ``.text`` attribute. Defaults to the notebook-level spaCy
            ``tokenizer``.

    Returns:
        A ``torch.long`` tensor with one index per token, in sentence order.
    """
    if nlp is None:
        nlp = tokenizer
    indices = [vocab[token.text.lower()] for token in nlp(sentence)]
    # An explicit dtype keeps the result usable as embedding indices even when
    # the sentence yields no tokens: torch.tensor([]) would default to float32.
    return torch.tensor(indices, dtype=torch.long)

In [10]:
# checkpoint of each pre-trained model, keyed by the name the user types
# NOTE(review): the `epoch=?` file names for BiLSTM and BiLSTMMax look like
# placeholders — fill in the real epoch numbers before selecting those models.
checkpoint_paths = {
    'AWE': 'pl_logs/lightning_logs/awe/checkpoints/epoch=10.ckpt',
    'UniLSTM': 'pl_logs/lightning_logs/unilstm/checkpoints/epoch=11.ckpt',
    'BiLSTM': 'pl_logs/lightning_logs/bilstm/checkpoints/epoch=?.ckpt',
    'BiLSTMMax': 'pl_logs/lightning_logs/bilstmmax/checkpoints/epoch=?.ckpt',
}

# ask the user for the model; the typed name is kept separate from the loaded
# module so `model` only ever refers to the model object
print('Model to use (AWE, UniLSTM, BiLSTM, BiLSTMMax):')
model_name = input().strip()
if model_name not in checkpoint_paths:
    raise ValueError('Incorrect model name')
model = FullModel.load_from_checkpoint(checkpoint_paths[model_name])

# freeze the model before predicting, exactly as the evaluation cells do
# before testing, so inference does not run on a model left in training mode
model.freeze()

# ask user for premise and hypothesis
print('Input premise:')
premise_text = input()
print('Input hypothesis:')
hypothesis_text = input()

# tokenize the premise and hypothesis and add a leading batch dimension of 1
premise_ids = tokenize(vocab.stoi, premise_text).unsqueeze(dim=0)
hypothesis_ids = tokenize(vocab.stoi, hypothesis_text).unsqueeze(dim=0)

# an empty sentence cannot be encoded; fail early with a clear message
if premise_ids.shape[1] == 0 or hypothesis_ids.shape[1] == 0:
    raise ValueError('Premise and hypothesis must each contain at least one token')

# get the lengths (number of tokens per sentence)
premise_length = torch.tensor([premise_ids.shape[1]])
hypothesis_length = torch.tensor([hypothesis_ids.shape[1]])

# forward through the embedding
premise_embedded = model.glove_embeddings(premise_ids)
hypothesis_embedded = model.glove_embeddings(hypothesis_ids)

# forward the premise and hypothesis through the Encoder
sentence_representations = model.encoder(premise_embedded, premise_length,
                                         hypothesis_embedded, hypothesis_length)

# pass through the classifier
predictions = model.classifier(sentence_representations)

# get the predicted label; .item() turns the single-element argmax result
# into a plain int before it is used to index the label list
predicted_index = torch.argmax(predictions, dim=1).item()
predicted_label = label_vocab.itos[predicted_index]

# print the predicted label
print('The model predicts the following relation:')
print(predicted_label)

Model to use (AWE, UniLSTM, BiLSTM, BiLSTMMax):
UniLSTM
Input premise:
Two women are embracing while holding to go packages.
Input hypothesis:
Two woman are holding packages.
The model predicts the following relation:
entailment
