In [1]:
import torch

from snli_lightning import SNLIModule
from models import ENCODER_TYPES
from encoder import CLASSIFIER_TYPES
from data import SNLIdataset, LABEL_VALUE
from vocab import Vocab


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# set params for the sentence encoder
opt = {
    "data_dir": "data/",
    "dataset_dir": "snli_1_0/",
    "model_dir": "models/",
    "vocab_file": "snli_vocab.json",
    "embeddings_file": "glove.840B.300d.txt",
    "snli_embeddings": "glove.snli.300d.txt",
    "embedding_size": 300,
    "hidden_size": 2048,
    "aggregate_method": "max",
    "classifier": "mlp"
}

LABELS = [k for k, _ in sorted(LABEL_VALUE.items(), key=lambda x: x[1])]

In [3]:
def predict_one(p, h):

    p_ids = torch.tensor([vocab.encode(vocab.tokenize(p))])
    h_ids = torch.tensor([vocab.encode(vocab.tokenize(h))])

    p = snli_model.enc.embedding(p_ids).float()
    h = snli_model.enc.embedding(h_ids).float()
    u = snli_model.enc.sentence_encoder(p, [len(p_ids)])
    v = snli_model.enc.sentence_encoder(h, [len(h_ids)])
    
    combined = torch.concat([u, v, abs(u - v), u * v], dim=1).float()
    out = snli_model.enc.classifier(combined)
    pred = out.argmax(dim=1)[0].item()

    return LABELS[pred]

In [4]:
# get encoder type from user
enc = input("Encoder type [mean, lstm, bilstm, poolbilstm]: ")
assert enc in ENCODER_TYPES, "Unknown encoder type {}".format(enc)
opt["encoder_type"] = enc

print('Parameters')
print('\n'.join(["{:20}\t{}".format(k,v) for k,v in opt.items()]))

dataset_dir = opt["data_dir"] + opt["dataset_dir"]

# initialize vocab with tokenizer and encoder
vocab = Vocab()

# get vocabulary from vocabfile
vocab_path = dataset_dir + (opt["vocab_file"] if opt["vocab_file"] != None else "snli_vocab.json")
assert vocab.load(vocab_path), print("Cannot load preprocessed vocab")

# read matched embeddings from preprocessed file
embed_path = dataset_dir + (opt["snli_embeddings"] if opt["snli_embeddings"] != None else "glove.snli.300d.txt")
embedding = vocab.match_with_embeddings(path=embed_path, embedding_size=opt["embedding_size"])

# load the model
print("Load model")
snli_model = SNLIModule(embedding=embedding, opt=opt)
snli_model.enc.sentence_encoder.load_state_dict(torch.load(opt["model_dir"] + "encoder_" + opt["encoder_type"]))
snli_model.enc.classifier.load_state_dict(torch.load(opt["model_dir"] + "classifier_" + opt["encoder_type"]))


Encoder type [mean, lstm, bilstm, poolbilstm]: poolbilstm
Parameters
data_dir            	data/
dataset_dir         	snli_1_0/
model_dir           	models/
vocab_file          	snli_vocab.json
embeddings_file     	glove.840B.300d.txt
snli_embeddings     	glove.snli.300d.txt
embedding_size      	300
hidden_size         	2048
aggregate_method    	max
classifier          	mlp
encoder_type        	poolbilstm
Loaded vocabulary with 33635 tokens
Matching vocab with embeddings from data/snli_1_0/glove.snli.300d.txt


30348it [00:10, 2877.17it/s]


Loaded 30348 embeddings from data/snli_1_0/glove.snli.300d.txt
Vocab coverage:  total 33635, common 30348 oov 3287 (9.77%)
Corpus coverage: total 12324969, common 12316551 oov 8418 (0.07%)
Most frequent out-of-vocabulary tokens:
                     	2187
..                   	78
rollerskaters        	62
surfboarder          	50
for$                 	42
graffited            	40
parasailer           	35
men-                 	33
sidewalk-            	30
boogieboard          	28
Load model


<All keys matched successfully>

In [5]:
p = "i am walking in the rain"
h = "i need an umbrella"
l = predict_one(p, h)
print("Model predicts: ", l)

Model predicts:  entailment


In [6]:
p = "the weather is really great"
h = "many people go to the beach"
l = predict_one(p, h)
print("Model predicts: ", l)

Model predicts:  neutral


In [7]:
p = "i am walking in the sun"
h = "it must be winter"
l = predict_one(p, h)
print("Model predicts: ", l)

Model predicts:  entailment


In [None]:
# try your own sentences
p = input("Premise: ")
h = input("Hypothesis: ")
l = predict_one(p, h)
print("Model predicts: ", l)