In [1]:
import os
from torchtext import data
import torch
import torch.nn as nn
import torch.optim as optim
import senteval
import numpy as np
from torchtext.datasets import SNLI
from torchtext.vocab import GloVe
from model.NLINet import NLINet
import logging
logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)

In [2]:
# build dataset and word embedding

glove = GloVe(name='840B', dim=300, cache="./dataset/.vector_cache")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#set up fields
text_field = data.Field(tokenize='spacy',tokenizer_language="en_core_web_sm",
                        lower=True,include_lengths=True,batch_first=True)
label_field = data.Field(sequential=False)

train, val, test = SNLI.splits(text_field, label_field, root="./dataset/.data")

# build vocab
text_field.build_vocab(train, vectors=glove)
label_field.build_vocab(train)
vocabulary = text_field.vocab

train_iters,val_iters,test_iters = data.BucketIterator.splits(
    (train, val, test), batch_size=128, device=device)

2021-04-22 21:08:13,494 : Loading vectors from ../my_job/dataset/.vector_cache/glove.840B.300d.txt.pt


In [3]:
def eval_SentEval(NLINet_model,vocabulary,text_field,device):
    logging.info("**************Start SentEval evaluation*******************")
    def batcher(params, batch):
        batch = [sent if sent != [] else ['.'] for sent in batch]
        torch.cuda.empty_cache()
        with torch.no_grad():
            batch_pad = params.text_field.process(batch,device=params.device)
            batch, batch_len = batch_pad
            embedding_glove = params.NLINet_model.embedding(batch)
            embeddings_sents = params.NLINet_model.encoder_model(embedding_glove,batch_len)
            embeddings_sents = embeddings_sents.cpu().numpy()
        return embeddings_sents

    # parameters
    params = {'task_path': "./SentEval/data", 'usepytorch': True, 'kfold': 10,'batch_size': 256}
    params['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 256,
                            'tenacity': 5, 'epoch_size': 4}

    params["word2id"] = vocabulary.stoi
    params["word_vec"] = vocabulary.vectors
    params["wvec_dim"] = 300
    params["NLINet_model"] = NLINet_model
    params["text_field"] = text_field
    params["device"] = device

    se = senteval.engine.SE(params, batcher)
    transfer_tasks = ['CR','MR', 'SUBJ', 'MPQA', 'SST2', 'TREC',
                      'SICKRelatedness', 'SICKEntailment', 'MRPC', 'STS14']

    results = se.eval(transfer_tasks)

    logging.info("SentEval evalution_results:", results)
    logging.info('********************SentEval evaluation completed*************************')
    
def eval_SNLI(NLINet_model,test_iters):
    logging.info("***********************Start SNLI evaluation*******************************")
    eval_acc = 0
    for j, batch in enumerate(test_iters):
        hypothesis = batch.hypothesis
        premise = batch.premise
        labels = batch.label-1

        # forward + backward + optimize
        preds = NLINet_model(hypothesis, premise)
        eval_acc += binary_acc(preds, labels).item()

    eval_acc = eval_acc/len(test_iters.dataset.examples)
    logging.info(f"SNLI evalution_accuracy: {eval_acc:.5f}")
    logging.info("***************SNLI evaluation completed********************")
    
def binary_acc(preds, y):
    """
    get accuracy
    """
    preds = torch.argmax(preds, dim=1)
    correct = torch.eq(preds, y).float()
    acc = correct.sum()
    return acc

In [4]:
# load Baseline model 
Baseline_model_path = os.path.join('./output/Baseline', "models", 'best_checkpoint.pkl')
NLINet_Baseline_model=torch.load(Baseline_model_path)
NLINet_Baseline_model.eval()

NLINet(
  (embedding): Embedding(33635, 300)
  (encoder_model): Baseline()
  (classifier): Sequential(
    (0): Linear(in_features=1200, out_features=512, bias=True)
    (1): Linear(in_features=512, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=3, bias=True)
  )
)

In [5]:
# evaluate Baseline model by SNLI
eval_SNLI(NLINet_Baseline_model,test_iters)

2021-04-22 21:09:11,384 : ***********************Start SNLI evaluation*******************************
2021-04-22 21:09:12,111 : SNLI evalution_accuracy: 0.65493
2021-04-22 21:09:12,112 : ***************SNLI evaluation completed********************


In [6]:
# evaluate Baseline model by SentEval
eval_SentEval(NLINet_Baseline_model,vocabulary,text_field,device)

2021-04-22 21:09:12,116 : **************Start SentEval evaluation*******************
2021-04-22 21:09:12,117 : ***** Transfer task : CR *****


2021-04-22 21:09:12,129 : Generating sentence embeddings
2021-04-22 21:09:12,165 : Generated sentence embeddings
2021-04-22 21:09:12,166 : Training pytorch-MLP-nhid0-adam-bs256 with (inner) 10-fold cross-validation
2021-04-22 21:10:04,817 : Best param found at split 1: l2reg = 1e-05                 with score 77.8
2021-04-22 21:10:47,885 : Best param found at split 2: l2reg = 1e-05                 with score 78.16
2021-04-22 21:11:32,130 : Best param found at split 3: l2reg = 0.0001                 with score 78.13
2021-04-22 21:12:15,561 : Best param found at split 4: l2reg = 0.0001                 with score 77.66
2021-04-22 21:12:59,606 : Best param found at split 5: l2reg = 0.001                 with score 77.89
2021-04-22 21:13:43,550 : Best param found at split 6: l2reg = 0.0001                 with score 77.69
2021-04-22 21:14:28,628 : B