In [1]:
# Name of the saved model: passed to load_latest() as `save_name` and used as the
# sub-directory of ./checkpoints where the SentEval results pickle is written.
model_save_name = "InferSent-BiMaxPool_v3"
# SentEval classifier profile; the parameter cell accepts 'fast' or 'default'.
config = 'fast'
# Task subset to evaluate; the evaluation cell accepts 'all', 'infersent',
# 'working', 'transfer_all', 'probing_all' or 'test'.
tasks = 'infersent'

In [2]:
from __future__ import absolute_import, division, unicode_literals

import os
import sys
import logging

from itertools import zip_longest
from collections import Counter

import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence
from torchtext.vocab import Vocab
import pytorch_lightning as pl
import pickle
import sklearn

from utils.text_processing import vocab_builder, text_preprocessor
from modules.embedding import Vocab_Embedding
from models.InferSent import InferSent
from utils.reproducibility import load_latest

# Location of the SentEval checkout. Set the SENTEVAL_PATH environment variable
# to point at another clone; the previously hard-coded location is kept as the
# fallback so existing setups behave exactly as before.
PATH_TO_SENTEVAL = os.environ.get('SENTEVAL_PATH', 'C:/Users/ivoon/SentEval')
PATH_TO_DATA = PATH_TO_SENTEVAL + '/data'

# Register the checkout on the module search path before importing senteval.
sys.path.insert(0, PATH_TO_SENTEVAL)

import senteval

In [3]:
def prepare(params, samples):
    """SentEval ``prepare`` hook: build the vocabulary and embedding for a task.

    Builds a vocabulary from ``samples`` with ``vocab_builder``, stores it on
    ``params.vocab`` and replaces the ``embedding`` attribute of
    ``params.InferSent`` with a ``Vocab_Embedding`` created from that vocabulary.

    Args:
        params: Parameter object passed in by SentEval; must expose ``InferSent``.
        samples: Sentences of the current task, forwarded to ``vocab_builder``.

    Returns:
        None. ``params`` is updated in place.
    """
    task_vocab = vocab_builder(samples)
    params.vocab = task_vocab

    encoder = params.InferSent
    encoder.embedding = Vocab_Embedding(task_vocab, None)

def batcher(params, batch):
    """SentEval ``batcher`` hook: encode a batch of sentences into a NumPy array.

    Every sentence is converted with ``text_preprocessor`` using ``params.vocab``.
    The converted sentences are padded to the length of the longest one with
    the ``"<PAD>"`` vocabulary entry and arranged position-major (one row per
    token position, one column per sentence) before being handed to
    ``params.InferSent.encode``.

    Args:
        params: Parameter object passed in by SentEval; must expose ``vocab``
            and ``InferSent`` (with ``device`` and ``encode``).
        batch: Iterable of sentences, each accepted by ``text_preprocessor``.

    Returns:
        numpy.ndarray holding the encoder output for the batch.
    """
    token_ids = [text_preprocessor(sentence, params.vocab) for sentence in batch]

    # zip_longest(*rows) transposes the batch and pads short sentences in one go.
    padded = list(zip_longest(*token_ids, fillvalue=params.vocab["<PAD>"]))

    inputs = torch.LongTensor(padded).to(params.InferSent.device)

    with torch.no_grad():
        embeddings = params.InferSent.encode(inputs)

    # Tensor.numpy() only accepts CPU tensors; .cpu() is a no-op when the
    # output already lives on the CPU, so this is safe for either device.
    return embeddings.detach().cpu().numpy()

In [4]:
# Classifier settings for the two SentEval profiles. The 'default' branch used
# to be a copy of 'fast', so selecting it silently ran the fast settings; it now
# uses the values the SentEval README lists for its default configuration.
if config == "fast":
    # Quick prototyping profile: fewer folds, larger batches, early stopping sooner.
    params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}
    params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
                            'tenacity': 3, 'epoch_size': 2}
elif config == "default":
    # Slower reference profile.
    params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}
    params['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 64,
                            'tenacity': 5, 'epoch_size': 4}
else:
    raise ValueError(f"{config} is not a recognized SentEval profile. Please choose either 'fast' or 'default'.")

# SentEval reports progress through the logging module; show everything.
logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)

# Request CUDA when it is available (unchanged behaviour on GPU machines) and
# fall back to CPU otherwise so the notebook also runs on CPU-only hosts.
checkpoint_device = "cuda" if torch.cuda.is_available() else "cpu"
model = load_latest(InferSent, save_name=model_save_name, inference=True,
                    map_location=checkpoint_device)

# The batcher/prepare hooks reach the encoder through params.InferSent, so it
# has to be registered before the engine is constructed.
params['InferSent'] = model

se = senteval.engine.SE(params, batcher, prepare)

# Named task subsets selectable through `tasks`. 'all' is exactly the transfer
# tasks followed by the probing tasks, so it is composed from those two lists
# instead of being repeated by hand.
_TRANSFER_TASKS = ['CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC', 'SNLI',
                   'SICKEntailment', 'SICKRelatedness', 'STSBenchmark', 'ImageCaptionRetrieval',
                   'STS12', 'STS13', 'STS14', 'STS15', 'STS16']
_PROBING_TASKS = ['Length', 'WordContent', 'Depth', 'TopConstituents', 'BigramShift', 'Tense',
                  'SubjNumber', 'ObjNumber', 'OddManOut', 'CoordinationInversion']

TASK_SUBSETS = {
    'all': _TRANSFER_TASKS + _PROBING_TASKS,
    'infersent': ['MR', 'CR', 'SUBJ', 'MPQA', 'SST2', 'TREC', 'MRPC',
                  'SICKRelatedness', 'SICKEntailment', 'STS14'],
    'working': ['SST2', 'SST5', 'TREC', 'MRPC',
                'Length', 'WordContent', 'Depth', 'TopConstituents',
                'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber',
                'OddManOut', 'CoordinationInversion'],
    'transfer_all': _TRANSFER_TASKS,
    'probing_all': _PROBING_TASKS,
    'test': ['TREC'],
}

if tasks not in TASK_SUBSETS:
    # Build the list of choices from the table so the message cannot drift
    # out of sync with the accepted values (it previously omitted 'test').
    valid_subsets = ", ".join(repr(subset_name) for subset_name in TASK_SUBSETS)
    raise ValueError(f"{tasks} is not a recognized SentEval tasks subset. Please choose one of: {valid_subsets}.")

# Copy so later changes to transfer_tasks never alter the subset table.
transfer_tasks = list(TASK_SUBSETS[tasks])

print("Evaluating on:")
print(transfer_tasks)

results = se.eval(transfer_tasks)

print(results)

# Persist the results next to the model checkpoints. The directory is created
# if needed so a long evaluation run is not lost to a missing folder, and the
# file is opened write-only because nothing is read back here.
results_dir = os.path.join("./checkpoints", model_save_name)
os.makedirs(results_dir, exist_ok=True)
with open(os.path.join(results_dir, f"SentEval_{config}_{tasks}.pkl"), "wb") as file:
    pickle.dump(results, file)


Found pretrained model at ./checkpoints\InferSent-BiMaxPool_v3\lightning_logs\version_7560215\checkpoints\epoch=8-step=77255.ckpt
2021-04-09 21:47:56,906 : ***** Transfer task : MR *****


Evaluating on:
['MR', 'CR', 'SUBJ', 'MPQA', 'SST2', 'TREC', 'MRPC', 'SICKRelatedness', 'SICKEntailment', 'STS14']
2021-04-09 21:48:02,685 : Loading vectors from ./data/glove\glove.840B.300d.txt.pt
2021-04-09 21:48:21,813 : Generating sentence embeddings
2021-04-09 22:41:02,503 : Generated sentence embeddings
2021-04-09 22:41:02,604 : Training pytorch-MLP-nhid0-rmsprop-bs128 with (inner) 5-fold cross-validation
2021-04-09 22:41:39,032 : Best param found at split 1: l2reg = 0.001                 with score 75.25
2021-04-09 22:42:16,750 : Best param found at split 2: l2reg = 0.0001                 with score 74.57
2021-04-09 22:42:53,434 : Best param found at split 3: l2reg = 1e-05                 with score 74.03
2021-04-09 22:43:29,920 : Best param found at split 4: l2reg = 1e-05                 with 

RuntimeError: CUDA out of memory. Tried to allocate 2.06 GiB (GPU 0; 2.00 GiB total capacity; 80.06 MiB already allocated; 1.12 GiB free; 82.00 MiB reserved in total by PyTorch)