In [1]:
import torch

from transformers import AutoTokenizer, AutoModel

import numpy as np
from scipy import spatial, stats

from models import SupervisedSimCSE, UnsupervisedSimCSE

In [2]:
# Device: use the first CUDA GPU when one is available, otherwise fall back to CPU
# so the notebook still runs on machines without a GPU.
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load Pre-Trained Tokenizer, LM
tokenizer=AutoTokenizer.from_pretrained("roberta-base")
pretrained=AutoModel.from_pretrained("roberta-base").to(device)

# Load Trained Model: Supervised SimCSE
#model=SupervisedSimCSE(pretrained=pretrained)
# Load Trained Model: Unsupervised SimCSE
model=UnsupervisedSimCSE(pretrained=pretrained)
# map_location remaps the checkpoint's tensors onto `device`; without it, a checkpoint
# saved from a GPU cannot be deserialized on a machine where that GPU is missing.
state_dict=torch.load("./model/SimCSE_Unsup_batch128_lr5e-05_step5500.pth", map_location=device)
model.load_state_dict(state_dict)
model=model.to(device)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
# STS Benchmark Dataset
# https://ixa2.si.ehu.eus/stswiki/index.php/STSbenchmark
# Read the whole test split into one string; rows are split apart later.
# The encoding is pinned so decoding does not depend on the platform's locale default
# (assumes the file is UTF-8 -- TODO confirm against the dataset distribution).
# The `with` block closes the file on exit, so no explicit close() is needed.
with open("./dataset/stsbenchmark/sts-test.csv", "r", encoding="utf-8") as f:
    stsb_test=f.read()

In [4]:
# Eval Mode: put the model's modules into evaluation behaviour before scoring
model.eval()

preds=[]   # predicted cosine similarity per sentence pair
labels=[]  # score parsed from column 4 of each dataset row

# Inference only: no_grad avoids building autograd graphs for every forward pass.
with torch.no_grad():
    for data in stsb_test.split('\n'):
        # Skip blank rows (e.g. the empty string after a trailing newline) instead of
        # unconditionally dropping the last element, which would lose a real record
        # when the file does not end with a newline.
        if not data.strip():
            continue

        # Tab-separated columns 4, 5, 6 hold the score and the two sentences.
        label, sent1, sent2=data.split('\t')[4:7]
        labels.append(float(label))

        repr_sent1=model.get_embedding(tokenizer.encode(sent1, return_tensors='pt').to(device))
        repr_sent2=model.get_embedding(tokenizer.encode(sent2, return_tensors='pt').to(device))

        # scipy's cosine distance is defined for 1-D vectors; flatten in case
        # get_embedding returns a leading batch axis (presumably (1, dim) -- TODO confirm).
        vec_sent1=repr_sent1.detach().cpu().numpy().reshape(-1)
        vec_sent2=repr_sent2.detach().cpu().numpy().reshape(-1)

        # cosine similarity = 1 - cosine distance
        pred=1-spatial.distance.cosine(vec_sent1, vec_sent2)
        preds.append(pred)

In [5]:
# Pearson correlation matrix of preds vs. labels; the off-diagonal entries are
# the correlation between the two lists.
pearson_matrix=np.corrcoef(preds, labels)
pearson_matrix

array([[1.        , 0.80023531],
       [0.80023531, 1.        ]])

In [6]:
# Spearman rank correlation (and its p-value) between preds and labels.
spearman_result=stats.spearmanr(preds, labels)
spearman_result

SpearmanrResult(correlation=0.7906730267882331, pvalue=1.056991534041716e-295)