In [1]:
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import os
import random

In [2]:
# Expose only GPU index 0 to this process; set before any model is moved to CUDA.
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})

In [3]:
# Read the evaluation file from the working directory and keep the decoded
# JSON object in `test_data`.
# NOTE(review): the file name suggests the KorQuAD v1.0 dev split — confirm.
with open('KorQuAD_v1.0_dev.json', mode='r', encoding='utf8') as dev_file:
    test_data = json.loads(dev_file.read())

In [4]:
# Load four SentenceTransformer checkpoints from local directories.
sbert_iq = SentenceTransformer('../model_results/sbert_iq_all')
ours_ad = SentenceTransformer('../model_results_ft/ours_ad')
ours_ad_st = SentenceTransformer('../model_results_ft/ours_ad_st')
ours_curr = SentenceTransformer('../model_results_ft/ours_curr')

# Move each model to the GPU, in the same order in which it was loaded.
for _loaded_model in (sbert_iq, ours_ad, ours_ad_st, ours_curr):
    _loaded_model.to('cuda')

# Finish with a plain print so the cell's only output is an empty line.
print('')

In [5]:
# Build one evaluation triple per question:
#   'sent'    - the question text,
#   'doc'     - the paragraph the question belongs to (positive passage),
#   'neg_doc' - a random paragraph taken from a *different* article (negative).
#
# The negative article index must be drawn over the articles in
# `test_data['data']`. `len(test_data)` would count the top-level keys of the
# JSON object instead, restricting negatives to the first few articles.
NEG_SAMPLING_SEED = 42  # fixed seed so that Restart & Run All reproduces the same negatives
neg_rng = random.Random(NEG_SAMPLING_SEED)

articles = test_data['data']
if len(articles) < 2:
    # With a single article there is no "other" article to sample from.
    raise ValueError('need at least two articles to sample a negative passage')

pairs = []
for cnt, article in enumerate(articles):
    for p in article['paragraphs']:
        for qs in p['qas']:
            # Draw uniformly from the len(articles) - 1 other articles: sample
            # an index in [0, len - 2] and shift it past the current article.
            # This replaces the rejection loop and its inconsistent bounds.
            rn1 = neg_rng.randrange(len(articles) - 1)
            if rn1 >= cnt:
                rn1 += 1
            neg_paragraph = neg_rng.choice(articles[rn1]['paragraphs'])
            pairs.append({
                'sent': qs['question'],
                'doc': p['context'],
                'neg_doc': neg_paragraph['context'],
            })

In [6]:
# Column-wise views of `pairs`: questions, positive passages, negative passages.
sents = [pair['sent'] for pair in pairs]
psg = [pair['doc'] for pair in pairs]
neg_psg = [pair['neg_doc'] for pair in pairs]

# The same texts with a leading type marker ('[SENT] ' / '[PSG] ').
# NOTE(review): presumably consumed by the '*_st' model — confirm against the caller.
sents_st = ['[SENT] ' + question for question in sents]
psg_st = ['[PSG] ' + passage for passage in psg]
neg_psg_st = ['[PSG] ' + passage for passage in neg_psg]

In [7]:
def psg_sent_vec(psg, sents, neg_psg, model):
    """Encode passage, sentence and negative-passage inputs with ``model``.

    Each argument is handed unchanged to ``model.encode``, in the order
    passage, sentence, negative passage.

    Args:
        psg: Positive passage input for ``model.encode``.
        sents: Sentence/question input for ``model.encode``.
        neg_psg: Negative passage input for ``model.encode``.
        model: Any object exposing an ``encode`` method.

    Returns:
        A 3-tuple ``(passage_vec, sentence_vec, negative_vec)`` holding
        whatever ``model.encode`` returned for the respective input.
    """
    return tuple(model.encode(texts) for texts in (psg, sents, neg_psg))
def knn(index_vec, test_vec, n_neighbors=3):
    """Brute-force cosine nearest-neighbour lookup of ``test_vec`` in ``index_vec``.

    Args:
        index_vec: Vectors to index; passed to ``NearestNeighbors.fit``.
        test_vec: Query vectors; passed to ``NearestNeighbors.kneighbors``.
        n_neighbors: Number of neighbours to return per query. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        The result of ``kneighbors(..., return_distance=True)``; per the
        scikit-learn documentation this is a ``(distances, indices)`` pair.
    """
    knn_model = NearestNeighbors(n_neighbors=n_neighbors,
                                 metric='cosine',
                                 algorithm='brute',
                                 n_jobs=-1)
    knn_model.fit(index_vec)
    return knn_model.kneighbors(test_vec,
                                n_neighbors=n_neighbors,
                                return_distance=True)

def cos_sim(psg, sent, neg_psg, model):
    """Score one sentence against its positive and its negative passage.

    The three inputs are encoded through ``psg_sent_vec``; each passage
    vector is then stacked with the sentence vector and the off-diagonal
    entry ``[0][1]`` of the resulting pairwise cosine-similarity matrix
    is taken.

    Returns:
        ``(positive_similarity, negative_similarity)``.
    """
    passage_vec, sent_vec, neg_vec = psg_sent_vec(psg, sent, neg_psg, model)
    pos_matrix = cosine_similarity([passage_vec, sent_vec])
    neg_matrix = cosine_similarity([neg_vec, sent_vec])
    return pos_matrix[0][1], neg_matrix[0][1]
    
def print_sim(text,pos, neg, model, model_name):
    """Print the mean positive and negative cosine similarity for ``model``.

    For every aligned ``(sentence, positive passage, negative passage)``
    triple, ``cos_sim`` is evaluated and the two scores are accumulated.

    Args:
        text: Iterable of sentences/questions.
        pos: Iterable of positive passages, aligned with ``text``.
        neg: Iterable of negative passages, aligned with ``text``.
        model: Model forwarded to ``cos_sim``.
        model_name: Label used as prefix of the two printed lines.

    The means are taken over the number of triples actually scored. ``zip``
    stops at the shortest input, so dividing by ``len(text)`` would
    understate the mean for unequal lengths. With no triples at all, ``nan``
    is printed instead of raising ``ZeroDivisionError``.
    """
    pos_total, neg_total, n_scored = 0, 0, 0
    for sentence, pos_passage, neg_passage in zip(text, pos, neg):
        pos_score, neg_score = cos_sim(pos_passage, sentence, neg_passage, model)
        pos_total += pos_score
        neg_total += neg_score
        n_scored += 1

    if n_scored:
        pos_mean = pos_total / n_scored
        neg_mean = neg_total / n_scored
    else:
        # Nothing was scored: report an undefined mean rather than crash.
        pos_mean = neg_mean = float('nan')

    print(model_name+'_pos: ', pos_mean)
    print(model_name+'_neg: ', neg_mean)
        