In [8]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Sentence-embedding model, looked up by its model name.
embedder = SentenceTransformer("jhgan/ko-sroberta-multitask")

# Candidate sentences that each query is later compared against.
corpus = [
    '오늘도 나는 달리기를 한다',
    '한강을 걷는다',
    '조깅은 기분이 좋다',
    '마라톤을 할까',
    '철인 3종경기 할까',
]

# Encode the whole corpus in a single call, requesting tensor output, and
# display the resulting embeddings.
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
print(corpus_embeddings)

# Query sentences:
queries = ['오늘은 어디까지 뛸까']

# For each query, report the corpus sentences closest to it by cosine
# similarity. The result count is capped at the corpus size because
# np.argpartition raises when asked for a kth index past the end of the array.
top_k = min(5, len(corpus))
for query in queries:
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    cos_scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]
    # Convert explicitly to a NumPy array so the NumPy calls and the score
    # formatting below do not depend on implicit tensor-to-array coercion.
    cos_scores = cos_scores.cpu().numpy()

    # Negating the scores makes the highest similarity the smallest value;
    # passing every index below top_k as kth leaves the top_k best entries
    # at the front in descending-score order without sorting the rest.
    top_results = np.argpartition(-cos_scores, range(top_k))[:top_k]

    # Ranking and reporting happen per query, so every query in `queries`
    # gets its own result section.
    print("\n\n======================\n\n")
    print("Query:", query)
    print("\nTop %d most similar sentences in corpus:" % top_k)

    for idx in top_results:
        print(corpus[idx].strip(), "(Score: %.4f)" % cos_scores[idx])



tensor([[-1.2425,  0.2502, -0.4412,  ..., -0.1632, -0.2297,  0.2218],
        [-1.4581, -0.1247, -0.3740,  ...,  0.0631,  0.3526,  0.9301],
        [-0.9100,  0.1751,  0.2568,  ...,  0.1309, -0.4577,  0.0573],
        [-0.1526,  0.1008,  0.0649,  ...,  0.0335, -0.1042,  0.3776],
        [-0.2257,  0.0762,  0.8426,  ...,  0.3515, -0.3963, -0.1976]])




Query: 오늘은 어디까지 뛸까

Top 5 most similar sentences in corpus:
오늘도 나는 달리기를 한다 (Score: 0.5788)
마라톤을 할까 (Score: 0.5386)
조깅은 기분이 좋다 (Score: 0.4332)
철인 3종경기 할까 (Score: 0.3722)
한강을 걷는다 (Score: 0.1379)
