In [2]:
from sentence_transformers import SentenceTransformer, util
import torch

# Checkpoint identifier handed to SentenceTransformer; kept in one place so it is easy to swap.
MODEL_NAME = 'xlm-r-distilroberta-base-paraphrase-v1'
embedder = SentenceTransformer(MODEL_NAME)


In [3]:
# Example sentences that make up the searchable corpus.
corpus = [
    'A man is eating food.',
    'A man is eating pasta now.',
    'A man is eating a piece of bread.',
    'The girl is carrying a baby.',
    'A man is riding a horse.',
    'A woman is playing violin.',
    'Two men pushed carts through the woods.',
    'A man is riding a white horse on an enclosed ground.',
    'A monkey is playing drums.',
    'A cheetah is running behind its prey.',
]

# Encode the whole corpus once, as a tensor, so each query below can reuse it.
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

# Sentences to look up against the corpus.
queries = [
    'A man is eating pasta.',
    'Someone in a gorilla costume is playing a set of drums.',
    'A cheetah chases prey on across a field.',
]


# For each query, find the top_k corpus sentences ranked by cosine similarity.
# top_k is clamped to the corpus size because torch.topk raises when k exceeds
# the number of scores available.
top_k = min(2, len(corpus))
for query in queries:
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # [0] selects the score row belonging to this single query.
    cos_scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]
    # Move scores to the CPU before ranking and printing.
    cos_scores = cos_scores.cpu()

    # torch.topk returns (values, indices) for the top_k highest scores.
    top_scores, top_indices = torch.topk(cos_scores, k=top_k)

    print("\n\n======================\n\n")
    print("Query:", query)
    # Report the actual number of results instead of a hard-coded count.
    print("\nTop %d most similar sentences in corpus:" % top_k)

    for score, idx in zip(top_scores, top_indices):
        print(corpus[idx], "(Score: %.4f)" % (score))





Query: A man is eating pasta.

Top 5 most similar sentences in corpus:
A man is eating pasta now. (Score: 0.9490)
A man is eating food. (Score: 0.7416)




Query: Someone in a gorilla costume is playing a set of drums.

Top 5 most similar sentences in corpus:
A monkey is playing drums. (Score: 0.7041)
A cheetah is running behind its prey. (Score: 0.3311)




Query: A cheetah chases prey on across a field.

Top 5 most similar sentences in corpus:
A cheetah is running behind its prey. (Score: 0.8161)
A man is riding a white horse on an enclosed ground. (Score: 0.3325)
