In [None]:
!pip --quiet install sentence_transformers

In [None]:
from sentence_transformers import SentenceTransformer
import torch
from torch.nn.functional import cosine_similarity
import numpy as np

# Checkpoint name handed to SentenceTransformer; kept in one place so it is
# easy to spot and swap.
MODEL_NAME = 'all-MiniLM-L12-v2'

# Embedding model shared by every later cell that calls model.encode(...).
model = SentenceTransformer(MODEL_NAME)

In [None]:
# Example sentences that the query sentence will be compared against.
(
    example_sentence1,
    example_sentence2,
    example_sentence3,
    example_sentence4,
) = (
    # Sentences 1 and 2 express the same idea (reading in one's free time)
    # with different wording.
    "I enjoy reading books in my free time.",
    "In my spare time, I like to read books.",
    # Sentence 3 is about a different topic (weather / going for a walk).
    "The weather is nice, so let's go for a walk.",
    # Sentence 4 describes a cat moving through an alley in formal vocabulary.
    "The feline gracefully traversed the narrow alley, demonstrating remarkable agility.",
)

In [None]:
# Gather the example sentences into a list, in the order they were defined.
sentences = [
    example_sentence1,
    example_sentence2,
    example_sentence3,
    example_sentence4,
]

# Obtain the embedding representation of every example sentence in one call.
sentences_embeddings = model.encode(sentences)

# Shape is (num_sentences, dim); the recorded run printed (4, 384).
print(
    "np.shape(sentences_embeddings) : ",
    np.shape(sentences_embeddings),
)

np.shape(sentences_embeddings) :  (4, 384)


In [None]:
# Query sentence to compare against the example sentences.
main_sentence = (
    "The cat moved with impressive dexterity through the slim passageway."
)

# Obtain the embedding representation of the query sentence. A single string
# (not a list) is passed, and the recorded run printed a 1-D shape: (384,).
main_embeddings = model.encode(main_sentence)

# Shape is (dim,).
print(
    "np.shape(main_embeddings) : ",
    np.shape(main_embeddings),
)

np.shape(main_embeddings) :  (384,)


In [None]:
# Compute the cosine similarity between the query sentence and each example
# sentence, collecting the scores in the same order as `sentences`.

# The query embedding does not change between iterations, so convert it to a
# (1, dim) tensor once instead of rebuilding it on every pass.
main_embedding_tensor = torch.unsqueeze(torch.tensor(main_embeddings), dim=0)

similarity_scores = []
# Count from 1 so the printed label matches the variable names
# example_sentence1 .. example_sentence4 (counting from 0 printed
# "example_sentence0", which does not exist).
for sentence_number, embedding in enumerate(sentences_embeddings, start=1):
    example_embedding_tensor = torch.unsqueeze(torch.tensor(embedding), dim=0)
    # Both inputs are (1, dim); cosine_similarity reduces over dim=1 by
    # default, so each score is a one-element tensor.
    similarity_score = cosine_similarity(main_embedding_tensor, example_embedding_tensor)
    similarity_scores.append(similarity_score)
    print(f"main_sentence, example_sentence{sentence_number} : {similarity_score}")

main_sentence, example_sentence0 : tensor([0.0233])
main_sentence, example_sentence1 : tensor([0.0147])
main_sentence, example_sentence2 : tensor([0.1319])
main_sentence, example_sentence3 : tensor([0.6481])


In [None]:
# Show the query sentence together with the example sentence whose cosine
# similarity to it is highest.
print(f"main_sentence : {main_sentence}")

# Arg-max over the score list. float() accepts plain numbers as well as
# one-element tensors, so the lookup does not depend on how the scores are
# stored. On ties the first position wins, matching list.index(max(...)).
most_similarity_index = max(
    range(len(similarity_scores)),
    key=lambda position: float(similarity_scores[position]),
)

# This line shows the best-matching example sentence, so it carries its own
# label instead of repeating "main_sentence".
print(f"most_similar_sentence : {sentences[most_similarity_index]}")

main_sentence : The cat moved with impressive dexterity through the slim passageway.
main_sentence : The feline gracefully traversed the narrow alley, demonstrating remarkable agility.
