In [None]:
!pip install huggingface_hub
!pip install sentence-transformers
!pip install --upgrade transformers

In [3]:
# Standard Library Imports
import warnings

# PyTorch
import torch

## Hugging Face Transformers
from transformers import pipeline
from transformers.utils import logging
from sentence_transformers import util
from sentence_transformers import SentenceTransformer

# Hugging Face Hub
from huggingface_hub import login

## Google Colab
from google.colab import userdata

# Configuration: keep the cell outputs free of library noise
warnings.filterwarnings(action='ignore')  # silence every Python warning
logging.set_verbosity_error()             # transformers logging: errors only

In [None]:
# Authenticate with the Hugging Face Hub.
# The token is fetched from the Colab secret named "HUGGINGFACEHUB_API",
# so it is never written into the notebook itself.
login(token=userdata.get("HUGGINGFACEHUB_API"))

In [None]:
# Sentence-embedding model used throughout the notebook
# (model card: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(model_name_or_path=EMBEDDING_MODEL_NAME)

### Calculate the pairwise cosine similarity between two lists of sentences

In [12]:
# Two lists of example sentences to compare against each other
sentences1 = [
    'The cat sits outside',
    'A man is playing guitar',
    'The movies are awesome',
]
sentences2 = [
    'The dog plays in the garden',
    'A woman watches TV',
    'The new movie is so great',
]

# Encode each list into a tensor of embeddings (first list, then second)
embeddings1, embeddings2 = (
    model.encode(batch, convert_to_tensor=True)
    for batch in (sentences1, sentences2)
)

# Show the shape of both embedding tensors side by side
print(f'{embeddings1.shape} | {embeddings2.shape}')

torch.Size([3, 384]) | torch.Size([3, 384])


In [11]:
# Pairwise cosine similarity between the two embedding sets:
# entry [i, j] compares sentences1[i] with sentences2[j].
cosine_scores = util.cos_sim(a=embeddings1, b=embeddings2)
print(cosine_scores)

tensor([[ 0.2838,  0.1310, -0.0029],
        [ 0.2277, -0.0327, -0.0136],
        [-0.0124, -0.0465,  0.6571]])


In [15]:
# Summary statistics taken over every entry of the similarity matrix
# (all sentence pairs, not only the matching positions).
average_similarity = cosine_scores.mean()
max_similarity = cosine_scores.max()
median_similarity = cosine_scores.median()

# Report the statistics as plain Python floats
print(f'Average Similarity Score: {average_similarity.item()}')
print(f'\nMaximum Similarity Score: {max_similarity.item()}')
print(f'\nMedian Similarity Score: {median_similarity.item()}')

Average Similarity Score: 0.13239024579524994

Maximum Similarity Score: 0.6571146845817566

Median Similarity Score: -0.002861538901925087
