In [1]:
from transformers import BertTokenizer, BertModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased').to(device)
print(device)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


cuda


In [11]:
house_descriptions = [
    "This is a spacious house with a beautiful garden.",
    "Cozy apartment in the heart of downtown.",
    "Large family home with modern amenities.",
    "Charming cottage with a scenic view.",
    "I am a barbie girl in a barbie world."
]

# Tokenize and encode descriptions
encoded_descriptions = [tokenizer.encode(description, add_special_tokens=True, max_length=512, truncation=True) for description in house_descriptions]

In [12]:
max_len = max(len(desc) for desc in encoded_descriptions)
padded_descriptions = [desc + [0] * (max_len - len(desc)) for desc in encoded_descriptions]

# Convert to PyTorch tensors and move to GPU
input_ids = torch.tensor(padded_descriptions).to(device)


In [13]:
with torch.no_grad():
    outputs = model(input_ids)
    embeddings = outputs.last_hidden_state[:, 0, :]  # Use the [CLS] token embeddin

similarity_matrix = cosine_similarity(embeddings.cpu().numpy())

In [14]:
print("Similarity Matrix:")
print(similarity_matrix)

Similarity Matrix:
[[1.         0.79469895 0.830658   0.8217503  0.7700808 ]
 [0.79469895 1.0000001  0.88345337 0.92763567 0.86704135]
 [0.830658   0.88345337 1.         0.923169   0.8204694 ]
 [0.8217503  0.92763567 0.923169   1.         0.831722  ]
 [0.7700808  0.86704135 0.8204694  0.831722   1.        ]]


In [15]:
average_similarity = np.mean(similarity_matrix)
print("Average Similarity Score:", average_similarity)

Average Similarity Score: 0.87765425


In [1]:
import torch
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained SentenceTransformer model onto GPU
model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

# Single list of sentences
sentences = [
    "This is a spacious house with a beautiful garden.",
    "Cozy apartment in the heart of downtown.",
    "Large family home with modern amenities.",
    "Charming cottage with a scenic view.",
    "I am a barbie girl in a barbie world.",
]

# Compute embeddings
embeddings = model.encode(sentences, convert_to_tensor=True).to(device)

In [9]:
cosine_scores = util.cos_sim(embeddings, embeddings)

# Find the pairs with the highest cosine similarity scores
pairs = []
for i in range(cosine_scores.shape[0]):
    for j in range(cosine_scores.shape[1]):
        if cosine_scores[i][j] != 1.000:
            pairs.append({"index": [i, j], "score": cosine_scores[i][j]})


In [10]:
pairs = sorted(pairs, key=lambda x: x["score"], reverse=True)

for pair in pairs[0:10]:
    i, j = pair["index"]
    print("{} \t\t {} \t\t Score: {:.4f}".format(
        sentences[i], sentences[j], pair["score"]
    ))


I am a barbie girl in a barbie world. 		 I am a barbie girl in a barbie world. 		 Score: 1.0000
Charming cottage with a scenic view. 		 Charming cottage with a scenic view. 		 Score: 1.0000
This is a spacious house with a beautiful garden. 		 Large family home with modern amenities. 		 Score: 0.5722
Large family home with modern amenities. 		 This is a spacious house with a beautiful garden. 		 Score: 0.5722
This is a spacious house with a beautiful garden. 		 Charming cottage with a scenic view. 		 Score: 0.5342
Charming cottage with a scenic view. 		 This is a spacious house with a beautiful garden. 		 Score: 0.5342
Cozy apartment in the heart of downtown. 		 Large family home with modern amenities. 		 Score: 0.4416
Large family home with modern amenities. 		 Cozy apartment in the heart of downtown. 		 Score: 0.4416
Cozy apartment in the heart of downtown. 		 Charming cottage with a scenic view. 		 Score: 0.4315
Charming cottage with a scenic view. 		 Cozy apartment in the heart of d