In [2]:
import torch
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F

class SentenceTransformer(torch.nn.Module):
    """Encode sentences into L2-normalized fixed-size vectors.

    Wraps a Hugging Face tokenizer/encoder pair and reduces the encoder's
    per-token hidden states to one vector per sentence.

    Args:
        model_name: Checkpoint name passed to ``AutoTokenizer.from_pretrained``
            and ``AutoModel.from_pretrained``.
        pooling: ``'mean'`` averages the token embeddings selected by the
            attention mask; ``'cls'`` takes the embedding at position 0.
    """

    def __init__(self, model_name='distilbert-base-uncased', pooling='mean'):
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.encoder = AutoModel.from_pretrained(model_name)
        self.pooling = pooling

    def forward(self, sentences):
        """Tokenize ``sentences`` and return their unit-norm embeddings.

        Args:
            sentences: Input forwarded unchanged to the tokenizer (the demo
                in this notebook passes a list of strings).

        Returns:
            Tensor of shape (batch_size, hidden_size) whose rows are
            L2-normalized. With mean pooling, a row whose attention mask is
            entirely zero comes back as a zero vector rather than NaN.

        Raises:
            ValueError: If ``self.pooling`` is neither ``'mean'`` nor ``'cls'``.
        """
        # Fail fast: reject a bad pooling mode before paying for an encoder pass.
        if self.pooling not in ('mean', 'cls'):
            raise ValueError("Unsupported pooling method")

        inputs = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

        # The tokenizer builds its tensors on the CPU; move them next to the
        # encoder weights so the module still works after `.to(device)`.
        first_param = next(self.encoder.parameters(), None)
        if first_param is not None:
            inputs = {name: tensor.to(first_param.device) for name, tensor in inputs.items()}

        outputs = self.encoder(**inputs)
        token_embeddings = outputs.last_hidden_state  # (batch_size, seq_len, hidden_size)

        if self.pooling == 'mean':
            # (batch_size, seq_len, 1); broadcasts over the hidden dimension.
            mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
            sum_embeddings = torch.sum(token_embeddings * mask, dim=1)
            # Clamp so a fully masked row divides by a tiny positive number
            # instead of zero (0 / 0 would turn the whole row into NaN).
            sum_mask = mask.sum(dim=1).clamp(min=1e-9)
            sentence_embeddings = sum_embeddings / sum_mask
        else:  # 'cls' -- the only other mode that passes the check above
            sentence_embeddings = token_embeddings[:, 0]

        return F.normalize(sentence_embeddings, p=2, dim=1)

if __name__ == '__main__':
    # Demo: embed three sentences (two paraphrases plus one unrelated) and
    # peek at the leading components of each resulting vector.
    sample_sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "A fast, dark-colored fox leaps across a sleepy canine.",
        "Transformers are state-of-the-art for NLP tasks."
    ]

    model = SentenceTransformer()
    model.eval()  # switch the module to inference mode

    # No gradients are needed for a forward-only pass.
    with torch.no_grad():
        embeddings = model(sample_sentences)

    for position, vector in enumerate(embeddings, start=1):
        print(f"Sentence {position}:")
        # Show first 5 values for brevity
        print(vector.numpy()[:5], '...')


2025-04-22 21:27:53.624883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Sentence 1:
[-0.01186156 -0.00205345 -0.00344327  0.02032673  0.03441968] ...
Sentence 2:
[-0.00659874  0.00383072 -0.01010892  0.01201109  0.03641137] ...
Sentence 3:
[-0.02178474 -0.01185572  0.00432812  0.02263459  0.01844462] ...


In [3]:
# Sentence 1 vs. sentence 2 (the two fox sentences); slicing keeps the batch dimension.
F.cosine_similarity(embeddings[0:1], embeddings[1:2])

tensor([0.9033])

In [4]:
# Sentence 1 (fox) vs. sentence 3 (transformers); slicing keeps the batch dimension.
F.cosine_similarity(embeddings[0:1], embeddings[2:3])

tensor([0.5787])

In [5]:
# Sentence 2 (fox paraphrase) vs. sentence 3 (transformers); slicing keeps the batch dimension.
F.cosine_similarity(embeddings[1:2], embeddings[2:3])

tensor([0.6053])