# 01 Sentence Transformer Demo

In this notebook, we will:
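1. Load a pretrained tokenizer and a Sentence Transformer model (`distilbert-base-uncased`, mean pooling).
2. Tokenize a few sample sentences and encode them into embeddings.
3. Print the shape of the resulting embeddings and the first five values of each one.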


In [None]:
import torch
from transformers import AutoTokenizer
from src.model import SentenceTransformerModel

# Sample inputs to embed.
sentences = [
    "Hello, how are you?",
    "I love exploring deep learning techniques.",
    "Sentence transformers encode text into embeddings."
]

# The tokenizer and the model are both built from the same checkpoint name.
model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = SentenceTransformerModel(model_name=model_name, pooling='mean')
# NOTE(review): presumably switches the model to inference behavior
# (e.g. disables dropout) -- confirm SentenceTransformerModel is an nn.Module.
model.eval()

# Tokenize the whole batch at once: pad to a common length, truncate to at
# most 32 tokens, and return PyTorch tensors.
encoded = tokenizer(
    sentences, padding=True, truncation=True, max_length=32, return_tensors='pt'
)
input_ids = encoded['input_ids']
attention_mask = encoded['attention_mask']

# Gradients are not needed for this forward pass.
with torch.no_grad():
    embeddings = model(input_ids, attention_mask)

# Report the overall shape, then a short preview of each sentence's embedding.
print('Embeddings shape:', embeddings.shape)
for idx, sentence in enumerate(sentences):
    preview = embeddings[idx][:5]
    print(f"\nSentence: {sentence}")
    print("Embedding (first 5 values):", preview)
