In [9]:
from transformers import BertTokenizer, BertModel
import torch

In [10]:

# 1️⃣ Load pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)



In [11]:
# 4️⃣ Define sample texts
texts = [
    "A luminous red plastic bag sits alone on a dark beach.",
    "The ocean waves crash softly under the twilight sky.",
    "A glowing lantern floats over the calm river."
]

In [12]:

# 3️⃣ Tokenize and prepare input
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)



In [13]:
# 4️⃣ Generate embeddings
with torch.no_grad():
    outputs = model(**inputs)
    last_hidden_state = outputs.last_hidden_state      # (batch_size, seq_len, hidden_dim)
    pooled_output = outputs.pooler_output              # (batch_size, hidden_dim)


In [14]:

# 5️⃣ Print shapes
print("Last hidden state shape:", last_hidden_state.shape)
print("Pooled output shape:", pooled_output.shape)
print("Text embedding vector (first 5 values):", pooled_output)

Last hidden state shape: torch.Size([1, 14, 768])
Pooled output shape: torch.Size([1, 768])
Text embedding vector (first 5 values): tensor([[-9.4546e-01, -6.1769e-01, -9.6223e-01,  8.8632e-01,  8.7953e-01,
         -3.5460e-01,  8.7286e-01,  4.4277e-01, -9.2777e-01, -1.0000e+00,
         -7.1855e-01,  9.9277e-01,  9.9101e-01,  3.6126e-01,  9.5665e-01,
         -7.0923e-01, -5.2843e-01, -7.0288e-01,  4.2481e-01, -5.4067e-01,
          8.0766e-01,  1.0000e+00, -2.2575e-01,  3.8502e-01,  6.1145e-01,
          9.9947e-01, -8.1283e-01,  9.5013e-01,  9.6735e-01,  7.9687e-01,
         -6.6931e-01,  3.3326e-01, -9.9543e-01, -3.1332e-01, -9.5295e-01,
         -9.9663e-01,  6.6185e-01, -7.7373e-01, -5.1348e-02, -1.1557e-01,
         -9.2860e-01,  4.4690e-01,  1.0000e+00,  4.5150e-01,  6.5571e-01,
         -2.9764e-01, -1.0000e+00,  4.1575e-01, -9.1244e-01,  9.8659e-01,
          9.2289e-01,  9.7615e-01,  3.1163e-01,  6.4407e-01,  6.3993e-01,
         -5.0072e-01,  1.7543e-01,  3.2260e-01, -3.743