In [None]:
!pip install transformer

In [2]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import numpy as np

# Load the pre-trained model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Enable hidden states output
model.config.output_hidden_states = True

# Prepare input sentences
def tokenize_sentences(sentences, tokenizer, max_length=128):
    return tokenizer(sentences, padding=True, truncation=True, max_length=max_length, return_tensors='pt')

sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "Hello world! This is a test sentence.",
    "Transformers are a type of neural network architecture.",
    "Natural Language Processing is a fascinating field.",
    "OpenAI develops artificial intelligence technologies."
]

inputs = tokenize_sentences(sentences, tokenizer)

# Forward pass
with torch.no_grad():
    outputs = model(**inputs)

# Extract hidden states
hidden_states = outputs.hidden_states
last_hidden_state = hidden_states[-1]

# Define mean pooling function
def mean_pooling(token_embeddings, attention_mask):
    attention_mask = attention_mask.unsqueeze(-1)
    summed = torch.sum(token_embeddings * attention_mask, 1)
    count = torch.sum(attention_mask, 1)
    return summed / count

# Compute sentence embeddings
sentence_embeddings = mean_pooling(last_hidden_state, inputs['attention_mask'])
sentence_embeddings_np = sentence_embeddings.detach().numpy()

# Print embeddings
def print_embeddings(sentences, embeddings):
    for sentence, embedding in zip(sentences, embeddings):
        print(f"Sentence: {sentence}")
        print(f"Embedding length: {len(embedding)}\n")

print_embeddings(sentences, sentence_embeddings_np)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Sentence: The quick brown fox jumps over the lazy dog.
Embedding length: 768

Sentence: Hello world! This is a test sentence.
Embedding length: 768

Sentence: Transformers are a type of neural network architecture.
Embedding length: 768

Sentence: Natural Language Processing is a fascinating field.
Embedding length: 768

Sentence: OpenAI develops artificial intelligence technologies.
Embedding length: 768

