In [1]:
import torch
from transformers import BertForMaskedLM, BertTokenizer

# Load the pre-trained BERT checkpoint and its matching tokenizer for masked
# language modeling. Loading this checkpoint into BertForMaskedLM reports the
# `cls.seq_relationship.*` weights as unused (see the warning in the cell output).
model_name = 'bert-large-uncased'
model = BertForMaskedLM.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)
model.eval()  # inference only: make the "dropout disabled" intent explicit

# Input sentence; each [MASK] marks a token the model should fill in
text = "The cat was [MASK] in colour"

# Encode the sentence as a PyTorch tensor of token IDs. The code below treats
# inputs[0] as the single encoded sequence.
inputs = tokenizer.encode(text, return_tensors="pt")

# Forward pass with gradient tracking switched off
with torch.no_grad():
    outputs = model(inputs)
    predictions = outputs.logits

# Locate every [MASK] position with one vectorized comparison instead of a
# Python-level loop over individual tensor elements. `.tolist()` keeps
# `masked_positions` a plain list of ints, as before.
masked_positions = (inputs[0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0].tolist()

# Highest-scoring ID along the last axis of the logits at each masked position
predicted_token_ids = torch.argmax(predictions[0, masked_positions], dim=-1)

# Convert all predicted IDs back to tokens in a single call
predicted_tokens = tokenizer.convert_ids_to_tokens(predicted_token_ids.tolist())

# Print the predicted tokens; say so explicitly when there was nothing to
# predict rather than finishing silently
if not masked_positions:
    print(f"No {tokenizer.mask_token} token found in the input text")
for position, token in zip(masked_positions, predicted_tokens):
    print(f"Predicted token at position {position}: {token}")


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Predicted token at position 4: painted
