In [None]:
# First, you need to install the libraries:
# pip install transformers torch

from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch

# --- Setup: pre-trained encoder-only checkpoint (BERT) plus matching tokenizer ---
# Both objects are loaded from the same checkpoint name so that the
# tokenizer's vocabulary matches the model's.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# --- Input: a sentence containing the tokenizer's mask placeholder ---
# The model is asked to fill in the [MASK] slot.
text = "The capital of France is [MASK]."

# --- Tokenize: text -> PyTorch tensors (return_tensors="pt") ---
inputs = tokenizer(text, return_tensors="pt")

# --- Forward pass ---
# Gradients are not needed for inference, so the call runs under no_grad.
with torch.no_grad():
    outputs = model(**inputs)
# The logits are indexed below as [batch, position] to get per-token scores.
predictions = outputs.logits

# --- Locate the mask ---
# Compare every input id against the tokenizer's mask id; element [1] of the
# resulting index tuple holds the sequence positions (element [0] would be
# the batch indices). Note this is the mask's *position*, not a token id.
is_mask = inputs["input_ids"] == tokenizer.mask_token_id
mask_token_index = torch.nonzero(is_mask, as_tuple=True)[1]

# --- Rank candidates ---
# Take the scores at the first mask position of the first (only) sequence
# and keep the ids of the 5 highest-scoring vocabulary entries.
mask_logits = predictions[0, mask_token_index[0]]
predicted_token_ids = mask_logits.topk(5).indices

# --- Decode: vocabulary ids -> token strings ---
predicted_tokens = tokenizer.convert_ids_to_tokens(predicted_token_ids)

# --- Report ---
print(f"Original Text: {text}")
print(f"Top 5 Predictions for [MASK]: {predicted_tokens}")

# --- Example Output (illustrative; the exact predictions may differ) ---
# Original Text: The capital of France is [MASK].
# Top 5 Predictions for [MASK]: ['paris', 'berlin', 'london', 'brussels', 'madrid']

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]