In [10]:
from transformers import BertTokenizer, BertForMaskedLM
import torch

# Load the pretrained tokenizer and masked-language-model head for one checkpoint
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

# Input sentence with a single word replaced by the [MASK] placeholder
sentence = "MLM and NSP is the [MASK] task of BERT"
inputs = tokenizer(sentence, return_tensors="pt")

# Run the forward pass with gradient tracking disabled (inference only)
with torch.no_grad():
    outputs = model(**inputs)
predictions = outputs.logits

# Find the position of the first [MASK] token in the (only) encoded sequence,
# then take the vocabulary id with the highest logit at that position
masked_index = inputs["input_ids"][0].tolist().index(tokenizer.mask_token_id)
predicted_token_id = torch.argmax(predictions[0, masked_index]).item()
predicted_word = tokenizer.decode([predicted_token_id])

print("Predicted word:", predicted_word)  # e.g. "Predicted word: main"


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Predicted word: main


In [11]:
from transformers import pipeline

# Build a fill-mask pipeline on the bert-base-uncased checkpoint.
# The bare call on the last line makes the notebook display the candidate
# fills (score, token id, token string, completed sequence) for the [MASK] slot.
unmasker = pipeline(task='fill-mask', model='bert-base-uncased')
unmasker("MLM and NSP is the [MASK] task of BERT.")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


[{'score': 0.2572787404060364,
  'token': 2364,
  'token_str': 'main',
  'sequence': 'mlm and nsp is the main task of bert.'},
 {'score': 0.20740661025047302,
  'token': 3078,
  'token_str': 'primary',
  'sequence': 'mlm and nsp is the primary task of bert.'},
 {'score': 0.06773309409618378,
  'token': 2034,
  'token_str': 'first',
  'sequence': 'mlm and nsp is the first task of bert.'},
 {'score': 0.06548521667718887,
  'token': 2430,
  'token_str': 'central',
  'sequence': 'mlm and nsp is the central task of bert.'},
 {'score': 0.06167421117424965,
  'token': 3937,
  'token_str': 'basic',
  'sequence': 'mlm and nsp is the basic task of bert.'}]

In [None]:
from transformers import pipeline

# Same fill-mask query, this time on the distilbert-base-uncased checkpoint.
# Note: this rebinds `unmasker`, replacing any pipeline assigned earlier.
unmasker = pipeline(task='fill-mask', model='distilbert-base-uncased')
unmasker("MLM and NSP is the [MASK] task of BERT.")

Device set to use cuda:0


[{'score': 0.25902441143989563,
  'token': 3078,
  'token_str': 'primary',
  'sequence': 'mlm and nsp is the primary task of bert.'},
 {'score': 0.16309845447540283,
  'token': 2364,
  'token_str': 'main',
  'sequence': 'mlm and nsp is the main task of bert.'},
 {'score': 0.08182769268751144,
  'token': 4563,
  'token_str': 'core',
  'sequence': 'mlm and nsp is the core task of bert.'},
 {'score': 0.04023785889148712,
  'token': 7037,
  'token_str': 'dual',
  'sequence': 'mlm and nsp is the dual task of bert.'},
 {'score': 0.024844925850629807,
  'token': 4054,
  'token_str': 'principal',
  'sequence': 'mlm and nsp is the principal task of bert.'}]

In [None]:
from transformers import pipeline

# Same fill-mask query, this time on the albert-base-v2 checkpoint.
# Note: this rebinds `unmasker`, replacing any pipeline assigned earlier.
unmasker = pipeline(task='fill-mask', model='albert-base-v2')
unmasker("MLM and NSP is the [MASK] task of BERT.")

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForMaskedLM: ['albert.pooler.bias', 'albert.pooler.weight']
- This IS expected if you are initializing AlbertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


[{'score': 0.04760139808058739,
  'token': 6612,
  'token_str': 'ultimate',
  'sequence': 'mlm and nsp is the ultimate task of bert.'},
 {'score': 0.024472367018461227,
  'token': 20766,
  'token_str': 'hardest',
  'sequence': 'mlm and nsp is the hardest task of bert.'},
 {'score': 0.023495331406593323,
  'token': 1256,
  'token_str': 'primary',
  'sequence': 'mlm and nsp is the primary task of bert.'},
 {'score': 0.021575214341282845,
  'token': 407,
  'token_str': 'main',
  'sequence': 'mlm and nsp is the main task of bert.'},
 {'score': 0.018088089302182198,
  'token': 18369,
  'token_str': 'foremost',
  'sequence': 'mlm and nsp is the foremost task of bert.'}]