<a href="https://colab.research.google.com/github/MinsooKwak/Study/blob/main/LLM/BERT/BERT_mask.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers



In [2]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

In [3]:
# Load modal and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Text classification function
def classify_text(text):
  inputs = tokenizer.encode_plus(
      text,
      add_special_tokens=True,
      padding='max_length',
      truncation=True,
      max_length=128,
      return_tensors='pt'
  )
  input_ids = inputs['input_ids'].to(device)
  attention_mask = inputs['attention_mask'].to(device)

  # Forward pass through model
  with torch.no_grad():
    outputs = model.to('cuda')(input_ids, attention_mask=attention_mask)

  logits= outputs.logits
  probabilities = torch.softmax(logits, dim=1).squeeze(dim=0)
  predicted_class = torch.argmax(probabilities).item()

  return predicted_class, probabilities

In [6]:
# Example
text_to_classify = "This is an example sentence"
predicted_class, probablities = classify_text(text_to_classify)

In [7]:
print(f'Predictec class : {predicted_class}')
print('Probabilities :')
for i, prob in enumerate(probablities):
  print(f'Class {i} : {prob.item()}')

Predictec class : 1
Probabilities :
Class 0 : 0.34375154972076416
Class 1 : 0.6562485098838806


In [9]:
import torch
from transformers import BertTokenizer, BertForMaskedLM

In [13]:
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
def predict_next_word(text):
  # Tokenize input Text
  tokenized_text = tokenizer.tokenize(text)
  masked_index = tokenized_text.index('[MASK]')
  indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

  # Convert tokens to tensor
  tokens_tensor = torch.tensor([indexed_tokens]).to(device)

  # Forward pass
  with torch.no_grad():
    outputs = model.to('cuda')(tokens_tensor)

  predictions = outputs[0][0, masked_index].topk(k=5).indices.tolist()

  predicted_tokens=[]
  for token_index in predictions:
    predicted_token = tokenizer.convert_ids_to_tokens([token_index])[0]
    predicted_tokens.append(predicted_token)

  return predicted_tokens

In [17]:
#example
text_with_mask = 'I want to [MASK] a pizza for dinner'
predicted_tokens = predict_next_word(text_with_mask)

print(predicted_tokens)

['have', 'get', 'be', 'make', 'take']
