In [3]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
pip install transformers



In [5]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
text = 'ChatGPT is a language model developed by OpenAI, based on the GPT (Generative Pre-trained Transformer) architecture. '

# Tokenize and encode the text
encoding = tokenizer.encode(text)
print("Token IDs:", encoding)
# masking the tokens
print(tokenizer.get_special_tokens_mask(encoding))

# Convert token IDs back to tokens
tokens = tokenizer.convert_ids_to_tokens(encoding)
print("Tokens:", tokens)

#

Token IDs: [101, 24705, 1204, 17095, 1942, 1110, 170, 1846, 2235, 1872, 1118, 3353, 1592, 2240, 117, 1359, 1113, 1103, 15175, 1942, 113, 9066, 15306, 11689, 118, 3972, 13809, 23763, 114, 4220, 119, 102]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
Tokens: ['[CLS]', 'Cha', '##t', '##GP', '##T', 'is', 'a', 'language', 'model', 'developed', 'by', 'Open', '##A', '##I', ',', 'based', 'on', 'the', 'GP', '##T', '(', 'Gene', '##rative', 'Pre', '-', 'trained', 'Trans', '##former', ')', 'architecture', '.', '[SEP]']


In [7]:
from transformers import BertForSequenceClassification
import torch # Import torch

# Load a pre-trained BERT model for sequence classification
model = BertForSequenceClassification.from_pretrained("bert-base-cased")

# Use the encoded text as input to the model
input_ids = encoding
input_ids = [input_ids] # Add batch dimension

# Get the model's output (logits)
outputs = model(input_ids=torch.tensor(input_ids))
print("Model output (logits):", outputs.logits)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model output (logits): tensor([[-0.0672, -0.2236]], grad_fn=<AddmmBackward0>)


In [10]:
from transformers import BertForQuestionAnswering
import torch

# !pip install accelerate -q
model_qa = BertForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

question = "Who developed ChatGPT?"

encoding_qa = tokenizer(question, text, return_tensors='pt')

# Get the model's output (start and end logits for the answer span)
outputs_qa = model_qa(**encoding_qa)

# The output logits indicate the start and end positions of the answer in the input sequence
answer_start_scores = outputs_qa.start_logits
answer_end_scores = outputs_qa.end_logits

# Find the tokens with the highest start and end scores
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1 # Add 1 to include the end token

# Convert the input IDs back to tokens to extract the answer span
tokens_qa = tokenizer.convert_ids_to_tokens(encoding_qa['input_ids'][0])

# Extract the answer span
answer_tokens = tokens_qa[answer_start:answer_end]

# Join the tokens to get the answer string, handling potential word piece tokens (e.g., ##)
answer = tokenizer.convert_tokens_to_string(answer_tokens)

print("Question:", question)
print("Answer:", answer)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Question: Who developed ChatGPT?
Answer: a
