# **Mask Filling**

In [None]:
import torch
from transformers import AutoTokenizer, BartForConditionalGeneration
from huggingface_hub import login

# Optional: authenticate with the Hugging Face Hub first if a checkpoint requires it.
# Prefer the interactive `login()` prompt over pasting a token into the notebook.
# login("huggingface API key here")

# Load the tokenizer for the base BART checkpoint from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")
# Load the pre-trained BART model.
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

TXT = "When is you birthday, Trang? - It's on <mask> fifteenth of May"
input_ids = tokenizer(TXT, return_tensors="pt")["input_ids"]

# Inference only: skip autograd bookkeeping, consistent with the other
# inference cells in this notebook that run the model under torch.no_grad().
with torch.no_grad():
    logits = model(input_ids).logits

# Position of the <mask> token in the input sequence.
# `.item()` requires exactly one match, so TXT must contain a single <mask>.
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
# Turn the logits at the masked position into a probability for each candidate token.
probs = logits[0, masked_index].softmax(dim=0)
# Keep the three most likely token ids together with their probabilities.
values, predictions = probs.topk(3)

# Uncomment to inspect the raw probabilities and token ids:
# print(values, predictions)

tensor([0.9474, 0.0067, 0.0044], grad_fn=<TopkBackward0>) tensor([  5, 392, 273])


In [None]:
# Decode each predicted token id on its own so the resulting words stay aligned
# one-to-one with `values`. Decoding all ids as a single string and splitting on
# whitespace would merge any token that lacks a leading space into its neighbour.
[tokenizer.decode(token_id).strip() for token_id in predictions.tolist()]

['the', 'May', 'Friday']

# **Text Summarization**

In [None]:
from transformers import AutoTokenizer, BartForConditionalGeneration

# Load the facebook/bart-large-cnn checkpoint and its matching tokenizer.
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

ARTICLE_TO_SUMMARIZE = (
    "PG&E stated it scheduled the blackouts in response to forecasts for high winds "
    "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
    "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
)

# `truncation=True` makes the 1024-token cap explicit; passing `max_length` alone
# makes the tokenizer emit a warning and fall back to a default truncation strategy.
inputs = tokenizer(ARTICLE_TO_SUMMARIZE, max_length=1024, truncation=True, return_tensors="pt")

# generate() returns the summary as token ids, not as text.
summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=0, max_length=20)
# The keyword is `skip_special_tokens` (plural). The misspelled form was ignored,
# which left the <s> / </s> markers in the decoded summary.
tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


['</s><s>PG&E scheduled the blackouts in response to forecasts for high winds amid dry conditions</s>']

# **Text Classification**

In [None]:
import torch
from transformers import AutoTokenizer, BartForSequenceClassification

# Checkpoint shared by the classifier and its tokenizer.
SST2_CHECKPOINT = "valhalla/bart-large-sst2"
model = BartForSequenceClassification.from_pretrained(SST2_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(SST2_CHECKPOINT)

inputs = tokenizer("I hate programming and doing AI projects", return_tensors="pt")

# Inference only, so no gradients are tracked for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# Index of the highest-scoring logit, mapped to its label through the model config.
predicted_class_id = int(torch.argmax(logits))
model.config.id2label[predicted_class_id]

You passed `num_labels=3` which is incompatible to the `id2label` map of length `2`.


'NEGATIVE'

# **Question Answering**

In [None]:
# Imported here so the cell does not depend on `torch` leaking in from an earlier cell.
import torch
from transformers import AutoTokenizer, BartForQuestionAnswering

# valhalla/bart-large-finetuned-squadv1
model = BartForQuestionAnswering.from_pretrained("valhalla/bart-large-finetuned-squadv1")
tokenizer = AutoTokenizer.from_pretrained("valhalla/bart-large-finetuned-squadv1")

question = "Who is Elon Musk?"
text = "Elon Musk is a business magnate, industrial designer, and engineer. He is the founder, CEO, and lead designer of SpaceX, Tesla"

# Question and context are encoded together as one input sequence.
inputs = tokenizer(question, text, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# The answer is a span of the input tokens rather than newly generated text:
# the highest-scoring start logit marks where the span begins and the
# highest-scoring end logit marks where it ends.
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()

# Slice the answer span (end index inclusive) out of the input ids and decode it.
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
# The keyword is `skip_special_tokens`; the misspelled form was silently ignored.
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

You passed `num_labels=3` which is incompatible to the `id2label` map of length `2`.
You passed `num_labels=3` which is incompatible to the `id2label` map of length `2`.
You passed `num_labels=3` which is incompatible to the `id2label` map of length `2`.
You passed `num_labels=3` which is incompatible to the `id2label` map of length `2`.


' a business magnate, industrial designer, and engineer'

In [None]:
# Hallucination