In [1]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [2]:
model_id = 't5-base'

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
tokenizer = T5Tokenizer.from_pretrained(model_id)
model = T5ForConditionalGeneration.from_pretrained(model_id)
model.to(device)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [5]:
def summarize(text, max_length = 150, min_length = 80, num_beams = 6):
    inputs = tokenizer(
            "summarize this news article: " + text,
            return_tensors = "pt",
            max_length = 1024,
            truncation = True,
    ).to(device)
    
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask = inputs["attention_mask"],
        max_length = max_length,
        min_length = min_length,
        length_penalty = 2.0,
        num_beams = num_beams,
        early_stopping = True
    )
    summary = tokenizer.decode(summary_ids[0],skip_special_tokens = True)
    return summary

In [5]:
path = "news_article.txt"
with open(path, "r", encoding = "utf-8") as file:
    news_article = file.read()

In [7]:
summary = summarize(news_article)
print(summary)

only one of two security cameras was working near the site where the intruders broke in on the morning of Sunday 19 October . agents in the security control room did not have enough screens to follow the images in real-time . a lack of coordination meant police were initially sent to the wrong place once the alarm was raised . one of the most startling revelations was that the thieves had only 30 seconds to spare before police and private security guards arrived on the scene .


In [6]:
def answerQ(text, question, max_length = 150, num_beams = 6):
    inputs = tokenizer(
            "question: " + question + "context: " + text,
            return_tensors = "pt",
            max_length = 1024,
            truncation = True,
    ).to(device)
    
    answer_ids = model.generate(
        inputs["input_ids"],
        attention_mask = inputs["attention_mask"],
        max_length = max_length,
        num_beams = num_beams,
        early_stopping = True
    )
    answer = tokenizer.decode(answer_ids[0],skip_special_tokens = True)
    return answer

In [9]:
Question = "Have the police arrested all the thieves"
answer = answerQ(news_article, Question)
print(Question + " ?", answer, sep = "\n")

Have the police arrested all the thieves ?
police believe they have arrested all four intruders
