In [99]:
! pip install --quiet transformers torchviz
! pip install --quiet accelerate sentencepiece datasets evaluate bitsandbytes tqdm
!pip install --quiet pytorch-lightning # pytorch wrapper
!pip install --quiet torchtext # text utilities

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone


In [106]:
import torch
from torchviz import make_dot
import matplotlib.pyplot as plt

In [111]:
# Import necessary libraries
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

# Task 2: Summarization
summarization_input = "Summarize the following text: ServiceNow, Inc. provides enterprise information technology (IT) management software. The Company designs, develops, and markets a cloud computing platform to help companies manage digital workflows for enterprise operations."
input_ids_summarization = tokenizer("summarize: " + summarization_input, return_tensors="pt").input_ids
summary_ids = model.generate(input_ids_summarization)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summarization:")
print(summary)

# Task 3: Question Answering
context = "ServiceNow, Inc. provides enterprise information technology (IT) management software. The Company designs, develops, and markets a cloud computing platform to help companies manage digital workflows for enterprise operations."
question = "What is ServiceNow known for?"
input_text_qa = f"question: {question} context: {context}"
input_ids_qa = tokenizer(input_text_qa, return_tensors="pt").input_ids
answer_ids = model.generate(input_ids_qa)
answer = tokenizer.decode(answer_ids[0], skip_special_tokens=True)
print("\nQuestion Answering:")
print(answer)

# Task 4: English to French Translation
translation_input = "Translate the following English text to French: Hello, how are you?"
input_ids_translation = tokenizer("translate English to French: " + translation_input, return_tensors="pt").input_ids
translation_ids = model.generate(input_ids_translation)
translation = tokenizer.decode(translation_ids[0], skip_special_tokens=True)
print("\nTranslation:")
print(translation)





Summarization:
ServiceNow, Inc. is a company that provides IT management software for enterprise operations.

Question Answering:
IT management software

Translation:
Hello, c'est-ce-t-t-t-t-t-


In [112]:
print(model)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [113]:
print("\nModel Layers:")
for i, layer in enumerate(model.encoder.block):
    print(f"Block {i}: {layer}")


# Task 6: Print total number of parameters/weights in the model
total_parameters = sum(p.numel() for p in model.parameters())
print("\nTotal Parameters:", total_parameters)


Model Layers:
Block 0: T5Block(
  (layer): ModuleList(
    (0): T5LayerSelfAttention(
      (SelfAttention): T5Attention(
        (q): Linear(in_features=512, out_features=384, bias=False)
        (k): Linear(in_features=512, out_features=384, bias=False)
        (v): Linear(in_features=512, out_features=384, bias=False)
        (o): Linear(in_features=384, out_features=512, bias=False)
        (relative_attention_bias): Embedding(32, 6)
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (1): T5LayerFF(
      (DenseReluDense): T5DenseGatedActDense(
        (wi_0): Linear(in_features=512, out_features=1024, bias=False)
        (wi_1): Linear(in_features=512, out_features=1024, bias=False)
        (wo): Linear(in_features=1024, out_features=512, bias=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (act): NewGELUActivation()
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
)


## Set the tensor in final layer (decoder.final_layer_norm.weight) to all zeros.

In [119]:


# Set the weights of the final layer's normalization to zeros
model.decoder.final_layer_norm.weight.data.fill_(0.0)

# Verify if the Q&A task works after resetting the weights
question = "What is ServiceNow?"
input_text_qa = f"question: {question} context: {context}"
input_ids_qa = tokenizer(input_text_qa, return_tensors="pt").input_ids
answer_ids = model.generate(input_ids_qa)
print(input_text_qa)
print(input_ids_qa)
answer = tokenizer.decode(answer_ids[0], skip_special_tokens=True)
print("\nQuestion Answering After Resetting Weights:")
print(answer)

## The question answering does not works after resetting the weights
## setting everything to zero is responsible for loss of expressiveness and the output of a linear layer with all zero weights becomes a constant value, often zero. This happens because the multiplication of all input features by zero results in an output of zero, regardless of the input values

question: What is ServiceNow? context: ServiceNow, Inc. provides enterprise information technology (IT) management software. The Company designs, develops, and markets a cloud computing platform to help companies manage digital workflows for enterprise operations.
tensor([[  822,    10,   363,    19,  1387, 17527,    58,  2625,    10,  1387,
         17527,     6,  1542,     5,   795,  5399,   251,   748,    41,  3177,
            61,   758,   889,     5,    37,  1958,  2888,     6,  1344,     7,
             6,    11,  3212,     3,     9,  3126, 10937,  1585,    12,   199,
           688,  1865,  1125, 16101,     7,    21,  5399,  2673,     5,     1]])

Question Answering After Resetting Weights:



In [120]:
import torch.nn as nn

# Define the new dimensions for the smaller layer
new_dim = 512  # Adjust this value as needed

# Replace the final layer normalization with a smaller layer
new_final_layer_norm = nn.LayerNorm(new_dim)
model.decoder.final_layer_norm = new_final_layer_norm

# Adjust other dependent layers to match the new dimension
model.decoder.block[0].linear1 = nn.Linear(new_dim, model.config.d_model)
model.decoder.block[0].linear2 = nn.Linear(model.config.d_model, new_dim)
model.decoder.block[1].linear1 = nn.Linear(new_dim, model.config.d_model)
model.decoder.block[1].linear2 = nn.Linear(model.config.d_model, new_dim)

# Verify if the Q&A task works after modifying the model
question = "What is Hugging Face known for?"
context = "Hugging Face is known for its contributions to NLP research and its transformer-based models."
input_text_qa = f"question: {question} context: {context}"
input_ids_qa = tokenizer(input_text_qa, return_tensors="pt").input_ids

print(input_ids_qa)
# Adjust input dimension to 256
input_ids_qa = input_ids_qa[:, :256]

answer_ids = model.generate(input_ids_qa)
answer = tokenizer.decode(answer_ids[0], skip_special_tokens=True)
print("\nQuestion Answering After Modifying the Model:")
print(answer)


tensor([[  822,    10,   363,    19, 11560,  3896,  8881,   801,    21,    58,
          2625,    10, 11560,  3896,  8881,    19,   801,    21,   165,  7548,
            12,   445,  6892,   585,    11,   165, 19903,    18,   390,  2250,
             5,     1]])

Question Answering After Modifying the Model:
transformer-based models


## Finetuning on  SQuAD dataset

In [44]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import copy

In [18]:

train_dataset = load_dataset('squad', split='train') #87,599 samples
valid_dataset = load_dataset('squad', split='validation') # 10,570 samples

# Sample Data
sample_validation_dataset = next(iter(valid_dataset))
print (sample_validation_dataset)

Downloading builder script:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

{'id': '56be4db0acb8001400a502ec', 'title': 'Super_Bowl_50', 'context': 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi\'s Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.', 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'answers': {'text': ['Denver Broncos', 'Denver Broncos', 'Denver Broncos'], 'ans

In [127]:
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small", device_map="auto", torch_dtype=torch.float16)

In [128]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 76961152
all model parameters: 76961152
percentage of trainable model parameters: 100.00%


### Building the dataset for fine-tuning

InputFormat : context : {context} question : {question} <br />
OutputFormat : answer : {answer}

In [95]:
class QuestionGenerationDataset(Dataset):
    def __init__(self, tokenizer, filepath,
                 max_len_inp=512,max_len_out=96):
        self.path = filepath

        self.passage_column = "context"
        self.answer = "answer"
        self.question = "question"

        # self.data = pd.read_csv(self.path)
        self.data = pd.read_parquet(self.path).iloc[:5000,:] #demo purposes

        self.max_len_input = max_len_inp
        self.max_len_output = max_len_out
        self.tokenizer = tokenizer
        self.inputs = []
        self.targets = []
        self._build()

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, index):
        source_ids = self.inputs[index]["input_ids"].squeeze()
        target_ids = self.targets[index]["input_ids"].squeeze()
        #squeeze to get rid of the batch dimension
        src_mask = self.inputs[index]["attention_mask"].squeeze()
        target_mask = self.targets[index]["attention_mask"].squeeze()  # convert [batch,dim] to [dim]

        labels = copy.deepcopy(target_ids)
        labels [labels==0] = -100

        return {"source_ids": source_ids, "source_mask": src_mask,
                "target_ids": target_ids, "target_mask": target_mask,
                "labels":labels}

    def _build(self):
        for rownum,val in tqdm(self.data.iterrows()): # Iterating over the dataframe
            passage,answer,target = val[self.passage_column],val[self.answer],val[self.question]
            # T5 Input format for question answering tasks
            input_ = f"question: {target}  context: {passage}"
            target = f"answer: {str(answer)}" # Output format we require

            # tokenize inputs
            tokenized_inputs = self.tokenizer.batch_encode_plus(
                [input_], max_length=self.max_len_input,
                truncation = True,
                padding='max_length', return_tensors="pt"
            )
            # tokenize targets
            tokenized_targets = self.tokenizer.batch_encode_plus(
                [target], max_length=self.max_len_output,
                truncation = True,
                padding='max_length',return_tensors="pt"
            )

            self.inputs.append(tokenized_inputs)
            self.targets.append(tokenized_targets)

In [74]:
def create_pandas_dataset(data,
                          answer_threshold=10,
                          verbose = False):

  ''' Create a Pandas Dataframe from hugging face dataset.
  Params:
        answer_threshold: Only consider those Question Answer pairs where the Answer is short.
  '''
  count_long ,count_short = 0 , 0
  result_df  = pd.DataFrame(columns = ['context', 'answer','question'])
  for index,val in enumerate(tqdm(data)):
      passage = val['context']
      question = val['question']
      answer = val['answers']['text'][0]
      no_of_words = len(answer.split())
      if no_of_words >= answer_threshold:
          count_long = count_long + 1
          continue
      else:
          result_df.loc[count_short] = [passage] + [answer] + [question]
          count_short = count_short + 1
  if verbose:
    return (result_df,
            count_long,
            count_short)
  else:
    return result_df

In [75]:
sample_validation_dataset = next(iter(valid_dataset))
print (sample_validation_dataset)

context = sample_validation_dataset['context']
question = sample_validation_dataset['question']
answer = sample_validation_dataset['answers']['text'][0]
print('---------------'*9)
print('\nBreaking it Down\n')
print ("context:",context)
print ("question:",question)
print ("answer:",answer)

{'id': '56be4db0acb8001400a502ec', 'title': 'Super_Bowl_50', 'context': 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi\'s Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.', 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'answers': {'text': ['Denver Broncos', 'Denver Broncos', 'Denver Broncos'], 'ans

In [41]:
df_train , df_validation = create_pandas_dataset(train_dataset) , create_pandas_dataset(valid_dataset)
print(f"\n Total Train Samples:{df_train.shape} , Total Validation Samples:{df_validation.shape}")

100%|██████████| 87599/87599 [05:11<00:00, 281.27it/s]
100%|██████████| 10570/10570 [00:17<00:00, 610.63it/s]


 Total Train Samples:(82865, 3) , Total Validation Samples:(10112, 3)





In [42]:
# converted into a parquet format rather json for efficient storing as well as fast calling from the storage
df_train.to_parquet('train_squad.parquet')
df_validation.to_parquet('validation_squad.parquet')

In [96]:
train_path = 'train_squad.parquet' # change this accordingly
validation_path = 'validation_squad.parquet'
train_dataset = QuestionGenerationDataset(tokenizer,train_path)
validation_dataset = QuestionGenerationDataset(tokenizer,validation_path)

5000it [00:10, 472.83it/s]
5000it [00:10, 476.92it/s]


In [97]:
train_sample = train_dataset[5]
decoded_train_input = tokenizer.decode(train_sample['source_ids'])
decoded_train_output = tokenizer.decode(train_sample['target_ids'])

print(decoded_train_input)
print(decoded_train_output)

question: When did the Scholastic Magazine of Notre dame begin publishing? context: As at most other universities, Notre Dame's students run a number of news media outlets. The nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. Begun as a one-page journal in September 1876, the Scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the United States. The other magazine, The Juggler, is released twice a year and focuses on student literature and artwork. The Dome yearbook is published annually. The newspapers have varying publication interests, with The Observer published daily and mainly reporting university and other news, and staffed by students from both Notre Dame and Saint Mary's College. Unlike Scholastic and The Dome, The Observer is an independent publication and does not have a faculty advisor or any editorial oversight from the University. In 1987, wh

In [129]:
import pytorch_lightning as pl
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

class T5Tuner(pl.LightningModule):

    def __init__(self,batchsize, model, tokenizer):
        super(T5Tuner, self).__init__()
        self.batch_size = batchsize
        self.model = model
        self.tokenizer = tokenizer


    def forward( self, input_ids, attention_mask=None,
                decoder_attention_mask=None,
                lm_labels=None):

         outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_attention_mask=decoder_attention_mask,
            labels=lm_labels,
        )

         return outputs


    def training_step(self, batch, batch_idx):
        outputs = self.forward(
            input_ids=batch["source_ids"],
            attention_mask=batch["source_mask"],
            decoder_attention_mask=batch['target_mask'],
            lm_labels=batch['labels']
        )

        loss = outputs[0]
        self.log('train_loss',loss, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        outputs = self.forward(
            input_ids=batch["source_ids"],
            attention_mask=batch["source_mask"],
            decoder_attention_mask=batch['target_mask'],
            lm_labels=batch['labels']
        )

        loss = outputs[0]
        self.log("val_loss",loss, on_epoch=True)
        return loss

    def train_dataloader(self):
        return DataLoader(train_dataset, batch_size=self.batch_size,
                          num_workers=4)

    def val_dataloader(self):
        return DataLoader(validation_dataset,
                          batch_size=self.batch_size,
                          num_workers=4)

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=3e-4, eps=1e-8)
        return optimizer

In [130]:
device  = 'cuda' if torch.cuda.is_available() else "cpu"
model = T5Tuner(4,model,tokenizer)

trainer = pl.Trainer(max_epochs = 10,accelerator=device  )

trainer.fit(model)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 77.0 M
-----------------------------------------------------
77.0 M    Trainable params
0         Non-trainable params
77.0 M    Total params
307.845   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


In [132]:
!mkdir "tokenizer"
!mkdir "trained_model"
model.model.save_pretrained('trained_model')
tokenizer.save_pretrained('tokenizer')

mkdir: cannot create directory ‘tokenizer’: File exists
mkdir: cannot create directory ‘trained_model’: File exists


('tokenizer/tokenizer_config.json',
 'tokenizer/special_tokens_map.json',
 'tokenizer/spiece.model',
 'tokenizer/added_tokens.json')

## INFERENCE

In [133]:
trained_model_path = 'trained_model'
trained_tokenizer = 'tokenizer'
device = 'cpu'

In [163]:
model = T5ForConditionalGeneration.from_pretrained(trained_model_path, device_map='cpu')
tokenizer = AutoTokenizer.from_pretrained(trained_tokenizer)

In [159]:
context ="As at most other universities, Notre Dame's students run a number of news media outlets. The nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. Begun as a one-page journal in September 1876, the Scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the United States. The other magazine, The Juggler, is released twice a year and focuses on student literature and artwork. The Dome yearbook is published annually. The newspapers have varying publication interests, with The Observer published daily and mainly reporting university and other news, and staffed by students from both Notre Dame and Saint Mary's College. Unlike Scholastic and The Dome, The Observer is an independent publication and does not have a faculty advisor or any editorial oversight from the University. In 1987, when some students believed that The Observer began to show a conservative bias, a liberal newspaper, Common Sense was published. Likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper Irish Rover went into production. Neither paper is published as often as The Observer; however, all three are distributed to all students. Finally, in Spring 2008 an undergraduate journal for political science research, Beyond Politics, made its debut."
question = "When did the Scholastic Magazine of Notre dame begin publishing?"
text = "question: "+question + " " + "context: " + context +" "+ "answer: "
print(text)

question: When did the Scholastic Magazine of Notre dame begin publishing? context: As at most other universities, Notre Dame's students run a number of news media outlets. The nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. Begun as a one-page journal in September 1876, the Scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the United States. The other magazine, The Juggler, is released twice a year and focuses on student literature and artwork. The Dome yearbook is published annually. The newspapers have varying publication interests, with The Observer published daily and mainly reporting university and other news, and staffed by students from both Notre Dame and Saint Mary's College. Unlike Scholastic and The Dome, The Observer is an independent publication and does not have a faculty advisor or any editorial oversight from the University. In 1987, wh

In [164]:
encoding = tokenizer(question, context,return_tensors="pt")
print (encoding.keys())
input_ids,attention_mask  = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)

input_ids


dict_keys(['input_ids', 'attention_mask'])


tensor([[  366,   410,     8, 16064,    40, 10057,  8336,    13,  7711, 10157,
            15,  1731,  9002,    58,     1,   282,    44,   167,   119,  8278,
             6,  7711,     3, 17084,    31,     7,   481,   661,     3,     9,
           381,    13,  1506,   783, 14290,     5,    37,  4169,  1236,    18,
          4312, 14290,   560,   386, 16265,     6,   321,     3,     9,  2252,
            11,  4390,  2478,     6,    11,   633, 13254,    11, 18178,     5,
         10129,   202,    38,     3,     9,    80,    18,  6492,  6378,    16,
          1600,   507,  3959,     6,     8, 16064,    40, 10057,  3835,    19,
          4683,  4394,  3718,    11,  3213,    12,    36,     8, 10043,  7558,
             3, 31003,  5707,    16,     8,   907,  1323,     5,    37,   119,
          3835,     6,    37,  3736,   122, 12683,     6,    19,  1883,  4394,
             3,     9,   215,    11,     3,  6915,    30,  1236,  6678,    11,
          7924,     5,    37, 10576,    15,   215,  

In [165]:
model.eval()
beam_outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_length=72, # How long the generated questions should be
    early_stopping=True,
    num_beams=5,
    num_return_sequences=2
)

for beam_output in beam_outputs:
    sent = tokenizer.decode(beam_output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
    print(sent)

the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the.
the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the thes


London
