# Text generation (mask filling) with the BART model


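# Install these packages first
`transformers` and PyTorch (`torch`)
must both be installed before running this notebook; they are needed to reproduce the output shown below.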


In [30]:
# importing the BART model and tokenizer
from transformers import BartForConditionalGeneration, BartTokenizer

In [31]:
# Load the pretrained weights of Facebook's "bart-large" checkpoint.
# forced_bos_token_id=0 is forwarded to the model configuration.
# Note: this call takes a while to complete.
bart_model = BartForConditionalGeneration.from_pretrained(
    "facebook/bart-large",
    forced_bos_token_id=0,
)

In [32]:
# Load the tokenizer that belongs to the same "bart-large" checkpoint;
# it converts raw text into model inputs and decodes ids back into text.
tokenizer = BartTokenizer.from_pretrained(
    "facebook/bart-large",
)

In [33]:
# "<mask>" is a special placeholder token: each occurrence marks a gap
# in the sentence that the BART model is asked to fill in.
sent = (
    "We wrote a <mask> to the BART model. "
    "So we can use it to <mask> text. "
    "So that we can <mask> the text easily."
)

In [34]:
# Tokenize the masked sentence into model inputs.
# return_tensors="pt" asks the tokenizer for PyTorch tensors.
tokenized_sent = tokenizer(
    sent,
    return_tensors="pt",
)

In [35]:
# Generate output token ids for the masked sentence.
# max_length is passed explicitly (the same value fill_masks uses) so the
# result is not cut short by the library's default generation length limit.
generated_encoded = bart_model.generate(tokenized_sent['input_ids'], max_length=1024)

In [37]:
# Helper that repeatedly asks the model to fill in "<mask>" placeholders.
def fill_masks(text, tokenizer, model, max_masks=5, max_length=1024):
    """Fill ``<mask>`` placeholders in ``text`` by regenerating it with ``model``.

    On every pass the current text is tokenized, the model generates a new
    sequence from it, and the decoded generation replaces the text as a
    whole (it is not a targeted substitution of a single ``<mask>``). The
    loop stops as soon as the text no longer contains ``"<mask>"`` or after
    ``max_masks`` passes, whichever comes first.

    Args:
        text: Input string, optionally containing ``"<mask>"`` placeholders.
        tokenizer: Object called as ``tokenizer(text, return_tensors="pt")``
            that returns a mapping with an ``"input_ids"`` entry, and that
            provides ``decode(ids, skip_special_tokens=True)``.
        model: Object providing ``generate(input_ids, max_length=...)``.
        max_masks: Upper bound on the number of generation passes.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Defaults to 1024, the value that used to be hard-coded here.

    Returns:
        The text after the last generation pass, or the input unchanged if
        it contains no ``"<mask>"`` (or if ``max_masks`` is not positive).
    """
    for _ in range(max_masks):
        # Nothing left to fill: stop before paying for another generation.
        if "<mask>" not in text:
            break
        input_ids = tokenizer(text, return_tensors="pt")["input_ids"]
        output_ids = model.generate(input_ids, max_length=max_length)
        # Only the first generated sequence is used.
        # NOTE(review): with a Hugging Face tokenizer, skip_special_tokens=True
        # presumably also drops any leftover "<mask>", so one pass is usually
        # enough in practice — confirm.
        decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # The whole text is replaced by the model's regenerated version.
        text = decoded
    return text

# Run the mask-filling helper on the sample sentence and show the result.
filled_full_text = fill_masks(
    text=sent,
    tokenizer=tokenizer,
    model=bart_model,
)
print(filled_full_text)

We wrote a model that is similar to the BART model. So we can use it to represent the text. So that we can read the text easily.
