In [2]:
#Use a pre-trained google/flan-t5-small as the model.
#Verify if the summarization task works.
from transformers import T5ForConditionalGeneration, T5Tokenizer

def summarize_text(text, max_length=150, min_length=40,
                   model_name="google/flan-t5-small", tokenizer=None, model=None):
    """Summarize ``text`` with a FLAN-T5 sequence-to-sequence model.

    Args:
        text: Text to summarize. It is prefixed with ``"summarize: "`` and
            truncated to 512 tokens before generation.
        max_length: Value passed to ``model.generate`` as ``max_length``.
        min_length: Value passed to ``model.generate`` as ``min_length``.
            It is clamped to the range ``[0, max_length]`` so that a small
            ``max_length`` can never yield contradictory length limits.
        model_name: Checkpoint name used to load the tokenizer and/or model
            when they are not supplied.
        tokenizer: Optional pre-loaded tokenizer. Pass one to avoid reloading
            it on every call.
        model: Optional pre-loaded model. Pass one to avoid reloading the
            weights on every call.

    Returns:
        The decoded summary string, or ``""`` when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to summarize: skip loading the model and generating text.
    if not text or not text.strip():
        return ""

    # Load only what the caller did not provide.
    if tokenizer is None:
        tokenizer = T5Tokenizer.from_pretrained(model_name)
    if model is None:
        model = T5ForConditionalGeneration.from_pretrained(model_name)

    # The default min_length (40) would exceed max_length whenever a caller
    # asks for a very short summary, so keep it within [0, max_length].
    min_length = max(0, min(min_length, max_length))

    # Preprocess input text
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Generate summary
    summary_ids = model.generate(
        inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example text to summarize.
# The triple-quoted literal begins and ends with a newline; those newlines
# are part of the string handed to summarize_text.
text_to_summarize = """
In 2009, NASA launched a powerful telescope named Kepler to search for exoplanets,
or planets outside our solar system. Kepler has since discovered thousands of
exoplanets by observing changes in the brightness of stars. By 2018, Kepler had
exhausted its fuel supply and could no longer continue its mission.
However, the data collected by Kepler continues to be analyzed, leading to new discoveries about the diversity of exoplanets in our galaxy.
"""

# Summarize the text using the function's default max_length and print it.
summary = summarize_text(text_to_summarize)
print("Summary:", summary)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Summary: NASA has launched a powerful telescope named Kepler to search for exoplanets, or planets outside our solar system, according to new data collected by Kepler.


In [3]:
#Verify if the Q&A task works.
from transformers import T5ForConditionalGeneration, T5Tokenizer

def answer_question(context, question, max_length=50,
                    model_name="google/flan-t5-small", tokenizer=None, model=None):
    """Answer ``question`` from ``context`` with a FLAN-T5 model.

    The prompt has the form ``"question: <question> context: <context>"``
    and is truncated to 512 tokens before generation.

    Args:
        context: Passage that should contain the answer.
        question: Question to ask about ``context``.
        max_length: Value passed to ``model.generate`` as ``max_length``.
        model_name: Checkpoint name used to load the tokenizer and/or model
            when they are not supplied.
        tokenizer: Optional pre-loaded tokenizer. Pass one to avoid reloading
            it on every call.
        model: Optional pre-loaded model. Pass one to avoid reloading the
            weights on every call.

    Returns:
        The decoded answer string, or ``""`` when ``question`` is empty or
        contains only whitespace.
    """
    # Without a question there is nothing to answer: skip model loading.
    if not question or not question.strip():
        return ""

    # Load only what the caller did not provide.
    if tokenizer is None:
        tokenizer = T5Tokenizer.from_pretrained(model_name)
    if model is None:
        model = T5ForConditionalGeneration.from_pretrained(model_name)

    # Encode the input
    input_text = "question: " + question + " context: " + context
    input_ids = tokenizer.encode(
        input_text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Generate answer
    outputs = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example context and question.
# The context is a three-paragraph passage about the Eiffel Tower; the
# triple-quoted literal begins and ends with a newline that is part of the
# string handed to answer_question.
context = """
The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower. Constructed from 1887 to 1889 as the entrance to the 1889 World's Fair, it was initially criticized by some of France's leading artists and intellectuals for its design, but it has become a global cultural icon of France and one of the most recognizable structures in the world. The Eiffel Tower is the most-visited paid monument in the world; 6.91 million people ascended it in 2015.

The tower is 324 meters (1,063 ft) tall, about the same height as an 81-story building, and the tallest structure in Paris. Its base is square, measuring 125 meters (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930.

The tower has three levels for visitors, with restaurants on the first and second levels. The top level's upper platform is 276 m (906 ft) above the ground, the highest observation deck accessible to the public in the European Union. Tickets can be purchased to ascend by stairs or lift to the first and second levels. The climb from ground level to the first level is over 300 steps, as is the walk from the first to the second level. Although there is a staircase to the top level, it is usually accessible only by lift.
"""

# The answer to this question appears in the second paragraph of the context.
question = "What is the height of the Eiffel Tower?"

# Get the answer and print it.
answer = answer_question(context, question)
print("Answer:", answer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Answer: 324 meters


In [4]:
#Verify if the English to French translation task works.
from transformers import T5ForConditionalGeneration, T5Tokenizer

def translate_text(text, source_language="English", target_language="French",
                   max_length=100, model_name="google/flan-t5-small",
                   tokenizer=None, model=None):
    """Translate ``text`` between two languages with a FLAN-T5 model.

    The prompt has the form
    ``"translate <source_language> to <target_language>: <text>"`` and is
    truncated to 512 tokens, matching the other task helpers in this notebook.

    Args:
        text: Text to translate.
        source_language: Language name inserted into the prompt as the source.
        target_language: Language name inserted into the prompt as the target.
        max_length: Value passed to ``model.generate`` as ``max_length``.
        model_name: Checkpoint name used to load the tokenizer and/or model
            when they are not supplied.
        tokenizer: Optional pre-loaded tokenizer. Pass one to avoid reloading
            it on every call.
        model: Optional pre-loaded model. Pass one to avoid reloading the
            weights on every call.

    Returns:
        The decoded translation string, or ``""`` when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to translate: skip loading the model and generating text.
    if not text or not text.strip():
        return ""

    # Load only what the caller did not provide.
    if tokenizer is None:
        tokenizer = T5Tokenizer.from_pretrained(model_name)
    if model is None:
        model = T5ForConditionalGeneration.from_pretrained(model_name)

    # Encode the input text. Truncation keeps over-long inputs within the
    # same 512-token budget used for summarization and Q&A.
    prompt = "translate " + source_language + " to " + target_language + ": " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Generate translation
    outputs = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example text to translate (a short English greeting).
english_text = "Hello, how are you?"

# Translate the text and print it.
# NOTE(review): the output recorded for this cell ("Hello, c'est-à-dire?") is
# not a faithful French rendering of the input; check the result by hand
# before treating this task as verified.
french_translation = translate_text(english_text)
print("French Translation:", french_translation)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


French Translation: Hello, c'est-à-dire?
