In [26]:
# !pip install transformers
# !pip install sentencepiece
# !pip install pip --upgrade
# !pip install torch torchvision torchaudio

In [1]:
from IPython import display
from transformers import EncoderDecoderModel, AutoTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import AutoModelWithLMHead
import warnings
warnings.filterwarnings('ignore')

from sentencepiece import SentencePieceTrainer
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [2]:
def create_summary(model, tokenizer, text, max_length=512, min_length=30, length_penalty=5.0, repetition_penalty=5.0, num_beams=3):
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=max_length, truncation=True)
    outputs = model.generate(
        inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_beams=num_beams,
        early_stopping=True)
    print(tokenizer.decode(outputs[0]))

In [4]:
#
# T5 Summarizing 
#

In [5]:
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Downloading model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [10]:
story = """
Synthetic biology is a burgeoning field that combines principles from biology, engineering, and computer science to create novel biological systems or redesign existing ones. It offers boundless possibilities, revolutionizing everything from medicine and agriculture to energy production and environmental conservation. This article delves into the fascinating world of synthetic biology, exploring its applications, challenges, and the potential it holds for shaping our future.
"""

summary = create_summary(model, tokenizer, story, 50, 30, 5.5, 5.5, 3)
print(summary)

<pad>Synthetic biology is a burgeoning field that combines principles from biology, engineering, and computer science to create novel biological systems or redesign existing ones. it offers boundless possibilities, revolutionizing everything from medicine and agriculture to the
None


In [11]:
#
# T5 Base Summarization
#

In [12]:
tokenizer_t5news = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-summarize-news")
model_t5news = AutoModelWithLMHead.from_pretrained('mrm8488/t5-base-finetuned-summarize-news')

Downloading (…)okenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [16]:
story = """
Financial markets are not the economy and the economy is not financial markets. But it's often said that they're both afraid of the same things. In this case, the concern is that the economy is careening toward a recession.
"The alarm bells are telling us that something is going to break somewhere in the financial system," said Karl Schamotta, chief market strategist at Corpay, a foreign exchange service in Toronto.
Stock markets have sold off over the past three months. Since the beginning of July, the TSX wiped out all of the gains it made in the first half of the year.
U.S. stock indexes, such as the S&P 500 and the Dow Jones Industrial Average, have remained in positive territory, but not by much.
Those markets reflect a doomy prognosis that isn't necessarily backed up by the economic data.
"""

summary = create_summary(model_t5news, tokenizer_t5news, story, 128, 64, 5.5, 5.5, 3)
print(summary)

<pad> Trader Karl Schamotta, chief market strategist at Corpay, said that the alarm bells are telling us that something is going to break somewhere in the financial system. "The alarm bells are telling us that something is going to break somewhere in the financial system," he added. Stock markets have sold off over the past three months. Since the beginning of July, the TSX wiped out all of the gains it made in the first two months.</s>
None
