In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from simplet5 import SimpleT5
from transformers import T5Tokenizer, T5ForConditionalGeneration

Global seed set to 42


In [2]:
# Load the dataset
file_path = r'input\news-summary\news_summary.csv'  # Use 'r' before the string to indicate a raw string
df = pd.read_csv(file_path, encoding='latin-1', usecols=['headlines', 'text'])

In [3]:
df.head()

Unnamed: 0,headlines,text
0,Daman & Diu revokes mandatory Rakshabandhan in...,The Administration of Union Territory Daman an...
1,Malaika slams user who trolled her for 'divorc...,Malaika Arora slammed an Instagram user who tr...
2,'Virgin' now corrected to 'Unmarried' in IGIMS...,The Indira Gandhi Institute of Medical Science...
3,Aaj aapne pakad liya: LeT man Dujana before be...,Lashkar-e-Taiba's Kashmir commander Abu Dujana...
4,Hotel staff to get training to spot signs of s...,Hotels in Maharashtra will train their staff t...


In [4]:
# simpleT5 expects dataframe to have 2 columns: "source_text" and "target_text"
df = df.rename(columns={"headlines":"target_text", "text":"source_text"})
df = df[['source_text', 'target_text']]

In [5]:
df.head()

Unnamed: 0,source_text,target_text
0,The Administration of Union Territory Daman an...,Daman & Diu revokes mandatory Rakshabandhan in...
1,Malaika Arora slammed an Instagram user who tr...,Malaika slams user who trolled her for 'divorc...
2,The Indira Gandhi Institute of Medical Science...,'Virgin' now corrected to 'Unmarried' in IGIMS...
3,Lashkar-e-Taiba's Kashmir commander Abu Dujana...,Aaj aapne pakad liya: LeT man Dujana before be...
4,Hotels in Maharashtra will train their staff t...,Hotel staff to get training to spot signs of s...


In [6]:
# T5 model expects a task related prefix: since it is a summarization task, we will add a prefix "summarize: "
df['source_text'] = "summarize: " + df['source_text']
df

Unnamed: 0,source_text,target_text
0,summarize: The Administration of Union Territo...,Daman & Diu revokes mandatory Rakshabandhan in...
1,summarize: Malaika Arora slammed an Instagram ...,Malaika slams user who trolled her for 'divorc...
2,summarize: The Indira Gandhi Institute of Medi...,'Virgin' now corrected to 'Unmarried' in IGIMS...
3,summarize: Lashkar-e-Taiba's Kashmir commander...,Aaj aapne pakad liya: LeT man Dujana before be...
4,summarize: Hotels in Maharashtra will train th...,Hotel staff to get training to spot signs of s...
...,...,...
4509,summarize: Fruit juice concentrate maker Rasna...,Rasna seeking ?250 cr revenue from snack categ...
4510,summarize: Former Indian cricketer Sachin Tend...,Sachin attends Rajya Sabha after questions on ...
4511,"summarize: Aamir Khan, while talking about rea...",Shouldn't rob their childhood: Aamir on kids r...
4512,summarize: The Maharashtra government has init...,"Asha Bhosle gets ?53,000 power bill for unused..."


In [7]:
train_df, test_df = train_test_split(df, test_size=0.3)
train_df.shape, test_df.shape

((3159, 2), (1355, 2))

In [8]:
from simplet5 import SimpleT5

model = SimpleT5()
model.from_pretrained(model_type="t5", model_name="t5-base")

In [9]:
model.train(train_df=train_df[:5000],
            eval_df=test_df[:100], 
            source_max_token_len=128, 
            target_max_token_len=50, 
            batch_size=8, max_epochs=5, use_gpu=False)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 222 M 
-----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
Global seed set to 42
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000025956BA58B0>
Traceback (most recent call last):
  File "c:\users\asura\appdata\local\programs\python\python39\lib\site-packages\torch\utils\data\dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "c:\users\asura\appdata\local\programs\python\python39\lib\site-packages\torch\utils\data\dataloader.py", line 1436, in _shutdown_workers
    if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


In [10]:
model.load_model("t5", "outputs\simplet5-epoch-4-train-loss-0.6131-val-loss-1.4665", use_gpu=False)

In [11]:
text_to_summarize="""Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.

Python is dynamically typed and garbage-collected. It supports multiple programming paradigms, including structured, particularly procedural, object-oriented, and functional programming. It is often described as a "batteries included" language due to its comprehensive standard library.

Guido van Rossum began working on Python in the late 1980s as a successor to the ABC programming language and first released it in 1991 as Python. Python was released in 2000. Python, released in 2008, was a major revision not completely backward-compatible with earlier versions. Python, released in 2020, was the last release of Python.

Python consistently ranks as one of the most popular programming languages. """
model.predict(text_to_summarize)

['Python is a high-level, general-purpose programming language']

In [12]:
# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

# Define the input text
input_text = """
... Inspired by the structure of the brain, artificial neural networks (ANN) are the answer to making computers more human like and help machines reason more like humans.

They are based on the neural structure of the brain. The brain basically learns from experience. It is natural proof that some problems that are beyond the scope of current computers are indeed solvable by small energy efficient packages.

Human Neurons:
To understand how artificial neural networks work let’s first briefly look at the human ones. The exact workings of the human brain are still a mystery. Yet, some aspects of this amazing processor are known. In particular, the most basic element of the human brain is a specific type of cell which, unlike the rest of the body, doesn’t appear to regenerate. Because this type of cell is the only part of the body that isn’t slowly replaced, it is assumed that these cells are what provides us with our abilities to remember, think, and apply previous experiences to our every action. These cells, all 100 billion of them, are known as neurons. Each of these neurons can connect with up to 200,000 other neurons, although 1,000 to 10,000 is typical.

Neural networks are typically organized in layers. Layers are made up of a number of interconnected ‘nodes’ which contain an ‘activation function’. Patterns are presented to the network via the ‘input layer’, which communicates to one or more ‘hidden layers’ where the actual processing is done via a system of weighted ‘connections’. 
In the same way that we learn from experience in our lives as mentioned above, neural networks require data to learn. In most cases, the more data that can be thrown at a neural network, the more accurate it will become. Think of it like any task you do over and over. Over time, you gradually get more efficient and make fewer mistakes.

When researchers or computer scientists set out to train a neural network, they typically divide their data into three sets. First is a training set, which helps the network establish the various weights between its nodes. After this, they fine-tune it using a validation data set. Finally, they’ll use a test set to see if it can successfully turn the input into the desired output.

During the training and supervisory stage, the ANN is taught what to look for and what its output should be, using Yes/No question types with binary numbers.


The simplest variant is the feed-forward neural network. This type of artificial neural network algorithm passes information straight through from input to processing nodes to outputs. It may or may not have hidden node layers, making their functioning more interpretable.

More complex are recurrent neural networks. These deep learning algorithms save the output of processing nodes and feed the result back into the model. This is how the model is said to learn.

Convolutional neural networks are popular today, particularly in the realm of image recognition. This specific type of neural network algorithm has been used in many of the most advanced applications of AI including facial recognition, text digitization and natural language processing.

There are several strategies for learning, such as:
Supervised Learning
Essentially, a strategy that involves a teacher that is smarter than the network itself. For example, let’s take the facial recognition example. The teacher shows the network a bunch of faces, and the teacher already knows the name associated with each face. The network makes its guesses, then the teacher provides the network with the answers. The network can then compare its answers to the known “correct” ones and make adjustments according to its errors.

Unsupervised Learning
Required when there isn’t an example data set with known answers. Imagine searching for a hidden pattern in a data set. An application of this is clustering, i.e. dividing a set of elements into groups according to some unknown pattern. We won’t be looking at any examples of unsupervised learning in this chapter, as this strategy is less relevant for our examples.

Reinforcement Learning
A strategy built on observation. Think of a little mouse running through a maze. If it turns left, it gets a piece of cheese; if it turns right, it receives a little shock. (Don’t worry, this is just a pretend mouse.) Presumably, the mouse will learn over time to turn left. Its neural network makes a decision with an outcome (turn left or right) and observes its environment (yum or ouch). If the observation is negative, the network can adjust its weights in order to make a different decision the next time. Reinforcement learning is common in robotics. At time t, the robot performs a task and observes the results. Did it crash into a wall or fall off a table? Or is it unharmed? We’ll look at reinforcement learning in the context of our simulated steering vehicles.) ...
"""

# Split the input text into smaller segments (max_seq_length tokens each)
max_seq_length = 512  # Maximum sequence length supported by the model
input_segments = [input_text[i:i+max_seq_length] for i in range(0, len(input_text), max_seq_length)]

# Initialize an empty list to store the generated summaries
summaries = []

# Generate summaries for each input segment
for segment in input_segments:
    input_ids = tokenizer.encode("summarize: " + segment, return_tensors="pt", max_length=max_seq_length, truncation=True)
    summary_ids = model.generate(input_ids, max_length=150, min_length=30, num_beams=2, length_penalty=2.0, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    summaries.append(summary)

# Join the summaries of the segments into a final summary
final_summary = " ".join(summaries)

# Print or use the final summary as needed
print(final_summary)


artificial neural networks are the answer to making computers more human like. they are based on the neural structure of the brain. it is natural proof that some problems beyond the scope of current computers are indeed solvable by small energy efficient packages. the exact workings of the human brain are still a mystery. but some aspects of this amazing processor are known. the most basic element of the human brain is a specific type of cell. neurons can connect with up to 200,000 other neurons. each of these neurons can connect with up to 200,000 other neurons. layers are made up of a number of interconnected ‘nodes’ which contain an ‘activation function’. the more data can be thrown at a neural network, the more accurate it will become. the more data can be thrown at a neural network, the more accurate it will become. e learn from experience in our lives. the ANN is taught what to look for and what its output should be. the simplest variant is the feed-forward neural network. recurr