In [1]:
import torch
import pprint
import evaluate
import numpy as np

from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    TrainingArguments,
    Trainer
)


  from .autonotebook import tqdm as notebook_tqdm





In [2]:
import tqdm
from datasets import load_dataset, Dataset

In [3]:
import numpy as np
import pandas as pd
df = pd.read_csv('./output.csv')

In [4]:
from sklearn.model_selection import train_test_split
dataset_train, dataset_test = train_test_split(df, test_size=0.2, random_state=42)

In [5]:
dataset_train.reset_index(drop=True, inplace=True)
dataset_test.reset_index(drop=True, inplace=True)

In [6]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

In [7]:
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [8]:
BATCH_SIZE = 4
NUM_PROCS = 4
EPOCHS = 10
OUT_DIR = 'results'
MAX_LENGTH = 512

In [9]:
rouge = evaluate.load("rouge")

In [10]:
def summarize_text(text, max_input_length=512, max_output_length=25):
    """
    Summarizes input text using a T5 model.
    """
    input_text = text.strip()
    
    # Tokenize
    inputs = tokenizer.encode(input_text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
    
    # Generate the summary
    outputs = model.generate(inputs, max_length=max_output_length, min_length=5, length_penalty=2.0, num_beams=4)
    
    # Decode the summary
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

In [11]:
data = df[:500] # use only 500 rows for testing finetuning process

from sklearn.model_selection import train_test_split
dataset_train, dataset_test = train_test_split(data, test_size=0.2, random_state=42)

In [12]:
dataset_train.reset_index(drop=True, inplace=True)
dataset_test.reset_index(drop=True, inplace=True)

In [13]:
dataset_test["generated_summary"] = dataset_test["preprocessed_text"].apply(summarize_text)

In [14]:
results = rouge.compute(
    predictions=dataset_test["generated_summary"].tolist(),
    references=dataset_test["Summary"].tolist()
)

In [15]:
for metric, score in results.items():
    print(f"{metric}: {score:.4f}")

rouge1: 0.0927
rouge2: 0.0264
rougeL: 0.0869
rougeLsum: 0.0861


In [16]:
def preprocess_data(row, tokenizer, max_input_length=512, max_target_length=50):
    """
    Tokenize a single row for T5 training.
    """

    inputs = row["preprocessed_text"] 
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True, padding="max_length")
    
    labels = tokenizer(row["Summary"], max_length=max_target_length, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    
    return model_inputs


In [17]:
tokenized_train = dataset_train.apply(
    lambda row: preprocess_data(row, tokenizer),
    axis=1
)

tokenized_test = dataset_test.apply(
    lambda row: preprocess_data(row, tokenizer),
    axis=1
)

In [18]:
tokenized_train


0      [input_ids, attention_mask, labels]
1      [input_ids, attention_mask, labels]
2      [input_ids, attention_mask, labels]
3      [input_ids, attention_mask, labels]
4      [input_ids, attention_mask, labels]
                      ...                 
395    [input_ids, attention_mask, labels]
396    [input_ids, attention_mask, labels]
397    [input_ids, attention_mask, labels]
398    [input_ids, attention_mask, labels]
399    [input_ids, attention_mask, labels]
Length: 400, dtype: object

In [19]:
from transformers import Trainer, TrainingArguments

# Define training arguments
training_args = TrainingArguments(
    output_dir="t5_summarization", 
    evaluation_strategy="steps",    
    eval_steps=50,                 
    logging_dir="./logs",           
    logging_steps=10,              
    save_steps=20,                
    save_total_limit=5,             
    per_device_train_batch_size=8,  
    per_device_eval_batch_size=8,   
    num_train_epochs=30,             
    learning_rate=5e-5,             
    weight_decay=0.01,             
    warmup_steps=50,               
    fp16=torch.cuda.is_available(),  
    report_to="none"                
)

# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer
)


  trainer = Trainer(


In [20]:
history = trainer.train()

  0%|          | 0/1500 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
  1%|          | 10/1500 [00:10<17:29,  1.42it/s]

{'loss': 17.4645, 'grad_norm': 55.47995376586914, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2}


  1%|▏         | 20/1500 [00:17<20:03,  1.23it/s]

{'loss': 17.3244, 'grad_norm': 51.48433303833008, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.4}


  2%|▏         | 30/1500 [00:33<34:40,  1.42s/it]

{'loss': 15.5257, 'grad_norm': 63.663856506347656, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.6}


  3%|▎         | 40/1500 [00:44<25:42,  1.06s/it]

{'loss': 12.8032, 'grad_norm': 67.64879608154297, 'learning_rate': 3.6e-05, 'epoch': 0.8}


  3%|▎         | 50/1500 [00:57<24:34,  1.02s/it]

{'loss': 7.7723, 'grad_norm': 67.86785125732422, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.0}


                                                 
  3%|▎         | 50/1500 [01:02<24:34,  1.02s/it]

{'eval_loss': 5.432809829711914, 'eval_runtime': 4.7319, 'eval_samples_per_second': 21.133, 'eval_steps_per_second': 2.747, 'epoch': 1.0}


  4%|▍         | 60/1500 [01:12<27:35,  1.15s/it]

{'loss': 3.6888, 'grad_norm': 28.61921501159668, 'learning_rate': 4.982758620689655e-05, 'epoch': 1.2}


  5%|▍         | 70/1500 [01:26<31:25,  1.32s/it]

{'loss': 2.2417, 'grad_norm': 4.001091957092285, 'learning_rate': 4.9482758620689655e-05, 'epoch': 1.4}


  5%|▌         | 80/1500 [01:40<32:41,  1.38s/it]

{'loss': 1.4925, 'grad_norm': 3.7462856769561768, 'learning_rate': 4.913793103448276e-05, 'epoch': 1.6}


  6%|▌         | 90/1500 [01:58<33:29,  1.43s/it]  

{'loss': 1.357, 'grad_norm': 2.4642274379730225, 'learning_rate': 4.8793103448275864e-05, 'epoch': 1.8}


  7%|▋         | 100/1500 [02:11<24:37,  1.06s/it]

{'loss': 1.0875, 'grad_norm': 1.7240034341812134, 'learning_rate': 4.844827586206897e-05, 'epoch': 2.0}


                                                  
  7%|▋         | 100/1500 [02:17<24:37,  1.06s/it]

{'eval_loss': 0.8633591532707214, 'eval_runtime': 5.3711, 'eval_samples_per_second': 18.618, 'eval_steps_per_second': 2.42, 'epoch': 2.0}


  7%|▋         | 110/1500 [02:29<24:53,  1.07s/it]  

{'loss': 0.9165, 'grad_norm': 1.657009482383728, 'learning_rate': 4.810344827586207e-05, 'epoch': 2.2}


  8%|▊         | 120/1500 [02:39<24:48,  1.08s/it]

{'loss': 0.8916, 'grad_norm': 2.5704824924468994, 'learning_rate': 4.7758620689655176e-05, 'epoch': 2.4}


  9%|▊         | 130/1500 [02:52<15:50,  1.44it/s]

{'loss': 0.8536, 'grad_norm': 1.669049620628357, 'learning_rate': 4.741379310344828e-05, 'epoch': 2.6}


  9%|▉         | 140/1500 [03:02<17:56,  1.26it/s]

{'loss': 0.8346, 'grad_norm': 3.8465700149536133, 'learning_rate': 4.7068965517241385e-05, 'epoch': 2.8}


 10%|█         | 150/1500 [03:15<18:45,  1.20it/s]

{'loss': 0.7635, 'grad_norm': 1.2653228044509888, 'learning_rate': 4.672413793103448e-05, 'epoch': 3.0}


                                                  
 10%|█         | 150/1500 [03:19<18:45,  1.20it/s]

{'eval_loss': 0.6658065319061279, 'eval_runtime': 4.2366, 'eval_samples_per_second': 23.604, 'eval_steps_per_second': 3.068, 'epoch': 3.0}


 11%|█         | 160/1500 [03:28<18:16,  1.22it/s]

{'loss': 0.6788, 'grad_norm': 1.230341911315918, 'learning_rate': 4.6379310344827586e-05, 'epoch': 3.2}


 11%|█▏        | 170/1500 [03:42<23:25,  1.06s/it]

{'loss': 0.687, 'grad_norm': 1.4028878211975098, 'learning_rate': 4.603448275862069e-05, 'epoch': 3.4}


 12%|█▏        | 180/1500 [03:54<25:05,  1.14s/it]

{'loss': 0.6521, 'grad_norm': 1.6023094654083252, 'learning_rate': 4.5689655172413794e-05, 'epoch': 3.6}


 13%|█▎        | 190/1500 [04:05<22:49,  1.05s/it]

{'loss': 0.6325, 'grad_norm': 1.122672438621521, 'learning_rate': 4.53448275862069e-05, 'epoch': 3.8}


 13%|█▎        | 200/1500 [04:15<20:16,  1.07it/s]

{'loss': 0.6346, 'grad_norm': 1.1407829523086548, 'learning_rate': 4.5e-05, 'epoch': 4.0}


                                                  
 13%|█▎        | 200/1500 [04:18<20:16,  1.07it/s]

{'eval_loss': 0.5679903626441956, 'eval_runtime': 2.832, 'eval_samples_per_second': 35.311, 'eval_steps_per_second': 4.59, 'epoch': 4.0}


 14%|█▍        | 210/1500 [04:32<24:01,  1.12s/it]

{'loss': 0.5433, 'grad_norm': 0.9694510698318481, 'learning_rate': 4.465517241379311e-05, 'epoch': 4.2}


 15%|█▍        | 220/1500 [04:43<26:32,  1.24s/it]

{'loss': 0.5954, 'grad_norm': 1.7336500883102417, 'learning_rate': 4.431034482758621e-05, 'epoch': 4.4}


 15%|█▌        | 230/1500 [04:53<14:44,  1.44it/s]

{'loss': 0.5321, 'grad_norm': 1.1311124563217163, 'learning_rate': 4.396551724137931e-05, 'epoch': 4.6}


 16%|█▌        | 240/1500 [05:00<13:21,  1.57it/s]

{'loss': 0.5915, 'grad_norm': 1.5829060077667236, 'learning_rate': 4.362068965517241e-05, 'epoch': 4.8}


 17%|█▋        | 250/1500 [05:07<10:35,  1.97it/s]

{'loss': 0.5612, 'grad_norm': 1.094623327255249, 'learning_rate': 4.327586206896552e-05, 'epoch': 5.0}


                                                  
 17%|█▋        | 250/1500 [05:09<10:35,  1.97it/s]

{'eval_loss': 0.5433144569396973, 'eval_runtime': 1.2552, 'eval_samples_per_second': 79.671, 'eval_steps_per_second': 10.357, 'epoch': 5.0}


 17%|█▋        | 260/1500 [05:12<06:47,  3.04it/s]

{'loss': 0.5146, 'grad_norm': 0.6297807693481445, 'learning_rate': 4.293103448275863e-05, 'epoch': 5.2}


 18%|█▊        | 270/1500 [05:18<06:43,  3.05it/s]

{'loss': 0.5268, 'grad_norm': 0.7989060878753662, 'learning_rate': 4.2586206896551725e-05, 'epoch': 5.4}


 19%|█▊        | 280/1500 [05:20<05:10,  3.93it/s]

{'loss': 0.5245, 'grad_norm': 0.9440982341766357, 'learning_rate': 4.224137931034483e-05, 'epoch': 5.6}


 19%|█▉        | 290/1500 [05:24<05:25,  3.72it/s]

{'loss': 0.5488, 'grad_norm': 1.0897860527038574, 'learning_rate': 4.1896551724137934e-05, 'epoch': 5.8}


 20%|██        | 300/1500 [05:27<05:01,  3.98it/s]

{'loss': 0.5183, 'grad_norm': 1.024413824081421, 'learning_rate': 4.155172413793104e-05, 'epoch': 6.0}


                                                  
 20%|██        | 300/1500 [05:28<05:01,  3.98it/s]

{'eval_loss': 0.5344110131263733, 'eval_runtime': 1.0099, 'eval_samples_per_second': 99.024, 'eval_steps_per_second': 12.873, 'epoch': 6.0}


 21%|██        | 310/1500 [05:32<05:31,  3.59it/s]

{'loss': 0.4942, 'grad_norm': 0.8143677711486816, 'learning_rate': 4.120689655172414e-05, 'epoch': 6.2}


 21%|██▏       | 320/1500 [05:34<04:58,  3.96it/s]

{'loss': 0.4875, 'grad_norm': 0.9180889129638672, 'learning_rate': 4.086206896551724e-05, 'epoch': 6.4}


 22%|██▏       | 330/1500 [05:38<05:11,  3.75it/s]

{'loss': 0.5016, 'grad_norm': 1.1784014701843262, 'learning_rate': 4.0517241379310344e-05, 'epoch': 6.6}


 23%|██▎       | 340/1500 [05:40<04:51,  3.97it/s]

{'loss': 0.4661, 'grad_norm': 0.8227290511131287, 'learning_rate': 4.0172413793103455e-05, 'epoch': 6.8}


 23%|██▎       | 350/1500 [05:44<05:07,  3.75it/s]

{'loss': 0.5426, 'grad_norm': 0.8527019619941711, 'learning_rate': 3.982758620689656e-05, 'epoch': 7.0}


                                                  
 23%|██▎       | 350/1500 [05:45<05:07,  3.75it/s]

{'eval_loss': 0.5303544402122498, 'eval_runtime': 1.011, 'eval_samples_per_second': 98.912, 'eval_steps_per_second': 12.859, 'epoch': 7.0}


 24%|██▍       | 360/1500 [05:48<05:01,  3.78it/s]

{'loss': 0.5008, 'grad_norm': 1.0830916166305542, 'learning_rate': 3.9482758620689656e-05, 'epoch': 7.2}


 25%|██▍       | 370/1500 [05:52<05:02,  3.74it/s]

{'loss': 0.5452, 'grad_norm': 0.8343468308448792, 'learning_rate': 3.913793103448276e-05, 'epoch': 7.4}


 25%|██▌       | 380/1500 [05:54<04:41,  3.98it/s]

{'loss': 0.4611, 'grad_norm': 0.6817488074302673, 'learning_rate': 3.8793103448275865e-05, 'epoch': 7.6}


 26%|██▌       | 390/1500 [05:58<04:57,  3.73it/s]

{'loss': 0.444, 'grad_norm': 0.7619191408157349, 'learning_rate': 3.844827586206897e-05, 'epoch': 7.8}


 27%|██▋       | 400/1500 [06:00<04:36,  3.97it/s]

{'loss': 0.4356, 'grad_norm': 0.9807654619216919, 'learning_rate': 3.8103448275862066e-05, 'epoch': 8.0}


                                                  
 27%|██▋       | 400/1500 [06:01<04:36,  3.97it/s]

{'eval_loss': 0.5297281742095947, 'eval_runtime': 1.0097, 'eval_samples_per_second': 99.042, 'eval_steps_per_second': 12.875, 'epoch': 8.0}


 27%|██▋       | 410/1500 [06:05<05:05,  3.57it/s]

{'loss': 0.4799, 'grad_norm': 0.9346328377723694, 'learning_rate': 3.775862068965517e-05, 'epoch': 8.2}


 28%|██▊       | 420/1500 [06:08<04:32,  3.96it/s]

{'loss': 0.4613, 'grad_norm': 1.2138407230377197, 'learning_rate': 3.741379310344828e-05, 'epoch': 8.4}


 29%|██▊       | 430/1500 [06:12<04:46,  3.73it/s]

{'loss': 0.4456, 'grad_norm': 0.6451516151428223, 'learning_rate': 3.7068965517241385e-05, 'epoch': 8.6}


 29%|██▉       | 440/1500 [06:14<04:28,  3.95it/s]

{'loss': 0.4932, 'grad_norm': 1.7848231792449951, 'learning_rate': 3.672413793103448e-05, 'epoch': 8.8}


 30%|███       | 450/1500 [06:18<04:41,  3.73it/s]

{'loss': 0.4642, 'grad_norm': 0.7115218043327332, 'learning_rate': 3.637931034482759e-05, 'epoch': 9.0}


                                                  
 30%|███       | 450/1500 [06:19<04:41,  3.73it/s]

{'eval_loss': 0.5246304273605347, 'eval_runtime': 1.0206, 'eval_samples_per_second': 97.978, 'eval_steps_per_second': 12.737, 'epoch': 9.0}


 31%|███       | 460/1500 [06:22<04:38,  3.74it/s]

{'loss': 0.4578, 'grad_norm': 0.8322596549987793, 'learning_rate': 3.603448275862069e-05, 'epoch': 9.2}


 31%|███▏      | 470/1500 [06:26<04:36,  3.72it/s]

{'loss': 0.4448, 'grad_norm': 0.7244075536727905, 'learning_rate': 3.5689655172413795e-05, 'epoch': 9.4}


 32%|███▏      | 480/1500 [06:28<04:20,  3.91it/s]

{'loss': 0.5048, 'grad_norm': 0.7905527353286743, 'learning_rate': 3.53448275862069e-05, 'epoch': 9.6}


 33%|███▎      | 490/1500 [06:32<04:35,  3.67it/s]

{'loss': 0.4024, 'grad_norm': 0.9224662184715271, 'learning_rate': 3.5e-05, 'epoch': 9.8}


 33%|███▎      | 500/1500 [06:35<04:14,  3.93it/s]

{'loss': 0.4682, 'grad_norm': 1.3348708152770996, 'learning_rate': 3.465517241379311e-05, 'epoch': 10.0}


                                                  
 33%|███▎      | 500/1500 [06:36<04:14,  3.93it/s]

{'eval_loss': 0.5246934294700623, 'eval_runtime': 1.0206, 'eval_samples_per_second': 97.982, 'eval_steps_per_second': 12.738, 'epoch': 10.0}


 34%|███▍      | 510/1500 [06:40<04:44,  3.48it/s]

{'loss': 0.479, 'grad_norm': 0.7869178652763367, 'learning_rate': 3.431034482758621e-05, 'epoch': 10.2}


 35%|███▍      | 520/1500 [06:42<04:09,  3.92it/s]

{'loss': 0.3777, 'grad_norm': 0.787924587726593, 'learning_rate': 3.3965517241379316e-05, 'epoch': 10.4}


 35%|███▌      | 530/1500 [06:46<04:23,  3.68it/s]

{'loss': 0.4492, 'grad_norm': 0.6761369109153748, 'learning_rate': 3.3620689655172414e-05, 'epoch': 10.6}


 36%|███▌      | 540/1500 [06:49<04:04,  3.93it/s]

{'loss': 0.4324, 'grad_norm': 0.8727723360061646, 'learning_rate': 3.327586206896552e-05, 'epoch': 10.8}


 37%|███▋      | 550/1500 [06:53<04:16,  3.71it/s]

{'loss': 0.4928, 'grad_norm': 1.3017637729644775, 'learning_rate': 3.293103448275862e-05, 'epoch': 11.0}


                                                  
 37%|███▋      | 550/1500 [06:54<04:16,  3.71it/s]

{'eval_loss': 0.5271170139312744, 'eval_runtime': 1.0216, 'eval_samples_per_second': 97.887, 'eval_steps_per_second': 12.725, 'epoch': 11.0}


 37%|███▋      | 560/1500 [06:56<04:10,  3.75it/s]

{'loss': 0.39, 'grad_norm': 0.7221291065216064, 'learning_rate': 3.2586206896551726e-05, 'epoch': 11.2}


 38%|███▊      | 570/1500 [07:00<04:11,  3.70it/s]

{'loss': 0.4599, 'grad_norm': 1.2596242427825928, 'learning_rate': 3.2241379310344824e-05, 'epoch': 11.4}


 39%|███▊      | 580/1500 [07:03<03:54,  3.92it/s]

{'loss': 0.4609, 'grad_norm': 0.6871819496154785, 'learning_rate': 3.1896551724137935e-05, 'epoch': 11.6}


 39%|███▉      | 590/1500 [07:07<04:08,  3.67it/s]

{'loss': 0.4371, 'grad_norm': 0.7755969762802124, 'learning_rate': 3.155172413793104e-05, 'epoch': 11.8}


 40%|████      | 600/1500 [07:09<03:48,  3.95it/s]

{'loss': 0.4154, 'grad_norm': 4.396142482757568, 'learning_rate': 3.120689655172414e-05, 'epoch': 12.0}


                                                  
 40%|████      | 600/1500 [07:10<03:48,  3.95it/s]

{'eval_loss': 0.5251719355583191, 'eval_runtime': 1.0248, 'eval_samples_per_second': 97.576, 'eval_steps_per_second': 12.685, 'epoch': 12.0}


 41%|████      | 610/1500 [07:14<04:11,  3.54it/s]

{'loss': 0.3928, 'grad_norm': 0.7241643667221069, 'learning_rate': 3.086206896551724e-05, 'epoch': 12.2}


 41%|████▏     | 620/1500 [07:17<03:43,  3.94it/s]

{'loss': 0.4182, 'grad_norm': 0.789628803730011, 'learning_rate': 3.0517241379310348e-05, 'epoch': 12.4}


 42%|████▏     | 630/1500 [07:21<03:54,  3.70it/s]

{'loss': 0.4172, 'grad_norm': 0.6868165731430054, 'learning_rate': 3.017241379310345e-05, 'epoch': 12.6}


 43%|████▎     | 640/1500 [07:23<03:37,  3.95it/s]

{'loss': 0.4473, 'grad_norm': 0.9842580556869507, 'learning_rate': 2.9827586206896553e-05, 'epoch': 12.8}


 43%|████▎     | 650/1500 [07:27<03:51,  3.68it/s]

{'loss': 0.4366, 'grad_norm': 0.6711007952690125, 'learning_rate': 2.9482758620689654e-05, 'epoch': 13.0}


                                                  
 43%|████▎     | 650/1500 [07:28<03:51,  3.68it/s]

{'eval_loss': 0.5259883999824524, 'eval_runtime': 1.0279, 'eval_samples_per_second': 97.282, 'eval_steps_per_second': 12.647, 'epoch': 13.0}


 44%|████▍     | 660/1500 [07:31<03:44,  3.74it/s]

{'loss': 0.4128, 'grad_norm': 0.8139483332633972, 'learning_rate': 2.913793103448276e-05, 'epoch': 13.2}


 45%|████▍     | 670/1500 [07:35<03:45,  3.68it/s]

{'loss': 0.4169, 'grad_norm': 0.7770506143569946, 'learning_rate': 2.8793103448275865e-05, 'epoch': 13.4}


 45%|████▌     | 680/1500 [07:38<03:29,  3.92it/s]

{'loss': 0.4336, 'grad_norm': 0.7557099461555481, 'learning_rate': 2.844827586206897e-05, 'epoch': 13.6}


 46%|████▌     | 690/1500 [07:42<03:40,  3.68it/s]

{'loss': 0.4172, 'grad_norm': 0.7813484072685242, 'learning_rate': 2.810344827586207e-05, 'epoch': 13.8}


 47%|████▋     | 700/1500 [07:44<03:23,  3.92it/s]

{'loss': 0.4303, 'grad_norm': 1.09162437915802, 'learning_rate': 2.7758620689655175e-05, 'epoch': 14.0}


                                                  
 47%|████▋     | 700/1500 [07:45<03:23,  3.92it/s]

{'eval_loss': 0.5300115942955017, 'eval_runtime': 1.0215, 'eval_samples_per_second': 97.892, 'eval_steps_per_second': 12.726, 'epoch': 14.0}


 47%|████▋     | 710/1500 [07:49<03:48,  3.46it/s]

{'loss': 0.3641, 'grad_norm': 0.6003327369689941, 'learning_rate': 2.7413793103448275e-05, 'epoch': 14.2}


 48%|████▊     | 720/1500 [07:52<03:18,  3.92it/s]

{'loss': 0.4048, 'grad_norm': 0.8925138115882874, 'learning_rate': 2.706896551724138e-05, 'epoch': 14.4}


 49%|████▊     | 730/1500 [07:56<03:28,  3.70it/s]

{'loss': 0.453, 'grad_norm': 1.2211285829544067, 'learning_rate': 2.672413793103448e-05, 'epoch': 14.6}


 49%|████▉     | 740/1500 [07:58<03:14,  3.91it/s]

{'loss': 0.4314, 'grad_norm': 0.6981301307678223, 'learning_rate': 2.637931034482759e-05, 'epoch': 14.8}


 50%|█████     | 750/1500 [08:02<03:22,  3.71it/s]

{'loss': 0.4235, 'grad_norm': 0.949126660823822, 'learning_rate': 2.6034482758620692e-05, 'epoch': 15.0}


                                                  
 50%|█████     | 750/1500 [08:03<03:22,  3.71it/s]

{'eval_loss': 0.5280553698539734, 'eval_runtime': 1.0107, 'eval_samples_per_second': 98.946, 'eval_steps_per_second': 12.863, 'epoch': 15.0}


 51%|█████     | 760/1500 [08:06<03:16,  3.77it/s]

{'loss': 0.4032, 'grad_norm': 0.7931534051895142, 'learning_rate': 2.5689655172413796e-05, 'epoch': 15.2}


 51%|█████▏    | 770/1500 [08:10<03:16,  3.72it/s]

{'loss': 0.4058, 'grad_norm': 0.8583183884620667, 'learning_rate': 2.5344827586206897e-05, 'epoch': 15.4}


 52%|█████▏    | 780/1500 [08:12<03:01,  3.97it/s]

{'loss': 0.4027, 'grad_norm': 1.1719425916671753, 'learning_rate': 2.5e-05, 'epoch': 15.6}


 53%|█████▎    | 790/1500 [08:16<03:10,  3.73it/s]

{'loss': 0.4253, 'grad_norm': 0.7402713894844055, 'learning_rate': 2.4655172413793105e-05, 'epoch': 15.8}


 53%|█████▎    | 800/1500 [08:19<02:57,  3.94it/s]

{'loss': 0.3972, 'grad_norm': 1.045667290687561, 'learning_rate': 2.4310344827586206e-05, 'epoch': 16.0}


                                                  
 53%|█████▎    | 800/1500 [08:20<02:57,  3.94it/s]

{'eval_loss': 0.5291574597358704, 'eval_runtime': 1.0208, 'eval_samples_per_second': 97.964, 'eval_steps_per_second': 12.735, 'epoch': 16.0}


 54%|█████▍    | 810/1500 [08:24<03:13,  3.56it/s]

{'loss': 0.356, 'grad_norm': 0.8380497694015503, 'learning_rate': 2.3965517241379314e-05, 'epoch': 16.2}


 55%|█████▍    | 820/1500 [08:26<02:51,  3.95it/s]

{'loss': 0.4274, 'grad_norm': 0.7329392433166504, 'learning_rate': 2.3620689655172415e-05, 'epoch': 16.4}


 55%|█████▌    | 830/1500 [08:30<03:00,  3.71it/s]

{'loss': 0.4185, 'grad_norm': 1.3718228340148926, 'learning_rate': 2.327586206896552e-05, 'epoch': 16.6}


 56%|█████▌    | 840/1500 [08:33<02:47,  3.95it/s]

{'loss': 0.43, 'grad_norm': 0.7202014327049255, 'learning_rate': 2.293103448275862e-05, 'epoch': 16.8}


 57%|█████▋    | 850/1500 [08:36<02:54,  3.73it/s]

{'loss': 0.378, 'grad_norm': 1.7171193361282349, 'learning_rate': 2.2586206896551727e-05, 'epoch': 17.0}


                                                  
 57%|█████▋    | 850/1500 [08:38<02:54,  3.73it/s]

{'eval_loss': 0.5285398960113525, 'eval_runtime': 1.017, 'eval_samples_per_second': 98.324, 'eval_steps_per_second': 12.782, 'epoch': 17.0}


 57%|█████▋    | 860/1500 [08:40<02:50,  3.76it/s]

{'loss': 0.427, 'grad_norm': 0.7219114899635315, 'learning_rate': 2.2241379310344828e-05, 'epoch': 17.2}


 58%|█████▊    | 870/1500 [08:44<02:49,  3.71it/s]

{'loss': 0.3399, 'grad_norm': 0.8503893613815308, 'learning_rate': 2.1896551724137932e-05, 'epoch': 17.4}


 59%|█████▊    | 880/1500 [08:46<02:36,  3.96it/s]

{'loss': 0.355, 'grad_norm': 0.6194727420806885, 'learning_rate': 2.1551724137931033e-05, 'epoch': 17.6}


 59%|█████▉    | 890/1500 [08:50<02:43,  3.73it/s]

{'loss': 0.4384, 'grad_norm': 0.8586394786834717, 'learning_rate': 2.120689655172414e-05, 'epoch': 17.8}


 60%|██████    | 900/1500 [08:53<02:31,  3.97it/s]

{'loss': 0.4121, 'grad_norm': 0.8078247904777527, 'learning_rate': 2.086206896551724e-05, 'epoch': 18.0}


                                                  
 60%|██████    | 900/1500 [08:54<02:31,  3.97it/s]

{'eval_loss': 0.5300240516662598, 'eval_runtime': 1.0161, 'eval_samples_per_second': 98.411, 'eval_steps_per_second': 12.793, 'epoch': 18.0}


 61%|██████    | 910/1500 [08:58<02:46,  3.55it/s]

{'loss': 0.4267, 'grad_norm': 3.1697514057159424, 'learning_rate': 2.0517241379310345e-05, 'epoch': 18.2}


 61%|██████▏   | 920/1500 [09:00<02:26,  3.95it/s]

{'loss': 0.4181, 'grad_norm': 0.6970576643943787, 'learning_rate': 2.017241379310345e-05, 'epoch': 18.4}


 62%|██████▏   | 930/1500 [09:04<02:32,  3.74it/s]

{'loss': 0.3709, 'grad_norm': 0.8735141158103943, 'learning_rate': 1.9827586206896554e-05, 'epoch': 18.6}


 63%|██████▎   | 940/1500 [09:06<02:22,  3.94it/s]

{'loss': 0.3564, 'grad_norm': 0.6426888108253479, 'learning_rate': 1.9482758620689655e-05, 'epoch': 18.8}


 63%|██████▎   | 950/1500 [09:11<02:29,  3.69it/s]

{'loss': 0.3871, 'grad_norm': 1.4034754037857056, 'learning_rate': 1.913793103448276e-05, 'epoch': 19.0}


                                                  
 63%|██████▎   | 950/1500 [09:12<02:29,  3.69it/s]

{'eval_loss': 0.5334317684173584, 'eval_runtime': 1.0188, 'eval_samples_per_second': 98.158, 'eval_steps_per_second': 12.761, 'epoch': 19.0}


 64%|██████▍   | 960/1500 [09:14<02:38,  3.41it/s]

{'loss': 0.4122, 'grad_norm': 1.5582900047302246, 'learning_rate': 1.8793103448275863e-05, 'epoch': 19.2}


 65%|██████▍   | 970/1500 [09:19<02:33,  3.44it/s]

{'loss': 0.3704, 'grad_norm': 0.874621570110321, 'learning_rate': 1.8448275862068967e-05, 'epoch': 19.4}


 65%|██████▌   | 980/1500 [09:22<02:23,  3.61it/s]

{'loss': 0.3803, 'grad_norm': 1.0659558773040771, 'learning_rate': 1.810344827586207e-05, 'epoch': 19.6}


 66%|██████▌   | 990/1500 [09:26<02:26,  3.47it/s]

{'loss': 0.347, 'grad_norm': 0.7975731492042542, 'learning_rate': 1.7758620689655172e-05, 'epoch': 19.8}


 67%|██████▋   | 1000/1500 [09:29<02:08,  3.90it/s]

{'loss': 0.4092, 'grad_norm': 0.6542364954948425, 'learning_rate': 1.7413793103448276e-05, 'epoch': 20.0}


                                                   
 67%|██████▋   | 1000/1500 [09:30<02:08,  3.90it/s]

{'eval_loss': 0.534136176109314, 'eval_runtime': 1.0305, 'eval_samples_per_second': 97.042, 'eval_steps_per_second': 12.615, 'epoch': 20.0}


 67%|██████▋   | 1010/1500 [09:34<02:19,  3.51it/s]

{'loss': 0.4233, 'grad_norm': 0.8758023381233215, 'learning_rate': 1.706896551724138e-05, 'epoch': 20.2}


 68%|██████▊   | 1020/1500 [09:36<02:02,  3.90it/s]

{'loss': 0.3631, 'grad_norm': 0.6619411706924438, 'learning_rate': 1.6724137931034485e-05, 'epoch': 20.4}


 69%|██████▊   | 1030/1500 [09:40<02:07,  3.68it/s]

{'loss': 0.3433, 'grad_norm': 0.7491041421890259, 'learning_rate': 1.6379310344827585e-05, 'epoch': 20.6}


 69%|██████▉   | 1040/1500 [09:43<01:57,  3.93it/s]

{'loss': 0.4094, 'grad_norm': 0.7577455043792725, 'learning_rate': 1.603448275862069e-05, 'epoch': 20.8}


 70%|███████   | 1050/1500 [09:47<02:02,  3.68it/s]

{'loss': 0.3771, 'grad_norm': 0.6848618984222412, 'learning_rate': 1.5689655172413794e-05, 'epoch': 21.0}


                                                   
 70%|███████   | 1050/1500 [09:48<02:02,  3.68it/s]

{'eval_loss': 0.534401535987854, 'eval_runtime': 1.01, 'eval_samples_per_second': 99.007, 'eval_steps_per_second': 12.871, 'epoch': 21.0}


 71%|███████   | 1060/1500 [09:51<01:56,  3.78it/s]

{'loss': 0.3678, 'grad_norm': 0.8414308428764343, 'learning_rate': 1.5344827586206898e-05, 'epoch': 21.2}


 71%|███████▏  | 1070/1500 [09:54<01:55,  3.72it/s]

{'loss': 0.4485, 'grad_norm': 0.834467351436615, 'learning_rate': 1.5e-05, 'epoch': 21.4}


 72%|███████▏  | 1080/1500 [09:57<01:46,  3.95it/s]

{'loss': 0.3774, 'grad_norm': 1.1040151119232178, 'learning_rate': 1.4655172413793103e-05, 'epoch': 21.6}


 73%|███████▎  | 1090/1500 [10:01<01:49,  3.74it/s]

{'loss': 0.3668, 'grad_norm': 0.9208842515945435, 'learning_rate': 1.4310344827586209e-05, 'epoch': 21.8}


 73%|███████▎  | 1100/1500 [10:03<01:40,  3.96it/s]

{'loss': 0.324, 'grad_norm': 0.6305163502693176, 'learning_rate': 1.3965517241379311e-05, 'epoch': 22.0}


                                                   
 73%|███████▎  | 1100/1500 [10:04<01:40,  3.96it/s]

{'eval_loss': 0.5355440378189087, 'eval_runtime': 1.0141, 'eval_samples_per_second': 98.609, 'eval_steps_per_second': 12.819, 'epoch': 22.0}


 74%|███████▍  | 1110/1500 [10:08<01:49,  3.57it/s]

{'loss': 0.3945, 'grad_norm': 0.8663713932037354, 'learning_rate': 1.3620689655172414e-05, 'epoch': 22.2}


 75%|███████▍  | 1120/1500 [10:11<01:36,  3.93it/s]

{'loss': 0.3661, 'grad_norm': 0.7169099450111389, 'learning_rate': 1.3275862068965516e-05, 'epoch': 22.4}


 75%|███████▌  | 1130/1500 [10:14<01:39,  3.71it/s]

{'loss': 0.3685, 'grad_norm': 0.5304214954376221, 'learning_rate': 1.2931034482758622e-05, 'epoch': 22.6}


 76%|███████▌  | 1140/1500 [10:17<01:31,  3.95it/s]

{'loss': 0.3717, 'grad_norm': 1.1197963953018188, 'learning_rate': 1.2586206896551725e-05, 'epoch': 22.8}


 77%|███████▋  | 1150/1500 [10:21<01:33,  3.74it/s]

{'loss': 0.3896, 'grad_norm': 1.0218079090118408, 'learning_rate': 1.2241379310344827e-05, 'epoch': 23.0}


                                                   
 77%|███████▋  | 1150/1500 [10:22<01:33,  3.74it/s]

{'eval_loss': 0.5367036461830139, 'eval_runtime': 1.0154, 'eval_samples_per_second': 98.479, 'eval_steps_per_second': 12.802, 'epoch': 23.0}


 77%|███████▋  | 1160/1500 [10:24<01:30,  3.76it/s]

{'loss': 0.3414, 'grad_norm': 0.7090765237808228, 'learning_rate': 1.1896551724137931e-05, 'epoch': 23.2}


 78%|███████▊  | 1170/1500 [10:28<01:29,  3.69it/s]

{'loss': 0.3972, 'grad_norm': 0.755588948726654, 'learning_rate': 1.1551724137931034e-05, 'epoch': 23.4}


 79%|███████▊  | 1180/1500 [10:31<01:21,  3.93it/s]

{'loss': 0.3445, 'grad_norm': 0.6970490217208862, 'learning_rate': 1.1206896551724138e-05, 'epoch': 23.6}


 79%|███████▉  | 1190/1500 [10:34<01:22,  3.74it/s]

{'loss': 0.3581, 'grad_norm': 0.8294486999511719, 'learning_rate': 1.0862068965517242e-05, 'epoch': 23.8}


 80%|████████  | 1200/1500 [10:37<01:15,  3.98it/s]

{'loss': 0.4073, 'grad_norm': 0.9143770337104797, 'learning_rate': 1.0517241379310346e-05, 'epoch': 24.0}


                                                   
 80%|████████  | 1200/1500 [10:38<01:15,  3.98it/s]

{'eval_loss': 0.5380178689956665, 'eval_runtime': 1.012, 'eval_samples_per_second': 98.817, 'eval_steps_per_second': 12.846, 'epoch': 24.0}


 81%|████████  | 1210/1500 [10:42<01:21,  3.54it/s]

{'loss': 0.3729, 'grad_norm': 0.621496319770813, 'learning_rate': 1.0172413793103449e-05, 'epoch': 24.2}


 81%|████████▏ | 1220/1500 [10:44<01:10,  3.95it/s]

{'loss': 0.3569, 'grad_norm': 0.7794933319091797, 'learning_rate': 9.827586206896553e-06, 'epoch': 24.4}


 82%|████████▏ | 1230/1500 [10:48<01:12,  3.72it/s]

{'loss': 0.3455, 'grad_norm': 0.6758860945701599, 'learning_rate': 9.482758620689655e-06, 'epoch': 24.6}


 83%|████████▎ | 1240/1500 [10:51<01:05,  3.94it/s]

{'loss': 0.3915, 'grad_norm': 1.330836534500122, 'learning_rate': 9.13793103448276e-06, 'epoch': 24.8}


 83%|████████▎ | 1250/1500 [10:54<01:07,  3.72it/s]

{'loss': 0.4073, 'grad_norm': 0.6535670757293701, 'learning_rate': 8.793103448275862e-06, 'epoch': 25.0}


                                                   
 83%|████████▎ | 1250/1500 [10:55<01:07,  3.72it/s]

{'eval_loss': 0.5373029708862305, 'eval_runtime': 1.0147, 'eval_samples_per_second': 98.549, 'eval_steps_per_second': 12.811, 'epoch': 25.0}


 84%|████████▍ | 1260/1500 [10:58<01:03,  3.78it/s]

{'loss': 0.3121, 'grad_norm': 0.8352802991867065, 'learning_rate': 8.448275862068966e-06, 'epoch': 25.2}


 85%|████████▍ | 1270/1500 [11:02<01:02,  3.69it/s]

{'loss': 0.386, 'grad_norm': 0.9254090785980225, 'learning_rate': 8.103448275862069e-06, 'epoch': 25.4}


 85%|████████▌ | 1280/1500 [11:04<00:55,  3.94it/s]

{'loss': 0.3822, 'grad_norm': 1.6826364994049072, 'learning_rate': 7.758620689655173e-06, 'epoch': 25.6}


 86%|████████▌ | 1290/1500 [11:08<00:56,  3.75it/s]

{'loss': 0.3799, 'grad_norm': 0.779276430606842, 'learning_rate': 7.413793103448275e-06, 'epoch': 25.8}


 87%|████████▋ | 1300/1500 [11:11<00:50,  3.96it/s]

{'loss': 0.3786, 'grad_norm': 0.9156877994537354, 'learning_rate': 7.0689655172413796e-06, 'epoch': 26.0}


                                                   
 87%|████████▋ | 1300/1500 [11:12<00:50,  3.96it/s]

{'eval_loss': 0.5371915698051453, 'eval_runtime': 1.0159, 'eval_samples_per_second': 98.44, 'eval_steps_per_second': 12.797, 'epoch': 26.0}


 87%|████████▋ | 1310/1500 [11:15<00:53,  3.55it/s]

{'loss': 0.3666, 'grad_norm': 1.0066978931427002, 'learning_rate': 6.724137931034483e-06, 'epoch': 26.2}


 88%|████████▊ | 1320/1500 [11:18<00:45,  3.96it/s]

{'loss': 0.3685, 'grad_norm': 1.2304773330688477, 'learning_rate': 6.379310344827587e-06, 'epoch': 26.4}


 89%|████████▊ | 1330/1500 [11:22<00:45,  3.73it/s]

{'loss': 0.3882, 'grad_norm': 0.8328630924224854, 'learning_rate': 6.03448275862069e-06, 'epoch': 26.6}


 89%|████████▉ | 1340/1500 [11:24<00:40,  3.93it/s]

{'loss': 0.3513, 'grad_norm': 0.6775132417678833, 'learning_rate': 5.689655172413794e-06, 'epoch': 26.8}


 90%|█████████ | 1350/1500 [11:28<00:40,  3.71it/s]

{'loss': 0.3551, 'grad_norm': 0.6692936420440674, 'learning_rate': 5.344827586206897e-06, 'epoch': 27.0}


                                                   
 90%|█████████ | 1350/1500 [11:29<00:40,  3.71it/s]

{'eval_loss': 0.5382014513015747, 'eval_runtime': 1.0204, 'eval_samples_per_second': 98.0, 'eval_steps_per_second': 12.74, 'epoch': 27.0}


 91%|█████████ | 1360/1500 [11:32<00:37,  3.75it/s]

{'loss': 0.3791, 'grad_norm': 0.9778993129730225, 'learning_rate': 5e-06, 'epoch': 27.2}


 91%|█████████▏| 1370/1500 [11:35<00:34,  3.73it/s]

{'loss': 0.3308, 'grad_norm': 1.1025352478027344, 'learning_rate': 4.655172413793104e-06, 'epoch': 27.4}


 92%|█████████▏| 1380/1500 [11:38<00:30,  3.96it/s]

{'loss': 0.3382, 'grad_norm': 1.1463438272476196, 'learning_rate': 4.310344827586207e-06, 'epoch': 27.6}


 93%|█████████▎| 1390/1500 [11:42<00:29,  3.72it/s]

{'loss': 0.3938, 'grad_norm': 1.1104316711425781, 'learning_rate': 3.96551724137931e-06, 'epoch': 27.8}


 93%|█████████▎| 1400/1500 [11:44<00:25,  3.96it/s]

{'loss': 0.3681, 'grad_norm': 0.8955211639404297, 'learning_rate': 3.620689655172414e-06, 'epoch': 28.0}


                                                   
 93%|█████████▎| 1400/1500 [11:45<00:25,  3.96it/s]

{'eval_loss': 0.5389742255210876, 'eval_runtime': 1.0157, 'eval_samples_per_second': 98.451, 'eval_steps_per_second': 12.799, 'epoch': 28.0}


 94%|█████████▍| 1410/1500 [11:49<00:25,  3.56it/s]

{'loss': 0.3861, 'grad_norm': 0.7936801910400391, 'learning_rate': 3.2758620689655175e-06, 'epoch': 28.2}


 95%|█████████▍| 1420/1500 [11:52<00:20,  3.96it/s]

{'loss': 0.3688, 'grad_norm': 1.1484991312026978, 'learning_rate': 2.931034482758621e-06, 'epoch': 28.4}


 95%|█████████▌| 1430/1500 [11:55<00:18,  3.74it/s]

{'loss': 0.3683, 'grad_norm': 1.0133377313613892, 'learning_rate': 2.586206896551724e-06, 'epoch': 28.6}


 96%|█████████▌| 1440/1500 [11:58<00:15,  3.95it/s]

{'loss': 0.3498, 'grad_norm': 0.6056205034255981, 'learning_rate': 2.2413793103448275e-06, 'epoch': 28.8}


 97%|█████████▋| 1450/1500 [12:02<00:13,  3.72it/s]

{'loss': 0.3546, 'grad_norm': 0.6524468660354614, 'learning_rate': 1.896551724137931e-06, 'epoch': 29.0}


                                                   
 97%|█████████▋| 1450/1500 [12:03<00:13,  3.72it/s]

{'eval_loss': 0.5391480922698975, 'eval_runtime': 1.0178, 'eval_samples_per_second': 98.255, 'eval_steps_per_second': 12.773, 'epoch': 29.0}


 97%|█████████▋| 1460/1500 [12:05<00:10,  3.75it/s]

{'loss': 0.3752, 'grad_norm': 0.6573004126548767, 'learning_rate': 1.5517241379310346e-06, 'epoch': 29.2}


 98%|█████████▊| 1470/1500 [12:09<00:08,  3.73it/s]

{'loss': 0.3642, 'grad_norm': 0.8677975535392761, 'learning_rate': 1.206896551724138e-06, 'epoch': 29.4}


 99%|█████████▊| 1480/1500 [12:12<00:05,  3.95it/s]

{'loss': 0.3818, 'grad_norm': 1.1790987253189087, 'learning_rate': 8.620689655172415e-07, 'epoch': 29.6}


 99%|█████████▉| 1490/1500 [12:15<00:02,  3.74it/s]

{'loss': 0.3723, 'grad_norm': 0.8958055973052979, 'learning_rate': 5.172413793103449e-07, 'epoch': 29.8}


100%|██████████| 1500/1500 [12:18<00:00,  3.93it/s]

{'loss': 0.3436, 'grad_norm': 1.0837959051132202, 'learning_rate': 1.7241379310344828e-07, 'epoch': 30.0}


                                                   
100%|██████████| 1500/1500 [12:19<00:00,  3.93it/s]

{'eval_loss': 0.5391696691513062, 'eval_runtime': 1.0137, 'eval_samples_per_second': 98.645, 'eval_steps_per_second': 12.824, 'epoch': 30.0}


100%|██████████| 1500/1500 [12:20<00:00,  2.03it/s]

{'train_runtime': 740.6976, 'train_samples_per_second': 16.201, 'train_steps_per_second': 2.025, 'train_loss': 0.949371881643931, 'epoch': 30.0}





In [69]:
def summarize_text_finetuned(text, max_input_length=512, max_output_length=10):
    """
    Summarizes input text using a T5 model.
    """
    # Add the "summarize:" prefix for T5
    input_text = text.strip()
    
    # Tokenize the input text
    inputs = tokenizer.encode(input_text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
    
    # Generate the summary
    outputs = trainer.model.generate(inputs, max_length=max_output_length, min_length=1, length_penalty=1.0, num_beams=4)
    
    # Decode the summary
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

In [36]:
tokenizer.save_pretrained("Finetuned_T5/tokenizer")
trainer.model.save_pretrained("Finetuned_T5/model")

In [208]:
dataset_train["generated_summary"] = dataset_train["preprocessed_text"].apply(summarize_text)

In [70]:
dataset_test["generated_summary"] = dataset_test["preprocessed_text"].apply(summarize_text_finetuned)

In [72]:
dataset_test["generated_summary"]

0                      Not my cup of coffee
1              Maltitol is an alcohol sugar
2             Best chicken noodle soup ever
3               Great for green tomato jam!
4        A nice alternative to an apple pie
                      ...                  
95              Great for the lowest price!
96            My dog has a ton of allergies
97                    No tea flavor at all.
98    My favorite dairy free flavored chips
99                     Strawberry Twizzlers
Name: generated_summary, Length: 100, dtype: object

In [73]:
results = rouge.compute(
    predictions=dataset_test["generated_summary"].tolist(),
    references=dataset_test["Summary"].tolist()
)

In [74]:
for metric, score in results.items():
    print(f"{metric}: {score:.4f}")

rouge1: 0.1563
rouge2: 0.0403
rougeL: 0.1469
rougeLsum: 0.1462


There's an increase in ROUGE score.
Since it's t5-small it's to get score more than this