In [1]:
! pip install transformers datasets evaluate rouge_score

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ev

In [59]:
from datasets import load_dataset

dataset = load_dataset("sujayC66/text_summarization_512_length_1_2000",split="train[:1000]")

README.md:   0%|          | 0.00/416 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.48M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/320k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1683 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/297 [00:00<?, ? examples/s]

In [60]:
dataset

Dataset({
    features: ['content', 'summary'],
    num_rows: 1000
})

In [61]:
dataset = dataset.train_test_split(test_size=0.2)

In [62]:
dataset

DatasetDict({
    train: Dataset({
        features: ['content', 'summary'],
        num_rows: 800
    })
    test: Dataset({
        features: ['content', 'summary'],
        num_rows: 200
    })
})

In [63]:
dataset["train"][0]

{'content': "Bengaluru, Jan 13 (IANS) A high-level delegation led by Heavy and Medium Industries Minister M.B. Patil will leave on January 14 to attend the World Economic Forum-2024 annual meet which will be held in Davos, Switzerland from January 15 to 19.Patil said that during the meeting, the delegation will draw the attention of businessmen over to the Karnataka government's business-friendly policies, evolved ecosystem, potential areas for investment, availability of human resources, priority to imbibe skills among students, and efficient single window system.He said that the delegation will hold all the meetings in Davos under the ‘Innovation Will Impact’ theme.“Our government aims to develop Karnataka as the leading manufacturing hub of Asia. We are also focussing on areas such as semiconductor, electric automotive, space and defense, clean energy, research and development and artificial intelligence,” the minister said.He said that the delegation’s aim is to attract more invest

##Model 1

In [64]:
from transformers import AutoTokenizer

checkpoint = "t5-small" #https://huggingface.co/t5-small
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [68]:
prefix = "summarize: "


def preprocess_function(examples):
    inputs = [prefix + doc for doc in examples["content"]]
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True)

    labels = tokenizer(text_target=examples["summary"], max_length=128, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [69]:
tokenized_text = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [70]:
tokenized_text

DatasetDict({
    train: Dataset({
        features: ['content', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 800
    })
    test: Dataset({
        features: ['content', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 200
    })
})

In [71]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)

In [72]:
!pip install evaluate



In [73]:
import evaluate

rouge = evaluate.load("rouge")

In [74]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [75]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

In [76]:
# prompt: huggignface login

!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineG

In [77]:
training_args = Seq2SeqTrainingArguments(
    output_dir="summarize_model",
    evaluation_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.05,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
    push_to_hub=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_text["train"],
    eval_dataset=tokenized_text["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Seq2SeqTrainer(


In [78]:
#start training
trainer.train()

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,No log,1.323217,0.2666,0.1487,0.2377,0.2362,18.97
2,No log,1.248057,0.2759,0.1548,0.2454,0.245,18.97
3,No log,1.219131,0.2821,0.1617,0.2503,0.2503,18.97
4,No log,1.210833,0.2829,0.1626,0.2513,0.2512,18.97




TrainOutput(global_step=200, training_loss=1.4317457580566406, metrics={'train_runtime': 148.6713, 'train_samples_per_second': 21.524, 'train_steps_per_second': 1.345, 'total_flos': 543058978406400.0, 'train_loss': 1.4317457580566406, 'epoch': 4.0})

In [86]:
content = "PALO ALTO - In a significant move to bolster its cloud services and artificial intelligence capabilities, Hewlett Packard Enterprise (NYSE:HPE) has entered into a definitive agreement to acquire Juniper Networks (NYSE:JNPR) at a purchase price of $40 per share, which equates to an equity value of approximately $14 billion. This strategic acquisition is set to enhance HPE's portfolio by integrating Juniper’s advanced Mist AI technology with the assets of HPE Aruba Networking.The merger, which is anticipated to be finalized between late 2024 and early 2025, is currently subject to customary closing conditions and regulatory approvals. Once completed, this deal is poised to position HPE as a player in the realm of secure unified cloud services and AI-native networking solutions. The move is seen as a response to the growing IT trends that emphasize artificial intelligence and hybrid cloud environments, with the goal of delivering comprehensive technology solutions that span from edge devices to cloud infrastructure.Rami Rahim, the current CEO of Juniper Networks, is set to lead the expanded networking sector within HPE after the transaction is completed. The financing strategy for this ambitious acquisition includes securing $14 billion in term loans, alongside plans for future refinancing to manage the investment effectively.This article was generated with the support of AI and reviewed by an editor. For more information see our T&C."

In [88]:
inputs = tokenizer(content, return_tensors="pt").input_ids

In [90]:
outputs = model.generate(inputs.cuda(), max_new_tokens=100, do_sample=False)

In [91]:
outputs

tensor([[    0, 32099,    12,  7464, 12170,   883,  3426,     7,    41, 23397,
            10,   683,   567,  5554,    61,    44,     3,     9,  1242,   594,
            13, 23853,   399,   698,     6,    84,     3, 25875,     7,    12,
            46,  8408,   701,    13,  3241, 26845,  2108,     5,    37,  6566,
            19,  1644,    12,  8726,  5481,   427,    31,     7,  3126,   364,
            11,  7353,  6123,  5644,    57,     3, 20030, 12170,   883,    22,
             7,  8306,    17,  7833,   748,    28,  5481,   427,  1533, 17309,
          3426,    53,     5,     1]], device='cuda:0')

In [92]:
#Decode the generated token ids back into text:
tokenizer.decode(outputs[0], skip_special_tokens=True)

"to acquire Juniper Networks (NYSE:JNPR) at a purchase price of $40 per share, which equates to an equity value of approximately $14 billion. The acquisition is expected to strengthen HPE's cloud services and artificial intelligence capabilities by integrating Juniper’s Mist AI technology with HPE Aruba Networking."