In [1]:
!pip install datasets

[0m

In [2]:
from datasets import load_dataset

dataset = load_dataset("EdinburghNLP/xsum")

In [3]:
# Define sample sizes
train_sample_size = 1000  # Adjust as needed
valid_sample_size = 200

# Randomly shuffle and select
train_subset = dataset["train"].shuffle(seed=42).select(range(train_sample_size))
valid_subset = dataset["validation"].shuffle(seed=42).select(range(valid_sample_size))

print(train_subset, valid_subset)


Dataset({
    features: ['document', 'summary', 'id'],
    num_rows: 1000
}) Dataset({
    features: ['document', 'summary', 'id'],
    num_rows: 200
})


In [7]:
from transformers import AutoTokenizer

model_checkpoint = "facebook/bart-large-cnn"  # Change this for other models like t5-small or google/pegasus-xsum
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def preprocess_function(examples):
    inputs = examples["document"]
    targets = examples["summary"]
    
    # Tokenize inputs and summaries
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=70, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_train = train_subset.map(preprocess_function, batched=True)
tokenized_valid = valid_subset.map(preprocess_function, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [8]:
test_sample_size = 100
test_subset = dataset["test"].shuffle(seed=42).select(range(test_sample_size))

In [9]:
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

In [10]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [11]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    tokenizer=tokenizer,
)

trainer.train()

2025-04-01 05:55:31.800709: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-01 05:55:31.800709: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-01 05:55:31.801768: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-01 05:55:31.812161: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Using the `WANDB_DISABLED` environment variable is de

Epoch,Training Loss,Validation Loss
1,1.03,0.837885
2,0.4227,0.891701
3,0.1745,1.094819


TrainOutput(global_step=1500, training_loss=0.5423719635009766, metrics={'train_runtime': 1375.4226, 'train_samples_per_second': 2.181, 'train_steps_per_second': 1.091, 'total_flos': 6501313806336000.0, 'train_loss': 0.5423719635009766, 'epoch': 3.0})

In [12]:
!pip install bert_score rouge_score evaluate

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m

In [13]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import evaluate

# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained("results/checkpoint-1000")
model = AutoModelForSeq2SeqLM.from_pretrained("results/checkpoint-1000").to("cuda").half()
model.eval()  # Set model to evaluation mode

def generate_answer(batch):
    # Tokenize the inputs with truncation
    inputs = tokenizer(batch["document"], padding="max_length", max_length=1024, return_tensors="pt", truncation=True)
    input_ids = inputs.input_ids.to("cuda")
    attention_mask = inputs.attention_mask.to("cuda")

    with torch.no_grad():  # Disable gradient calculation for inference
        predicted_ids = model.generate(input_ids, attention_mask=attention_mask)
    
    batch["predicted_summary"] = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
    return batch

# Apply summarization function
test_data = test_subset.map(generate_answer, batched=True, batch_size=4)

# Load ROUGE metric
rouge = evaluate.load("rouge")

# Compute ROUGE score for the entire dataset
rouge_scores = rouge.compute(predictions=test_data["predicted_summary"], references=test_subset["summary"])


# Calculate the average ROUGE scores (F1-score, precision, recall)
avg_rouge = {
    "rouge1_f1": rouge_scores["rouge1"],
    "rouge2_f1": rouge_scores["rouge2"],
    "rougeL_f1": rouge_scores["rougeL"]
}

print(f"Average ROUGE Scores: {avg_rouge}")

# Now let's calculate BERTScore

# Load BERTScore metric
bertscore = evaluate.load("bertscore")

# Compute BERTScore for the entire dataset
bertscore_results = bertscore.compute(predictions=test_data["predicted_summary"], references=test_subset["summary"], lang="en")


# Calculate the average BERTScore (F1-score, precision, recall)
avg_bertscore = {
    "precision": sum(bertscore_results["precision"]) / len(bertscore_results["precision"]),
    "recall": sum(bertscore_results["recall"]) / len(bertscore_results["recall"]),
    "f1": sum(bertscore_results["f1"]) / len(bertscore_results["f1"])
}

print(f"Average BERTScore: {avg_bertscore}")


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Average ROUGE Scores: {'rouge1_f1': 0.27812040419995043, 'rouge2_f1': 0.09386065001259182, 'rougeL_f1': 0.19941973693805942}


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Average BERTScore: {'precision': 0.8635722690820694, 'recall': 0.8970527470111846, 'f1': 0.8798836487531662}


In [14]:
for i in range(5):
    print(f"Example {i+1}:")
    print(f"Actual Summary: {test_data['summary'][i]}")
    print(f"Predicted Summary: {test_data['predicted_summary'][i]}")
    print("-" * 100)

Example 1:
Actual Summary: A woman who was seriously hurt in a fatal hen party motorway crash is now helping other major trauma victims rebuild their lives.
Predicted Summary: A woman who was seriously injured in a crash which killed her friend has spoken publicly for the first time since the incident to thank a charity for helping her cope with the trauma caused by a fatal crash in which left her "giddy and giddy" on the way to a hen party.
----------------------------------------------------------------------------------------------------
Example 2:
Actual Summary: A Tudor manor house has reopened following a £2.2m makeover.
Predicted Summary: A £1.6m renovation of a 19th Century timber-framed hall known as the "jewel in the crown" of a family estate has opened to the public after months of work by the council and the Heritage Lottery Fund, it has been announced.
----------------------------------------------------------------------------------------------------
Example 3:
Actual Sum

In [15]:
import pandas as pd

# Prepare lists to store data
actual_summaries = []
predicted_summaries = []

# Collect summaries into the lists
for i in range(100):
    actual_summaries.append(test_data['summary'][i])
    predicted_summaries.append(test_data['predicted_summary'][i])

# Create a DataFrame
df = pd.DataFrame({
    'Actual Summary': actual_summaries,
    'Predicted Summary': predicted_summaries
})

# Save the DataFrame to a CSV file
df.to_csv("summaries_comparison.csv", index=False)

print("CSV file 'summaries_comparison.csv' has been saved.")


CSV file 'summaries_comparison.csv' has been saved.
