In [1]:
from datasets import load_from_disk

# Load the tokenized train and eval splits from their on-disk directories.
train_data, eval_data = map(
    load_from_disk,
    (
        "../data/tokenized_mts_summarizer_train",
        "../data/tokenized_mts_summarizer_eval",
    ),
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Display the training split: its feature (column) names and row count.
train_data

Dataset({
    features: ['ID', 'section_header', 'target', 'input', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 1161
})

In [3]:
# Display the evaluation split: its feature (column) names and row count.
eval_data

Dataset({
    features: ['ID', 'section_header', 'target', 'input', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 129
})

In [4]:
# Model & tokenizer: both are created from the same pretrained checkpoint name.

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_checkpoint = "facebook/bart-base"

# The two loads are independent of each other; the tokenizer is fetched first.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

In [5]:
import evaluate
import numpy as np

# ROUGE metric object from the `evaluate` library; `compute_metrics` below
# calls `rouge.compute(...)` on decoded predictions and references.
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    """Compute ROUGE scores (scaled to 0-100) for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` may be
            a 3-D array of logits ``(batch, seq_len, vocab)``, a tuple whose
            first element is such an array, or a 2-D array of already-chosen
            token IDs ``(batch, seq_len)``. ``labels`` is an array of token
            IDs in which ``-100`` marks ignored positions.

    Returns:
        dict mapping every key returned by ``rouge.compute`` to its value
        multiplied by 100. Aggregate-style values exposing ``.mid.fmeasure``
        are reduced to that F-measure first.

    NOTE(review): when the predictions are logits from a plain ``Trainer``
    evaluation pass they are presumably produced with the reference tokens
    as decoder input, so the scores are likely optimistic compared with
    free-running generation -- confirm, and consider ``Seq2SeqTrainer`` with
    ``predict_with_generate=True`` for generation-based ROUGE.
    """
    predictions, labels = eval_pred

    # Some models return a tuple of outputs; the first element is the logits.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    pad_id = tokenizer.pad_token_id

    if predictions.ndim == 3:
        # Logits: pick the highest-scoring token at every position.
        pred_ids = np.argmax(predictions, axis=-1)
        # Positions whose label is -100 have no reference token, so whatever
        # the model emitted there must not be decoded into the prediction.
        if pred_ids.shape == labels.shape:
            pred_ids = np.where(labels != -100, pred_ids, pad_id)
    else:
        # Already token IDs (e.g. generated sequences or pre-argmaxed logits);
        # -100 is only batch padding and cannot be decoded by the tokenizer.
        pred_ids = np.where(predictions != -100, predictions, pad_id)

    decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)

    # Replace -100 in labels (ignored positions) so they can be decoded.
    label_ids = np.where(labels != -100, labels, pad_id)
    decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
    )

    # Handle both flat float outputs (new) and aggregate score objects (old).
    return {
        key: (value.mid.fmeasure if hasattr(value, "mid") else value) * 100
        for key, value in result.items()
    }


In [6]:
# Training configuration

from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    # Where outputs and logs are written; no external experiment tracker.
    output_dir="../model/final_summarizer",
    logging_dir="../model/logs",
    report_to="none",
    # Batching: one example per device step, gradients accumulated over 8 steps.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    # Optimisation schedule.
    num_train_epochs=1,
    warmup_steps=10,
    weight_decay=0.01,
    # Step-based cadence for logging, evaluation and checkpointing.
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=1e6,
)

In [7]:
# Seq2seq batch collator built from the tokenizer and the model; it is passed
# to the Trainer as `data_collator`.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [8]:
# Wire the model, training configuration, datasets, collator and metric
# function together. The tokenizer is supplied through `processing_class`,
# which replaces the deprecated `tokenizer=` keyword of `Trainer`.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [9]:
# Sanity check before training: score only the first 10 eval examples.
small_eval = eval_data.select(range(10))
small_metrics = trainer.evaluate(eval_dataset=small_eval)
print("Quick ROUGE Metrics on First 10 Eval Samples:", small_metrics, sep="\n")



Quick ROUGE Metrics on First 10 Eval Samples:
{'eval_loss': 12.643924713134766, 'eval_model_preparation_time': 0.002, 'eval_rouge1': 20.372448391347486, 'eval_rouge2': 2.7696078431372553, 'eval_rougeL': 16.417015028732212, 'eval_rougeLsum': 17.522674679800673, 'eval_runtime': 2.5661, 'eval_samples_per_second': 3.897, 'eval_steps_per_second': 3.897}


In [10]:
# Run training with the settings in `training_args`, then write the final
# model to the same directory that is configured as `output_dir`.
trainer.train()
trainer.save_model("../model/final_summarizer")

Step,Training Loss,Validation Loss,Model Preparation Time,Rouge1,Rouge2,Rougel,Rougelsum
50,2.8586,2.04294,0.002,65.90439,41.49479,61.063269,61.477632
100,1.3283,0.916914,0.002,72.173992,52.711908,69.763576,70.654123




In [11]:
# Evaluate on the full eval split after training and print the metrics dict.
metrics = trainer.evaluate(eval_dataset=eval_data)
print(metrics)



{'eval_loss': 0.7878283858299255, 'eval_model_preparation_time': 0.002, 'eval_rouge1': 73.99312078867789, 'eval_rouge2': 54.91614211410053, 'eval_rougeL': 71.33775354163181, 'eval_rougeLsum': 72.21309745976477, 'eval_runtime': 91.7399, 'eval_samples_per_second': 1.406, 'eval_steps_per_second': 1.406, 'epoch': 1.0}
