In [2]:
import os
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from rouge_score import rouge_scorer
from sacrebleu import corpus_bleu

# Where to find model weights: a local directory holding the fine-tuned
# checkpoint, and the name of the base checkpoint to fall back on.
trained_model_dir = r"E:\NIKHIL\ML\Text Summerizer Using Deep Learning\models\pegasus-fine_tuned_model"
pretrained_model_name = "google/pegasus-xsum"

# Function to check if the trained model exists
def model_exists(model_dir):
    """Return True when ``model_dir`` is an existing directory, else False."""
    # Guard clause: nothing at that path means there is no saved model.
    if not os.path.exists(model_dir):
        return False
    # Something exists there; it only counts if it is a directory.
    return os.path.isdir(model_dir)

# The tokenizer is always built from the base checkpoint name, regardless of
# which set of weights is loaded below.
tokenizer = PegasusTokenizer.from_pretrained(pretrained_model_name)

# Use the fine-tuned weights when their directory is present; otherwise fall
# back to the base checkpoint.
if not model_exists(trained_model_dir):
    print("Loading pre-trained model...")
    model = PegasusForConditionalGeneration.from_pretrained(pretrained_model_name)
else:
    print("Loading trained model...")
    model = PegasusForConditionalGeneration.from_pretrained(trained_model_dir)

# Fetch the XSUM dataset.
xsum_dataset = load_dataset("xsum")

# Keep only the first 10 records as the evaluation sample to limit compute.
# NOTE(review): the sample is drawn from the "train" split; if the fine-tuned
# model was trained on that split the scores will be optimistic - confirm
# whether a held-out split was intended.
eval_dataset = xsum_dataset["train"].select(range(10))

def preprocess_function(examples):
    """Tokenize a batch of documents and summaries for evaluation.

    Args:
        examples: Mapping with "document" and "summary" entries.

    Returns:
        The tokenized documents (truncated/padded to 512 tokens) with an
        added "labels" entry holding the summary token IDs
        (truncated/padded to 128 tokens).
    """
    # Options shared by both tokenizer calls; only max_length differs.
    shared_options = {"truncation": True, "padding": "max_length"}

    model_inputs = tokenizer(examples["document"], max_length=512, **shared_options)
    summary_encoding = tokenizer(examples["summary"], max_length=128, **shared_options)

    # NOTE(review): the labels keep their padding token IDs as-is; whether the
    # model's loss skips those positions is not visible here - confirm.
    model_inputs["labels"] = summary_encoding["input_ids"]
    return model_inputs

# Tokenize the evaluation sample in batches, then drop the raw text columns
# now that their tokenized form is available.
eval_dataset = eval_dataset.map(preprocess_function, batched=True).remove_columns(
    ["document", "summary"]
)

def compute_metrics(pred):
    """Calculate averaged ROUGE F-measures and the corpus SacreBLEU score.

    Args:
        pred: Object exposing ``predictions`` and ``label_ids``.
            ``predictions`` may be a tuple, in which case only its first
            element is used. Floating-point predictions are treated as
            per-token scores and reduced with ``argmax`` over the last axis;
            integer predictions are treated as token IDs already.

    Returns:
        dict with scalar ``eval_rouge1``, ``eval_rouge2``, ``eval_rougeL``
        (mean F-measure over examples) and ``eval_sacrebleu`` entries.
        All four are 0.0 when there are no examples to score.
    """
    import numpy as np  # local import keeps this block self-contained

    predictions, labels = pred.predictions, pred.label_ids

    if isinstance(predictions, tuple):
        predictions = predictions[0]

    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    # Scores (floats) need an argmax to become token IDs; integer arrays are
    # assumed to be token IDs already and are used unchanged.
    # NOTE(review): if these scores come from a forward pass that was also
    # given the labels, the argmax is presumably conditioned on the reference
    # tokens rather than on free-running generation, which would make the
    # metrics optimistic - confirm against the Trainer setup.
    if np.issubdtype(predictions.dtype, np.floating):
        pred_ids = predictions.argmax(-1)
    else:
        pred_ids = predictions

    # -100 is not a real vocabulary ID and cannot be decoded; map any such
    # sentinel positions to the pad token so skip_special_tokens drops them.
    pad_id = tokenizer.pad_token_id
    pred_ids = np.where(pred_ids != -100, pred_ids, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    labels_str = tokenizer.batch_decode(labels, skip_special_tokens=True)

    rouge_types = ('rouge1', 'rouge2', 'rougeL')

    # Nothing to score: return zeros instead of dividing by zero below.
    if not pred_str or not labels_str:
        empty = {f"eval_{name}": 0.0 for name in rouge_types}
        empty["eval_sacrebleu"] = 0.0
        return empty

    rouge = rouge_scorer.RougeScorer(list(rouge_types), use_stemmer=True)
    # RougeScorer.score takes (target, prediction) in that order.
    rouge_scores = [rouge.score(ref, hyp) for ref, hyp in zip(labels_str, pred_str)]

    # corpus_bleu takes the hypotheses and a list of reference streams.
    bleu = corpus_bleu(pred_str, [labels_str])

    count = len(rouge_scores)
    metrics = {
        f"eval_{name}": sum(score[name].fmeasure for score in rouge_scores) / count
        for name in rouge_types
    }
    metrics["eval_sacrebleu"] = bleu.score

    # Scalar values only, so they can be logged directly.
    return metrics

# Evaluation settings. A per-device batch of 2 keeps resource usage low, and
# the tqdm progress bar is turned off.
training_args = TrainingArguments(
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=10,
    per_device_eval_batch_size=2,
    disable_tqdm=True,
)

# Wire the model, the tokenized sample and the custom metric function together.
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

# Run the evaluation pass and show the resulting metrics dictionary.
results = trainer.evaluate()
print(results)


Loading trained model...
{'eval_rouge1': 0.723386536257917, 'eval_rouge2': 0.5265285300068526, 'eval_rougeL': 0.7139567116965135, 'eval_sacrebleu': 9.116300325856162, 'eval_loss': 10.70068359375, 'eval_runtime': 22.8694, 'eval_samples_per_second': 0.437, 'eval_steps_per_second': 0.219}
{'eval_rouge1': 0.723386536257917, 'eval_rouge2': 0.5265285300068526, 'eval_rougeL': 0.7139567116965135, 'eval_sacrebleu': 9.116300325856162, 'eval_loss': 10.70068359375, 'eval_runtime': 22.8694, 'eval_samples_per_second': 0.437, 'eval_steps_per_second': 0.219}
