Bulk Model Evaluation

In [1]:
import os
import logging
from datetime import datetime
import pandas as pd
import numpy as np
import torch
import evaluate
import unicodedata
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import (
    MBartForConditionalGeneration,
    MBart50TokenizerFast
)
from tqdm import tqdm

# --- Configuration ---
# Directory handed to `from_pretrained` for both the model and the tokenizer.
MODEL_PATH = "mbart-large-50-cnn-summarizer-v14/final_model"
# CSV read by main(); rows need the columns 'raw_news_article',
# 'english_summary' and 'hindi_summary'.
EVAL_DATA_PATH = "../Dataset/filtered_articles_CNN.csv" 
# Number of articles per DataLoader batch.
BATCH_SIZE = 16 
# Use CUDA when torch reports it as available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Set to a number to evaluate only the first N rows, or None for the full dataset.
NUM_EVAL_SAMPLES = 200

# --- Setup Logging ---
# One timestamped log file per run; every record is also echoed to the console.
log_filename = f"bulk_evaluation_log_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] - %(message)s", handlers=[logging.FileHandler(log_filename), logging.StreamHandler()])

def sanitize_text(text):
    """Collapse doubled double-quotes into single ones and trim the ends.

    Anything that is not a ``str`` (for example ``None`` or a float) yields
    an empty string.
    """
    if isinstance(text, str):
        unescaped = text.replace('""', '"')
        return unescaped.strip()
    return ""

def normalize_text(text):
    """Apply Unicode NFKC normalization and squeeze whitespace runs.

    Leading/trailing whitespace is removed and every internal run of
    whitespace becomes one space. Non-string input yields an empty string.
    """
    if isinstance(text, str):
        canonical = unicodedata.normalize('NFKC', text)
        words = canonical.split()
        return ' '.join(words)
    return ""

def main():
    """Evaluate the fine-tuned mBART summarizer in bulk and log ROUGE/BLEURT.

    Steps:
      1. Load the model and tokenizer from ``MODEL_PATH``.
      2. Read ``EVAL_DATA_PATH``, drop rows missing any text column, clean the
         text, and optionally keep only the first ``NUM_EVAL_SAMPLES`` rows.
      3. Generate one English and one Hindi summary per article.
      4. Score the summaries and log the metrics (as percentages).

    Returns:
        None. Results are written through ``logging``. Any exception is
        logged with its traceback rather than propagated.
    """
    try:
        logging.info("--- Starting Bulk Evaluation with Custom Inference Loop ---")

        # 1. --- Load Model and Tokenizer ---
        logging.info(f"Loading model from: {MODEL_PATH}")
        model = MBartForConditionalGeneration.from_pretrained(MODEL_PATH).to(DEVICE)
        tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_PATH)
        model.eval()
        # Every article is tokenized as English source text, so set this once
        # instead of on every batch.
        tokenizer.src_lang = "en_XX"

        # 2. --- Load and Prepare the Evaluation Dataset ---
        logging.info(f"Loading evaluation data from: {EVAL_DATA_PATH}")
        text_columns = ['raw_news_article', 'english_summary', 'hindi_summary']
        df_eval = pd.read_csv(EVAL_DATA_PATH, engine='python', on_bad_lines='skip')
        # Keep only the columns the loop reads, so unrelated CSV columns (which
        # may hold missing values) never go through the DataLoader's collation.
        df_eval = df_eval.dropna(subset=text_columns)[text_columns].copy()

        for col in text_columns:
            df_eval[col] = df_eval[col].apply(sanitize_text).apply(normalize_text)

        eval_dataset = Dataset.from_pandas(df_eval, preserve_index=False)

        # --- Optionally evaluate only the first NUM_EVAL_SAMPLES rows ---
        if NUM_EVAL_SAMPLES and NUM_EVAL_SAMPLES < len(eval_dataset):
            logging.warning(f"Using a subset of {NUM_EVAL_SAMPLES} examples for quick evaluation.")
            eval_dataset = eval_dataset.select(range(NUM_EVAL_SAMPLES))

        logging.info(f"Evaluation data prepared with {len(eval_dataset)} examples.")

        if len(eval_dataset) == 0:
            # Nothing to summarize; the metric calls below need at least one pair.
            logging.error("No usable evaluation rows were found; nothing to evaluate.")
            return

        data_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE)

        # 3. --- Run Custom Inference Loop ---
        generated_eng_summaries = []
        generated_hin_summaries = []
        reference_eng_summaries = []
        reference_hin_summaries = []
        # Target-language code -> list collecting that language's output.
        # English is generated first, then Hindi (dict order is insertion order).
        generated_by_lang = {
            "en_XX": generated_eng_summaries,
            "hi_IN": generated_hin_summaries,
        }

        # Decoding settings are the same for every batch, so build them once.
        # NOTE(review): do_sample=True makes the scores vary between runs;
        # consider seeding torch or disabling sampling for reproducible numbers.
        gen_kwargs = {
            "num_beams": 10,
            "no_repeat_ngram_size": 3,
            "repetition_penalty": 3.0,
            "length_penalty": 1.0,
            "do_sample": True,
            "early_stopping": True,
            "top_k": 50,
            "top_p": 0.95,
            "temperature": 0.8,
        }
        ratio = 0.25
        min_floor, max_ceiling, headroom = 40, 256, 20

        logging.info("Generating summaries for the dataset...")
        for batch in tqdm(data_loader, desc="Summarizing Batches"):
            articles = batch['raw_news_article']

            inputs = tokenizer(articles, return_tensors="pt", max_length=1024, truncation=True, padding=True).to(DEVICE)

            # Length window derived from the batch's mean article word count.
            input_word_count = np.mean([len(art.split()) for art in articles])
            min_len = max(min_floor, int(input_word_count * (ratio - 0.1)))
            max_len = min(max_ceiling, int(input_word_count * (ratio + 0.1)))
            if max_len <= min_len:
                # Short batches push the ceiling under the 40-token floor and
                # very long ones push the floor over the 256-token ceiling;
                # either way min_length would not be below max_length.
                min_len = min(min_len, max_ceiling - headroom)
                max_len = min_len + headroom

            for lang_code, collected in generated_by_lang.items():
                summary_ids = model.generate(
                    input_ids=inputs["input_ids"],
                    # Batches are padded: pass the mask so padding is ignored.
                    attention_mask=inputs["attention_mask"],
                    max_length=max_len,
                    min_length=min_len,
                    forced_bos_token_id=tokenizer.lang_code_to_id[lang_code],
                    **gen_kwargs,
                )
                collected.extend(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))

            reference_eng_summaries.extend(batch['english_summary'])
            reference_hin_summaries.extend(batch['hindi_summary'])

        # 4. --- Calculate and Print Final Metrics ---
        logging.info("Summarization complete. Calculating final metrics...")
        rouge_metric = evaluate.load("rouge")
        bleurt_metric = evaluate.load("bleurt", "bleurt-20")

        # ROUGE is computed per language. The metric's default tokenizer keeps
        # only [a-z0-9] tokens, which would discard Devanagari text and score
        # every Hindi pair as 0, so Hindi uses whitespace tokenization instead.
        rouge_en = rouge_metric.compute(
            predictions=generated_eng_summaries, references=reference_eng_summaries
        )
        rouge_hi = rouge_metric.compute(
            predictions=generated_hin_summaries,
            references=reference_hin_summaries,
            tokenizer=lambda text: text.split(),
        )

        all_generated_summaries = generated_eng_summaries + generated_hin_summaries
        all_reference_summaries = reference_eng_summaries + reference_hin_summaries
        bleurt_results = bleurt_metric.compute(predictions=all_generated_summaries, references=all_reference_summaries)

        final_metrics = {}
        for rouge_key in ("rouge1", "rouge2", "rougeL"):
            final_metrics[f"{rouge_key}_en"] = rouge_en[rouge_key] * 100
            final_metrics[f"{rouge_key}_hi"] = rouge_hi[rouge_key] * 100
            # Combined score: both languages have the same number of examples,
            # so the overall value is the mean of the two.
            final_metrics[rouge_key] = (rouge_en[rouge_key] + rouge_hi[rouge_key]) / 2 * 100
        final_metrics["bleurt_f1"] = np.mean(bleurt_results["scores"]) * 100

        logging.info("\n" + "="*80)
        logging.info("--- FINAL BULK EVALUATION METRICS ---".center(80))
        logging.info("="*80)
        for key, value in final_metrics.items():
            logging.info(f"  - {key}: {value:.4f}")
        logging.info("="*80)

    except Exception as e:
        # Top-level boundary: record the failure with its traceback.
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)

# Run the evaluation when this code is executed as the main module.
if __name__ == "__main__":
    main()



2025-10-08 02:28:31,001 [INFO] - --- Starting Bulk Evaluation with Custom Inference Loop ---
2025-10-08 02:28:31,001 [INFO] - Loading model from: mbart-large-50-cnn-summarizer-v14/final_model
2025-10-08 02:28:33,938 [INFO] - Loading evaluation data from: ../Dataset/filtered_articles_CNN.csv
2025-10-08 02:28:34,118 [INFO] - Evaluation data prepared with 200 examples.
2025-10-08 02:28:34,118 [INFO] - Generating summaries for the dataset...
Summarizing Batches:   0%|          | 0/13 [10:42<?, ?it/s]


KeyboardInterrupt: 

In [None]:
import os
import logging
from datetime import datetime
import pandas as pd
import numpy as np
import torch
import evaluate
import unicodedata
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import (
    MBartForConditionalGeneration,
    MBart50TokenizerFast
)
from tqdm import tqdm
# Optional dependency: the standalone `bleurt` package. BLEURT_INSTALLED
# records whether the import succeeded so main() can skip BLEURT scoring
# (with an error log) instead of failing when the package is missing.
try:
    from bleurt import score as bleurt_scorer
    BLEURT_INSTALLED = True
except ImportError:
    BLEURT_INSTALLED = False

# --- Configuration ---
# Directory handed to `from_pretrained` for both the model and the tokenizer.
MODEL_PATH = "mbart-large-50-cnn-summarizer-v14/final_model"
# CSV read by main(); rows need the columns 'raw_news_article',
# 'english_summary' and 'hindi_summary'.
EVAL_DATA_PATH = "../Dataset/filtered_articles_CNN.csv" 
# Number of articles per DataLoader batch.
BATCH_SIZE = 16 
# Use CUDA when torch reports it as available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Evaluate only the first N rows; a falsy value (None/0) uses the full dataset.
NUM_EVAL_SAMPLES = 500

# --- Choose the BLEURT backend ---
# True:  main() scores with the `bleurt` package directly (BleurtScorer).
# False: main() scores through evaluate.load("bleurt", "bleurt-20").
# NOTE(review): nothing here selects a device explicitly; whether the direct
# scorer actually runs on the GPU presumably depends on the TensorFlow setup.
USE_GPU_FOR_BLEURT = True

# --- Setup Logging ---
# One timestamped log file per run; every record is also echoed to the console.
log_filename = f"bulk_evaluation_log_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] - %(message)s", handlers=[logging.FileHandler(log_filename), logging.StreamHandler()])

def sanitize_text(text):
    """Turn doubled double-quotes into single ones and strip outer whitespace.

    Returns an empty string for any value that is not a ``str``.
    """
    is_text = isinstance(text, str)
    cleaned = text.replace('""', '"').strip() if is_text else ""
    return cleaned

def normalize_text(text):
    """NFKC-normalize ``text`` and join its whitespace-separated words with single spaces.

    Returns an empty string for any value that is not a ``str``.
    """
    if isinstance(text, str):
        nfkc_form = unicodedata.normalize('NFKC', text)
        return ' '.join(nfkc_form.split())
    return ""

def main():
    """Evaluate the fine-tuned mBART summarizer in bulk and log ROUGE/BLEURT.

    Steps:
      1. Load the model and tokenizer from ``MODEL_PATH``.
      2. Read ``EVAL_DATA_PATH``, drop rows missing any text column, clean the
         text, and optionally keep only the first ``NUM_EVAL_SAMPLES`` rows.
      3. Generate one English and one Hindi summary per article.
      4. Score with ROUGE, then with BLEURT through the backend selected by
         ``USE_GPU_FOR_BLEURT``, and log the metrics (as percentages).

    Returns:
        None. Results are written through ``logging``. Any exception is
        logged with its traceback rather than propagated.
    """
    try:
        logging.info("--- Starting Bulk Evaluation with Custom Inference Loop ---")

        # 1. --- Load Model and Tokenizer ---
        logging.info(f"Loading model from: {MODEL_PATH}")
        model = MBartForConditionalGeneration.from_pretrained(MODEL_PATH).to(DEVICE)
        tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_PATH)
        model.eval()
        # Every article is tokenized as English source text, so set this once
        # instead of on every batch.
        tokenizer.src_lang = "en_XX"

        # 2. --- Load and Prepare the Evaluation Dataset ---
        logging.info(f"Loading evaluation data from: {EVAL_DATA_PATH}")
        text_columns = ['raw_news_article', 'english_summary', 'hindi_summary']
        df_eval = pd.read_csv(EVAL_DATA_PATH, engine='python', on_bad_lines='skip')
        # Keep only the columns the loop reads, so unrelated CSV columns (which
        # may hold missing values) never go through the DataLoader's collation.
        df_eval = df_eval.dropna(subset=text_columns)[text_columns].copy()

        for col in text_columns:
            df_eval[col] = df_eval[col].apply(sanitize_text).apply(normalize_text)

        eval_dataset = Dataset.from_pandas(df_eval, preserve_index=False)

        if NUM_EVAL_SAMPLES and NUM_EVAL_SAMPLES < len(eval_dataset):
            logging.warning(f"Using a subset of {NUM_EVAL_SAMPLES} examples for quick evaluation.")
            eval_dataset = eval_dataset.select(range(NUM_EVAL_SAMPLES))

        logging.info(f"Evaluation data prepared with {len(eval_dataset)} examples.")

        if len(eval_dataset) == 0:
            # Nothing to summarize; the metric calls below need at least one pair.
            logging.error("No usable evaluation rows were found; nothing to evaluate.")
            return

        data_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE)

        # 3. --- Run Custom Inference Loop ---
        generated_eng_summaries, generated_hin_summaries = [], []
        reference_eng_summaries, reference_hin_summaries = [], []
        # Target-language code -> list collecting that language's output.
        # English is generated first, then Hindi (dict order is insertion order).
        generated_by_lang = {
            "en_XX": generated_eng_summaries,
            "hi_IN": generated_hin_summaries,
        }

        # Decoding settings are the same for every batch, so build them once.
        # NOTE(review): do_sample=True makes the scores vary between runs;
        # consider seeding torch or disabling sampling for reproducible numbers.
        gen_kwargs = {
            "num_beams": 10,
            "no_repeat_ngram_size": 3,
            "repetition_penalty": 3.0,
            "length_penalty": 1.0,
            "do_sample": True,
            "early_stopping": True,
            "top_k": 50,
            "top_p": 0.95,
            "temperature": 0.8,
        }
        ratio = 0.25
        min_floor, max_ceiling, headroom = 40, 256, 20

        logging.info("Generating summaries for the dataset...")
        for batch in tqdm(data_loader, desc="Summarizing Batches"):
            articles = batch['raw_news_article']

            inputs = tokenizer(articles, return_tensors="pt", max_length=1024, truncation=True, padding=True).to(DEVICE)

            # Length window derived from the batch's mean article word count.
            input_word_count = np.mean([len(art.split()) for art in articles])
            min_len = max(min_floor, int(input_word_count * (ratio - 0.1)))
            max_len = min(max_ceiling, int(input_word_count * (ratio + 0.1)))
            if max_len <= min_len:
                # Short batches push the ceiling under the 40-token floor and
                # very long ones push the floor over the 256-token ceiling;
                # either way min_length would not be below max_length.
                min_len = min(min_len, max_ceiling - headroom)
                max_len = min_len + headroom

            for lang_code, collected in generated_by_lang.items():
                summary_ids = model.generate(
                    input_ids=inputs["input_ids"],
                    # Batches are padded: pass the mask so padding is ignored.
                    attention_mask=inputs["attention_mask"],
                    max_length=max_len,
                    min_length=min_len,
                    forced_bos_token_id=tokenizer.lang_code_to_id[lang_code],
                    **gen_kwargs,
                )
                collected.extend(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))

            reference_eng_summaries.extend(batch['english_summary'])
            reference_hin_summaries.extend(batch['hindi_summary'])

        # 4. --- Calculate and Print Final Metrics ---
        logging.info("Summarization complete. Calculating final metrics...")
        rouge_metric = evaluate.load("rouge")

        all_generated = generated_eng_summaries + generated_hin_summaries
        all_references = reference_eng_summaries + reference_hin_summaries

        logging.info("Calculating ROUGE scores...")
        # ROUGE is computed per language. The metric's default tokenizer keeps
        # only [a-z0-9] tokens, which would discard Devanagari text and score
        # every Hindi pair as 0, so Hindi uses whitespace tokenization instead.
        rouge_en = rouge_metric.compute(
            predictions=generated_eng_summaries, references=reference_eng_summaries
        )
        rouge_hi = rouge_metric.compute(
            predictions=generated_hin_summaries,
            references=reference_hin_summaries,
            tokenizer=lambda text: text.split(),
        )
        final_metrics = {}
        for rouge_key in ("rouge1", "rouge2", "rougeL"):
            final_metrics[f"{rouge_key}_en"] = rouge_en[rouge_key] * 100
            final_metrics[f"{rouge_key}_hi"] = rouge_hi[rouge_key] * 100
            # Combined score: both languages have the same number of examples,
            # so the overall value is the mean of the two.
            final_metrics[rouge_key] = (rouge_en[rouge_key] + rouge_hi[rouge_key]) / 2 * 100

        if USE_GPU_FOR_BLEURT:
            if BLEURT_INSTALLED:
                logging.info("Calculating BLEURT scores on GPU (this may still take some time)...")
                # NOTE(review): BleurtScorer is given the bare name "bleurt-20";
                # presumably a checkpoint directory of that name must exist in
                # the working directory — confirm.
                bleurt_checkpoint = "bleurt-20"
                scorer = bleurt_scorer.BleurtScorer(bleurt_checkpoint)
                bleurt_scores = scorer.score(references=all_references, candidates=all_generated)
                final_metrics["bleurt_f1"] = np.mean(bleurt_scores) * 100
            else:
                logging.error("The 'bleurt' library is not installed. Skipping GPU-based BLEURT calculation. Please run 'pip install bleurt'.")
        else:
            logging.info("Calculating BLEURT scores on CPU (this will be very slow)...")
            bleurt_metric_cpu = evaluate.load("bleurt", "bleurt-20")
            bleurt_results = bleurt_metric_cpu.compute(predictions=all_generated, references=all_references)
            final_metrics["bleurt_f1"] = np.mean(bleurt_results["scores"]) * 100

        logging.info("\n" + "="*80)
        logging.info("--- FINAL BULK EVALUATION METRICS ---".center(80))
        logging.info("="*80)
        for key, value in final_metrics.items():
            logging.info(f"  - {key}: {value:.4f}")
        logging.info("="*80)

    except Exception as e:
        # Top-level boundary: record the failure with its traceback.
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)

# Run the evaluation when this code is executed as the main module.
if __name__ == "__main__":
    main()



Individual Interactive Summarizer

In [17]:
import torch
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import textwrap
import logging
import numpy as np

# --- Configuration ---
# Directory handed to `from_pretrained` for both the model and the tokenizer.
MODEL_PATH = "mbart-large-50-cnn-summarizer-v14/final_model"
# Use CUDA when torch reports it as available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Global variables to hold the loaded model and tokenizer ---
# Both stay None until load_model() succeeds; generate_summary() checks them.
model = None
tokenizer = None

def load_model():
    """
    Populate the module-level ``model`` and ``tokenizer`` from ``MODEL_PATH``.

    Does nothing (beyond printing a notice) when both are already set. If
    loading raises, the error is printed and both globals are reset to None.
    """
    global model, tokenizer

    already_loaded = model is not None and tokenizer is not None
    if already_loaded:
        print("Model and tokenizer are already loaded.")
        return

    print(f"Using device: {DEVICE}")
    banner = "=" * 80
    try:
        print(f"Loading model and tokenizer from: {MODEL_PATH}...")
        loaded_model = MBartForConditionalGeneration.from_pretrained(MODEL_PATH)
        model = loaded_model.to(DEVICE)
        tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_PATH)
        print("\n" + banner)
        print("--- Model Ready ---".center(80))
        print("You can now generate summaries in the cells below.".center(80))
        print(banner)
    except Exception as e:
        print(f"Error loading model: {e}")
        # Never leave a half-initialized pair behind.
        model = None
        tokenizer = None

def generate_summary(article_text):
    """
    Print an English and a Hindi summary of one English news article.

    Uses the module-level ``model`` and ``tokenizer`` set by ``load_model()``
    and decodes with deterministic beam search.

    Args:
        article_text: The article as a string. Input longer than 1024 tokens
            is truncated by the tokenizer. Blank or non-string input is
            reported and skipped.

    Returns:
        None. The article preview and both summaries are printed.
    """
    if model is None or tokenizer is None:
        print("Model not loaded. Please run the setup cell (containing `load_model()`) first.")
        return

    # Guard against None/empty input, which would otherwise crash on .split()
    # or make the model summarize nothing.
    if not isinstance(article_text, str) or not article_text.strip():
        print("No article text provided; nothing to summarize.")
        return

    # --- Length window (in generated tokens) ---
    input_word_count = len(article_text.split())
    min_len = 30
    max_len = 510  # Upper bound only; beams normally finish earlier at the end token.

    # --- Deterministic beam-search settings ---
    gen_kwargs = {
        "num_beams": 12,
        "length_penalty": 2.0,            # Values > 0 make beam scoring favour longer sequences.
        "repetition_penalty": 2.5,        # Values > 1 penalize tokens that were already generated.
        "no_repeat_ngram_size": 3,        # No 3-token sequence may appear twice.
        "do_sample": False,               # No sampling: the same input gives the same output.
        "early_stopping": True,           # Stop once num_beams finished candidates exist.
        "min_length": min_len,
        "max_length": max_len,
    }

    print("\n" + "="*80)
    print("SOURCE ARTICLE (truncated):")
    print("="*80)
    print(textwrap.fill(article_text[:1000] + ("..." if len(article_text) > 1000 else ""), width=80))
    print(f"\n(Article length: {input_word_count} words. Target summary length: {min_len}-{max_len} tokens)")

    # Tokenize the article as English source text.
    tokenizer.src_lang = "en_XX"
    inputs = tokenizer(article_text, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)

    # --- Generate English Summary ---
    eng_summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        # Pass the mask explicitly rather than letting generate() infer it.
        attention_mask=inputs["attention_mask"],
        forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
        **gen_kwargs
    )
    english_summary = tokenizer.decode(eng_summary_ids[0], skip_special_tokens=True)

    print("\n" + "="*80)
    print("GENERATED ENGLISH SUMMARY:")
    print("="*80)
    print(textwrap.fill(english_summary, width=80))

    # --- Generate Hindi Summary ---
    # Same encoder input; only the forced first decoder token changes.
    hin_summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
        **gen_kwargs
    )
    hindi_summary = tokenizer.decode(hin_summary_ids[0], skip_special_tokens=True)

    print("\n" + "="*80)
    print("GENERATED HINDI SUMMARY:")
    print("="*80)
    print(textwrap.fill(hindi_summary, width=90))
    print("\n" + "="*80)

# --- Load the model as soon as this cell runs, so generate_summary() is usable right away ---
load_model()



Using device: cuda
Loading model and tokenizer from: mbart-large-50-cnn-summarizer-v14/final_model...

                              --- Model Ready ---                               
               You can now generate summaries in the cells below.               


In [13]:
# Quick manual check with a short sports report. The literal starts and ends
# with a newline, which is passed to generate_summary() unchanged.
article_to_test = """
India secured a decisive victory over Australia in the final match of the T20 series, winning by a margin of 35 runs in Bengaluru. Batting first, India posted a competitive total of 198 for 4, thanks to a powerful half-century from captain Suryakumar Yadav, who scored 78 off just 45 balls. In response, Australia's chase faltered early as they lost key wickets to India's fast bowlers.
"""

generate_summary(article_to_test)


SOURCE ARTICLE (truncated):
 India secured a decisive victory over Australia in the final match of the T20
series, winning by a margin of 35 runs in Bengaluru. Batting first, India posted
a competitive total of 198 for 4, thanks to a powerful half-century from captain
Suryakumar Yadav, who scored 78 off just 45 balls. In response, Australia's
chase faltered early as they lost key wickets to India's fast bowlers.

(Article length: 66 words. Target summary length: 30-200 tokens)

GENERATED ENGLISH SUMMARY:
The Indian team secured a decisive victory over Australia in the final match of
the T20 series, winning by a margin of 35 runs in Bengaluru. Batting first,
India posted a competitive total of 198 for 4, thanks to a powerful half-century
from captain Suryakumar Yadav. However, Australia's chase faltered early as they
lost key wickets to India's fast bowlers.

GENERATED HINDI SUMMARY:
भारत ने बांगलारू में T20 सीरीज़ के अंतिम मैच में ऑस्ट्रेलिया पर निर्णायक जीत
हासिल की, जिसमें 35 रनों क

In [None]:
import textwrap
import torch
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import numpy as np

# --- Configuration ---
# Directory handed to `from_pretrained` for both the model and the tokenizer.
MODEL_PATH = "mbart-large-50-cnn-summarizer-v14/final_model"
# Use CUDA when torch reports it as available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Both stay None until load_model() succeeds; generate_summary() checks them.
model = None
tokenizer = None

def load_model():
    """Load the model and tokenizer from ``MODEL_PATH`` into the module globals.

    Does nothing (beyond printing a notice) when both are already set. If
    loading raises, the error is printed and both globals are reset to None.
    """
    global model, tokenizer
    # Compare against None explicitly: truth-testing these objects would rely
    # on their __bool__/__len__ rather than on whether they were loaded.
    if model is not None and tokenizer is not None:
        print("Model already loaded.")
        return
    try:
        print(f"Loading model from {MODEL_PATH}...")
        model = MBartForConditionalGeneration.from_pretrained(MODEL_PATH).to(DEVICE)
        tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_PATH)
        print("--- Model Ready ---")
    except Exception as e:
        print(f"Error loading model: {e}")
        # Never leave a half-initialized pair behind.
        model, tokenizer = None, None

def generate_summary(article_text):
    """
    Print an English and a Hindi summary of one English news article.

    Uses the module-level ``model`` and ``tokenizer`` set by ``load_model()``
    and decodes with deterministic beam search.

    Args:
        article_text: The article as a string. Input longer than 1024 tokens
            is truncated by the tokenizer. Blank or non-string input is
            reported and skipped.

    Returns:
        None. The article and both summaries are printed.
    """
    # Explicit None checks: truth-testing these objects would rely on their
    # __bool__/__len__ rather than on whether they were loaded.
    if model is None or tokenizer is None:
        print("Model not loaded. Please run load_model() first.")
        return

    # Guard against None/empty input, which would otherwise crash on .split()
    # or make the model summarize nothing.
    if not isinstance(article_text, str) or not article_text.strip():
        print("No article text provided; nothing to summarize.")
        return

    input_word_count = len(article_text.split())

    # --- Length window (in generated tokens) ---
    min_len = 30
    max_len = 500  # Upper bound only; beams normally finish earlier at the end token.

    # --- Deterministic beam-search settings ---
    gen_kwargs = {
        "num_beams": 12,
        "length_penalty": 2.0,            # Values > 0 make beam scoring favour longer sequences.
        "repetition_penalty": 2.5,        # Values > 1 penalize tokens that were already generated.
        "no_repeat_ngram_size": 3,        # No 3-token sequence may appear twice.
        "do_sample": False,               # No sampling: the same input gives the same output.
        "early_stopping": True,           # Stop once num_beams finished candidates exist.
        "min_length": min_len,
        "max_length": max_len,
    }

    print("\n" + "="*80)
    print("SOURCE ARTICLE:")
    print(textwrap.fill(article_text, width=80))
    print(f"\n(Article length: {input_word_count} words. Target summary length: {min_len}-{max_len} tokens)")

    # Tokenize the article as English source text.
    tokenizer.src_lang = "en_XX"
    inputs = tokenizer(article_text, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)

    # Same encoder input for both languages; only the forced first decoder
    # token changes. The attention mask is passed explicitly rather than
    # letting generate() infer it.
    eng_summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
        **gen_kwargs
    )
    english_summary = tokenizer.decode(eng_summary_ids[0], skip_special_tokens=True)

    hin_summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
        **gen_kwargs
    )
    hindi_summary = tokenizer.decode(hin_summary_ids[0], skip_special_tokens=True)

    print("\n" + "-"*30 + " ENGLISH SUMMARY " + "-"*30)
    print(textwrap.fill(english_summary, width=100))

    print("\n" + "-"*30 + " HINDI SUMMARY " + "-"*32)
    print(textwrap.fill(hindi_summary, width=100))
    print("="*80)

def main():
    """
    Load the model, then summarize a fixed list of ten sample news articles.

    The samples vary in length and topic. For each one a numbered header is
    printed and generate_summary() prints the article and both summaries.
    Nothing is returned.
    """
    
    # 1. --- Load the model once at the beginning ---
    load_model()

    # 2. --- Define a list of articles to test ---
    articles_to_test = [
        # --- Short Article (Sports) ---
        """India secured a decisive victory over Australia in the final match of the T20 series, winning by a margin of 35 runs in Bengaluru. Batting first, India posted a competitive total of 198 for 4, thanks to a powerful half-century from captain Suryakumar Yadav, who scored 78 off just 45 balls. In response, Australia's chase faltered early as they lost key wickets to India's fast bowlers.""",
        
        # --- Medium Article (Technology) ---
        """A major tech firm today unveiled its latest flagship smartphone, featuring a revolutionary new camera system with 'periscope zoom' technology. The device, which also boasts a foldable OLED display and 5G connectivity, aims to redefine the premium mobile market. Analysts are optimistic, noting that the innovative camera could be a key differentiator in a crowded field. However, concerns remain about the device's high price point, which exceeds $1,500, potentially limiting its mass-market appeal despite the advanced features.""",
        
        # --- Long Article (Science/Space) ---
        """NASA's Artemis program achieved a major milestone this week as the Orion spacecraft successfully completed its uncrewed flyby of the Moon and is now on its return trajectory to Earth. The mission, Artemis I, is a critical test of the agency's deep space exploration systems, including the powerful Space Launch System (SLS) rocket and the Orion crew capsule. During its journey, Orion traveled farther from Earth than any human-rated spacecraft has ever gone before, capturing stunning high-resolution images of the lunar surface and Earth from a distance. The spacecraft's heat shield will face its most extreme test during re-entry, when it will endure temperatures of nearly 5,000 degrees Fahrenheit while traveling at over 24,000 miles per hour. A successful splashdown in the Pacific Ocean will pave the way for Artemis II, the program's first crewed mission, which will send astronauts on a similar lunar flyby, further cementing humanity's path back to the Moon and, eventually, to Mars.""",
        
        # --- Medium Article (Business/Finance) ---
        """Global stock markets experienced a volatile week as central banks around the world signaled a more aggressive stance on combating inflation. The US Federal Reserve hinted at larger-than-expected interest rate hikes, causing a sell-off in technology stocks and growth-oriented sectors. Meanwhile, the European Central Bank is facing pressure to act as energy prices continue to soar across the continent, impacting both consumer spending and industrial production. Investors are now closely watching upcoming inflation data and corporate earnings reports for signs of a potential economic slowdown. Experts suggest a period of uncertainty is likely to continue as markets digest the new reality of tighter monetary policy.""",

        # --- Short Article (World News) ---
        """Diplomats from several nations met in Geneva to resume peace talks aimed at resolving a long-standing regional conflict. The negotiations, which had been stalled for months, were restarted following a recent de-escalation of hostilities. Observers are cautiously optimistic, but acknowledge that significant political hurdles remain. The primary goal of the current round of talks is to establish a lasting ceasefire and facilitate the delivery of humanitarian aid to affected civilian populations.""",
        
        # --- Long Article (Health/Science) ---
        """Researchers have published a landmark study in the journal 'Nature' detailing a new gene-editing technique that shows promise in correcting genetic defects responsible for certain inherited diseases. The method, which uses a modified version of the CRISPR-Cas9 system, demonstrated a significantly higher precision and lower rate of off-target mutations in lab experiments compared to existing technologies. The study focused on a specific mutation linked to cystic fibrosis, and the results in human cell cultures were highly encouraging. While the research is still in its early stages and human trials are years away, the scientific community is hailing it as a potential breakthrough. The technique's improved safety profile could overcome some of the major hurdles that have slowed the clinical application of gene therapy. However, the scientists involved urge caution, emphasizing that extensive further research is required to validate the findings and ensure the long-term safety and efficacy of this new approach before it can be considered for patient treatment.""",

        # --- Medium Article (Environment) ---
        """A new international report has found that the rate of deforestation in the Amazon rainforest accelerated by nearly 20% in the last year, reaching its highest level in over a decade. The report, which uses satellite data, attributes the surge to increased illegal logging, agricultural expansion, and mining activities. Environmental groups are calling for urgent government intervention and stronger enforcement of existing protection laws. The Amazon is a critical global ecosystem, playing a vital role in regulating the planet's climate by absorbing vast amounts of carbon dioxide. Scientists warn that continued deforestation could push the rainforest towards a tipping point, where it would transition into a drier, savanna-like state, with devastating consequences for global biodiversity and climate patterns.""",

        # --- Short Article (Entertainment) ---
        """The highly anticipated sequel to a blockbuster science fiction film has officially begun production, with the studio releasing the first on-set photo. The image features the return of the original cast members alongside several new additions. The director has promised that the sequel will expand the universe in exciting ways while honoring the spirit of the first film. The movie is currently slated for a summer release next year.""",

        # --- Medium Article (Automotive/Tech) ---
        """A legacy automaker has announced a massive $50 billion investment into its electric vehicle (EV) division, signaling a dramatic acceleration of its transition away from internal combustion engines. The company plans to launch 15 new all-electric models over the next three years, including sedans, SUVs, and a pickup truck. A key part of the strategy involves building several new "gigafactories" for battery production in North America and Europe to secure its supply chain. This move is seen by industry experts as a direct response to the growing dominance of EV-native companies and increasing regulatory pressure to reduce emissions.""",

        # --- Long Article (International Relations) ---
        """A new trade agreement between two major economic blocs was signed this week, aiming to reduce tariffs and streamline regulations across dozens of industries. The pact, which covers everything from agricultural goods to digital services, is the culmination of nearly five years of intense negotiations. Proponents argue that the deal will boost economic growth, lower consumer prices, and create hundreds of thousands of new jobs by fostering closer integration and simplifying cross-border commerce. However, the agreement has also faced criticism from labor unions and environmental groups, who argue that it lacks sufficient protections for workers' rights and fails to implement strong environmental standards. They warn that the deal could lead to a "race to the bottom" as companies relocate to regions with lower wages and weaker regulations. The signatory governments have defended the pact, stating that it includes robust chapters on labor and the environment and will be subject to regular reviews to ensure compliance and address any emerging issues.""",
    ]

    # 3. --- Loop through the articles and generate summaries ---
    # Numbering starts at 1 so the header reads "ARTICLE 1/10", not "0/10".
    for i, article in enumerate(articles_to_test, 1):
        print(f"\n\n--- SUMMARIZING ARTICLE {i}/{len(articles_to_test)} ---")
        generate_summary(article)

# Run the batch demo when this code is executed as the main module.
if __name__ == "__main__":
    main()



Loading model from mbart-large-50-cnn-summarizer-v14/final_model...
--- Model Ready ---


--- SUMMARIZING ARTICLE 1/10 ---

SOURCE ARTICLE:
India secured a decisive victory over Australia in the final match of the T20
series, winning by a margin of 35 runs in Bengaluru. Batting first, India posted
a competitive total of 198 for 4, thanks to a powerful half-century from captain
Suryakumar Yadav, who scored 78 off just 45 balls. In response, Australia's
chase faltered early as they lost key wickets to India's fast bowlers.

(Article length: 66 words. Target summary length: 30-200 tokens)

------------------------------ ENGLISH SUMMARY ------------------------------
The Indian team secured a decisive victory over Australia in the final match of
the T20 series, winning by a margin of 35 runs in Bengaluru. Batting first,
India posted a competitive total of 198 for 4, thanks to a powerful half-century
from captain Suryakumar Yadav. However, Australia's chase faltered early as they
lost key w

In [19]:
# Manual check with a longer, policy-focused article. The literal ends with a
# newline, which is passed to generate_summary() unchanged.
article_to_test = """A landmark international treaty to combat plastic pollution has been agreed upon by delegates from over 170 countries at a United Nations Environment Assembly session held in Nairobi. Hailed as the most significant environmental pact since the Paris Agreement, the resolution establishes an Intergovernmental Negotiating Committee (INC) tasked with drafting a legally binding agreement by the end of 2026. The future treaty aims to address the full lifecycle of plastic, from its production and design to its disposal and recycling. The negotiations were complex, with debates centering on whether the treaty should focus solely on plastic waste management or include caps on virgin plastic production. Major plastic-producing nations and fossil fuel companies had advocated for a focus on recycling, while a coalition of environmental groups and many developing nations pushed for stricter controls on production itself. The final resolution provides a broad mandate for the INC to consider all options.
"""
generate_summary(article_to_test)


SOURCE ARTICLE (truncated):
A landmark international treaty to combat plastic pollution has been agreed upon
by delegates from over 170 countries at a United Nations Environment Assembly
session held in Nairobi. Hailed as the most significant environmental pact since
the Paris Agreement, the resolution establishes an Intergovernmental Negotiating
Committee (INC) tasked with drafting a legally binding agreement by the end of
2026. The future treaty aims to address the full lifecycle of plastic, from its
production and design to its disposal and recycling. The negotiations were
complex, with debates centering on whether the treaty should focus solely on
plastic waste management or include caps on virgin plastic production. Major
plastic-producing nations and fossil fuel companies had advocated for a focus on
recycling, while a coalition of environmental groups and many developing nations
pushed for stricter controls on production itself. The final resolution provides
a broad mandate for