<a href="https://colab.research.google.com/github/Safae26/text-summarization/blob/main/notebooks/pegasus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================
# FINE-TUNING PEGASUS ON CNN/DAILYMAIL (5000 examples)
# ==============================================

# Opening banner: the title framed by two 60-character rules, one item per line.
print(
    "=" * 60,
    "FINE-TUNING PEGASUS - COMME L'ARTICLE",
    "=" * 60,
    sep="\n",
)

# ==============================================
# 1. INSTALLATIONS
# ==============================================

!pip install transformers datasets accelerate rouge-score sentencepiece -q

import torch
import numpy as np
from datasets import load_dataset
from transformers import PegasusTokenizer, PegasusForConditionalGeneration, Trainer, TrainingArguments
import gc
import os

# Memory cleanup: run Python's garbage collector, then, when CUDA is available,
# release PyTorch's cached GPU blocks and request the "expandable_segments"
# allocator setting through the PYTORCH_CUDA_ALLOC_CONF environment variable.
# NOTE(review): the variable is set after torch has been imported; presumably it is
# still honoured because no CUDA allocation has happened yet — confirm.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

print("‚úÖ Biblioth√®ques install√©es")

# ==============================================
# 2. DATASET (5000 train, 1000 val, 1000 test)
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üìä CHARGEMENT DU DATASET", "=" * 60, sep="\n")

# CNN/DailyMail, configuration "3.0.0".
dataset = load_dataset(path="cnn_dailymail", name="3.0.0")

# Keep only the leading rows of each split: 5000 / 1000 / 1000.
train_dataset = dataset["train"].select(range(5000))
val_dataset = dataset["validation"].select(range(1000))
test_dataset = dataset["test"].select(range(1000))

# Report the size of every subset actually kept.
print(
    f"‚úÖ Dataset pr√™t:",
    f"  Training:   {len(train_dataset)} exemples",
    f"  Validation: {len(val_dataset)} exemples",
    f"  Test:       {len(test_dataset)} exemples",
    sep="\n",
)

# ==============================================
# 3. PEGASUS TOKENIZATION
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üî§ TOKENISATION PEGASUS", "=" * 60, sep="\n")

# Tokenizer of the "google/pegasus-cnn_dailymail" checkpoint (same identifier as
# the model loaded later in this script).
# NOTE(review): an earlier comment called this "PEGASUS-base (not large)", but the
# recorded run output reports 570.8M parameters for this checkpoint — confirm.
tokenizer = PegasusTokenizer.from_pretrained(
    "google/pegasus-cnn_dailymail",
)

def preprocess_function(examples):
    """Tokenize a batch of articles and reference summaries for PEGASUS.

    Args:
        examples: Batch mapping with an "article" list (source texts) and a
            "highlights" list (reference summaries), as supplied by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the articles (truncated/padded to 512 tokens)
        with an extra "labels" entry: the summary token ids truncated/padded
        to 128 positions, where every padding position holds -100 instead of
        the pad token id.
    """
    # Encoder side: the articles.
    model_inputs = tokenizer(
        examples["article"],
        max_length=512,
        truncation=True,
        padding="max_length",
    )

    # Decoder side: the summaries. ``text_target`` is the current replacement
    # for the deprecated ``as_target_tokenizer()`` context manager.
    targets = tokenizer(
        text_target=examples["highlights"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )

    # Padding positions must not contribute to the training loss: -100 is the
    # index ignored by the cross-entropy loss used by the model, whereas the raw
    # pad id would be scored like any other target token.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token_id == pad_id else token_id for token_id in sequence]
        for sequence in targets["input_ids"]
    ]
    return model_inputs

print("Tokenisation en cours...")

# Training split: tokenize in batches of 8 and drop the raw text columns so that
# only the tokenizer outputs and labels remain.
tokenized_train = train_dataset.map(
    function=preprocess_function,
    remove_columns=train_dataset.column_names,
    batched=True,
    batch_size=8,
    desc="Tokenisation training",
)

# Validation split: identical processing.
tokenized_val = val_dataset.map(
    function=preprocess_function,
    remove_columns=val_dataset.column_names,
    batched=True,
    batch_size=8,
    desc="Tokenisation validation",
)

print("‚úÖ Tokenisation termin√©e")

# ==============================================
# 4. PEGASUS MODEL
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üß† CHARGEMENT DE PEGASUS-BASE (175M)", "=" * 60, sep="\n")

# Load the "google/pegasus-cnn_dailymail" checkpoint.
# NOTE(review): the labels printed in this section say "base" / "175M", but the
# recorded run output reports 570.8M parameters — confirm and align the wording.
model = PegasusForConditionalGeneration.from_pretrained(
    "google/pegasus-cnn_dailymail",
)

# Total number of scalar parameters, reported in millions together with the
# device the model currently lives on.
total_params = sum(param.numel() for param in model.parameters())
print(
    f"‚úÖ PEGASUS-base charg√©",
    f"üìä Param√®tres: {total_params/1e6:.1f}M (comme l'article)",
    f"üìä Device: {model.device}",
    sep="\n",
)

# ==============================================
# 5. FINE-TUNING CONFIGURATION
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "‚öôÔ∏è  CONFIGURATION DU FINE-TUNING", "=" * 60, sep="\n")

training_args = TrainingArguments(
    # Where checkpoints and logs go.
    output_dir="./pegasus-finetuned-5000",
    overwrite_output_dir=True,
    logging_dir="./logs-pegasus",
    logging_steps=100,
    report_to="none",
    # Optimisation schedule.
    num_train_epochs=5,
    learning_rate=1e-5,
    warmup_steps=100,
    weight_decay=0.01,
    # Batching: 4 samples per device x 4 accumulation steps = effective batch of 16.
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    # Evaluate and checkpoint every 500 steps; keep at most two checkpoints and
    # reload the one with the lowest eval loss when training ends.
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    # Runtime options.
    fp16=True,
    dataloader_pin_memory=False,
)

# Fixed recap of the main settings (the text is hard-coded, not read back from
# training_args).
print(
    "‚úÖ Configuration d√©finie:",
    f"  ‚Ä¢ Epochs: 5 (comme l'article)",
    f"  ‚Ä¢ Batch size: 4",
    f"  ‚Ä¢ Learning rate: 1e-5",
    sep="\n",
)

# ==============================================
# 6. PEGASUS FINE-TUNING
# ==============================================

print("\n" + "="*60)
print("üî• D√âBUT DU FINE-TUNING PEGASUS")
print("="*60)
print("‚ö†Ô∏è  Cette √©tape prend 1-2 heures")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
)

# First attempt with the configured batch size. An out-of-memory failure is only
# *recorded* inside the handler: while the except clause is active, the exception's
# traceback keeps the failed step's frames (and their tensors) alive, so retrying
# from inside it would start with that memory still allocated.
training_hit_oom = False
try:
    train_result = trainer.train()
except RuntimeError as e:
    if "out of memory" not in str(e).lower():
        # Not a memory problem: propagate with the original traceback.
        raise
    training_hit_oom = True

if not training_hit_oom:
    print(f"\n‚úÖ FINE-TUNING R√âUSSI !")
    print(f"‚è±Ô∏è  Temps: {train_result.metrics['train_runtime']/60:.1f} min")
    print(f"üìâ Training loss: {train_result.metrics['train_loss']:.3f}")
else:
    print(f"\n‚ö†Ô∏è  M√©moire insuffisante, ajustement des param√®tres...")
    print("üîÑ Tentative avec batch_size=1 et gradient checkpointing...")

    # Same effective batch (1 x 16), far smaller activation footprint.
    training_args.per_device_train_batch_size = 1
    training_args.per_device_eval_batch_size = 1
    training_args.gradient_accumulation_steps = 16
    training_args.gradient_checkpointing = True

    # Rebinding `trainer` drops the failed trainer and whatever state it holds.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        tokenizer=tokenizer,
    )

    # Reclaim what the failed attempt left behind before trying again.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    train_result = trainer.train()
    print(f"\n‚úÖ FINE-TUNING R√âUSSI avec batch_size=1 et gradient checkpointing")

# ==============================================
# 7. SAVING THE FINE-TUNED MODEL
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üíæ SAUVEGARDE DU MOD√àLE PEGASUS FINE-TUN√â", "=" * 60, sep="\n")

# Model weights and tokenizer files are written side by side in one directory.
model_save_path = "./pegasus_finetuned_5000"
model.save_pretrained(save_directory=model_save_path)
tokenizer.save_pretrained(save_directory=model_save_path)

print(f"‚úÖ Mod√®le PEGASUS fine-tun√© sauvegard√© dans: {model_save_path}")

# ==============================================
# 8. EVALUATION ON THE TEST SET (1000 examples) - CORRECTED
# ==============================================

# Section banner: blank line, rule, title, rule — one item per output line.
print(
    "",
    "=" * 60,
    "üìä √âVALUATION ROUGE SUR TEST SET",
    "=" * 60,
    sep="\n",
)

!pip install rouge-score -q
from rouge_score import rouge_scorer
import torch

# ROUGE scorer shared by the whole evaluation: unigram, bigram and both
# longest-common-subsequence variants, with stemming enabled.
scorer = rouge_scorer.RougeScorer(
    rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
    use_stemmer=True,
)

# Fonction de g√©n√©ration CORRIG√âE
def generate_summary_pegasus(text, model, tokenizer, device):
    """Summarize ``text`` with ``model`` using beam search.

    Args:
        text: Source document to summarize.
        model: Generation model; it is switched to eval mode by this call.
        tokenizer: Tokenizer used both to encode ``text`` and to decode the result.
        device: Device the encoded tensors are moved to before generation.

    Returns:
        The first generated sequence decoded to a string, special tokens removed.
    """
    model.eval()

    # Encode (truncated to 512 tokens) and move every tensor to the target device.
    batch = tokenizer(
        text,
        max_length=512,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    batch = {name: tensor.to(device) for name, tensor in batch.items()}

    # Beam-search settings: 4 beams, 30-100 tokens, no repeated trigram.
    generation_options = dict(
        max_length=100,
        min_length=30,
        length_penalty=0.8,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=3,
    )

    # No gradients are needed for inference.
    with torch.no_grad():
        summary_ids = model.generate(
            batch["input_ids"],
            attention_mask=batch["attention_mask"],
            **generation_options,
        )

    # Drop the input tensors and release cached GPU memory before returning.
    del batch
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    best_sequence = summary_ids[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)

# Corrected evaluation: score the first 1000 test examples one at a time.
print(f"√âvaluation sur 1000 exemples du test set...")

# One list of F-measures per ROUGE variant, filled as examples are scored.
pegasus_rouge1, pegasus_rouge2, pegasus_rougeL, pegasus_rougeLsum = [], [], [], []

# Run on the GPU when there is one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

import time
start_time = time.time()

try:
    for i in range(1000):
        article = test_dataset[i]["article"]
        reference = test_dataset[i]["highlights"]

        # Generate, then compare against the reference summary.
        generated = generate_summary_pegasus(article, model, tokenizer, device)
        scores = scorer.score(reference, generated)

        pegasus_rouge1.append(scores["rouge1"].fmeasure)
        pegasus_rouge2.append(scores["rouge2"].fmeasure)
        pegasus_rougeL.append(scores["rougeL"].fmeasure)
        pegasus_rougeLsum.append(scores["rougeLsum"].fmeasure)

        # Every 50 examples: release cached GPU memory.
        if torch.cuda.is_available() and (i + 1) % 50 == 0:
            torch.cuda.empty_cache()

        # Every 100 examples: show progress and the running ROUGE-1 mean.
        if (i + 1) % 100 == 0:
            progress = (i + 1) / 1000 * 100
            current_rouge1 = np.mean(pegasus_rouge1) * 100
            print(f"  {i+1}/1000 ({progress:.0f}%) - ROUGE-1: {current_rouge1:.1f}%")

except RuntimeError as e:
    # Anything other than an out-of-memory failure is reported and re-raised.
    if "out of memory" not in str(e).lower():
        print(f"\n‚ùå Erreur lors de l'√©valuation: {e}")
        raise e
    # Out of memory: keep the scores collected so far as a partial evaluation.
    print(f"\n‚ö†Ô∏è  M√©moire GPU satur√©e √† l'it√©ration {i+1}")
    print(f"üîÑ √âvaluation partielle termin√©e: {len(pegasus_rouge1)} exemples")

finally:
    # Always restore training mode and record the elapsed time, even on failure.
    model.train()
    eval_time = time.time() - start_time

# With no scored example, fall back to a single zero per metric so that the
# statistics computed afterwards are still defined.
if not pegasus_rouge1:
    print("‚ùå Aucun exemple n'a pu √™tre √©valu√©")
    pegasus_rouge1, pegasus_rouge2, pegasus_rougeL, pegasus_rougeLsum = [0], [0], [0], [0]

# ==============================================
# 9. ROUGE RESULTS
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üìà R√âSULTATS ROUGE - PEGASUS FINE-TUN√â", "=" * 60, sep="\n")

# Mean F-measure of each ROUGE variant, expressed as a percentage.
pegasus_r1 = 100 * np.mean(pegasus_rouge1)
pegasus_r2 = 100 * np.mean(pegasus_rouge2)
pegasus_rL = 100 * np.mean(pegasus_rougeL)
pegasus_rLsum = 100 * np.mean(pegasus_rougeLsum)

# Headline scores.
print(
    f"\nüéØ TES R√âSULTATS PEGASUS ({len(pegasus_rouge1)} exemples):",
    f"  ROUGE-1:    {pegasus_r1:.2f}%",
    f"  ROUGE-2:    {pegasus_r2:.2f}%",
    f"  ROUGE-L:    {pegasus_rL:.2f}%",
    f"  ROUGE-Lsum: {pegasus_rLsum:.2f}%",
    sep="\n",
)

# Spread of ROUGE-1, wall-clock time of the evaluation and number of examples scored.
print(
    f"\nüìä STATISTIQUES:",
    f"  √âcart-type ROUGE-1: {np.std(pegasus_rouge1)*100:.2f}%",
    f"  Temps d'√©valuation: {eval_time/60:.1f} min",
    f"  Exemples √©valu√©s: {len(pegasus_rouge1)}/1000",
    sep="\n",
)

# ==============================================
# 10. COMPARISON WITH THE ARTICLE
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üìä COMPARAISON AVEC L'ARTICLE (Table 3)", "=" * 60, sep="\n")

# Two-row table: the reference scores hard-coded below versus this run.
print(
    f"\n{'Mod√®le':<25} {'ROUGE-1':<10} {'ROUGE-2':<10} {'ROUGE-L':<10} {'ROUGE-Lsum':<10}",
    "-" * 65,
    f"{'Article PEGASUS':<25} {33.69:<10.2f} {21.58:<10.2f} {28.43:<10.2f} {23.76:<10.2f}",
    f"{'Ton PEGASUS (5000 ex)':<25} {pegasus_r1:<10.2f} {pegasus_r2:<10.2f} {pegasus_rL:<10.2f} {pegasus_rLsum:<10.2f}",
    "-" * 65,
    sep="\n",
)

# Signed gap, in percentage points, to the reference ROUGE-1 of 33.69.
difference_rouge1 = pegasus_r1 - 33.69
print(f"\nüìà Diff√©rence ROUGE-1: {difference_rouge1:+.2f}%")

# Verdict: above the reference, within 5 points below it, or further behind.
print(
    "‚úÖ Ton mod√®le performe MIEUX que l'article !"
    if difference_rouge1 > 0
    else "üëç Performance proche de l'article (normal avec moins de donn√©es)"
    if difference_rouge1 > -5
    else "‚ö†Ô∏è  Performance inf√©rieure (normal: 5000 vs 287K exemples dans l'article)"
)

# ==============================================
# 11. COMPARISON WITH THE OTHER MODELS (IF AVAILABLE)
# ==============================================

# Section banner: blank line, rule, title, rule — one item per output line.
print(
    "",
    "=" * 60,
    "üìä COMPARAISON DES MOD√àLES",
    "=" * 60,
    sep="\n",
)

def load_previous_results(model_name):
    """Load the results saved by an earlier run for ``model_name``, if any.

    Args:
        model_name: Prefix of the results directory; the file read is
            ``./{model_name}_finetuned_results/results.json``.

    Returns:
        The decoded JSON content, or ``None`` when the file is missing,
        unreadable, or does not contain valid JSON.
    """
    import json

    path = f"./{model_name}_finetuned_results/results.json"
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError both derive
        # from it). Anything else, e.g. KeyboardInterrupt, is left to propagate.
        return None

# Results of the other fine-tuning runs; each is None when no results file is found.
gpt2_results = load_previous_results("gpt2")
bart_results = load_previous_results("bart")

# Table header.
print(
    f"\n{'Mod√®le':<15} {'ROUGE-1':<10} {'ROUGE-2':<10} {'Diff√©rence article':<20}",
    "-" * 55,
    sep="\n",
)

# PEGASUS row (this run).
print(f"{'PEGASUS':<15} {pegasus_r1:<10.2f} {pegasus_r2:<10.2f} {difference_rouge1:+.2f}%")

# BART row, only when its results were loaded.
if bart_results:
    bart_diff = bart_results["comparison_with_article"]["difference_rouge1"]
    bart_r1 = bart_results["evaluation"]["rouge1"]
    print(f"{'BART':<15} {bart_r1:<10.2f} {bart_results['evaluation']['rouge2']:<10.2f} {bart_diff:+.2f}%")

# GPT-2 row, only when its results were loaded.
if gpt2_results:
    gpt2_diff = gpt2_results["comparison_with_article"]["difference_rouge1"]
    gpt2_r1 = gpt2_results["evaluation"]["rouge1"]
    print(f"{'GPT-2':<15} {gpt2_r1:<10.2f} {gpt2_results['evaluation']['rouge2']:<10.2f} {gpt2_diff:+.2f}%")

print("-" * 55)

# The ranking needs all three models.
if gpt2_results and bart_results:
    print(f"\nüìà CLASSEMENT DES MOD√àLES:")

    models = [("PEGASUS", pegasus_r1), ("BART", bart_r1), ("GPT-2", gpt2_r1)]

    # Highest ROUGE-1 first.
    sorted_models = sorted(models, key=lambda entry: entry[1], reverse=True)

    for i, (name, score) in enumerate(sorted_models):
        print(f"  {i+1}. {name}: {score:.2f}% ROUGE-1")

    # Compare the observed ranking with the expected PEGASUS > BART > GPT-2 order.
    expected_order = ["PEGASUS", "BART", "GPT-2"]
    actual_order = [entry[0] for entry in sorted_models]

    if actual_order != expected_order:
        print(f"\n‚ö†Ô∏è  Ordre diff√©rent de l'article: {' > '.join(actual_order)}")
    else:
        print(f"\n‚úÖ Confirm√©: PEGASUS > BART > GPT-2 (comme l'article)")

# ==============================================
# 12. SAVING THE RESULTS
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üíæ SAUVEGARDE DES R√âSULTATS PEGASUS", "=" * 60, sep="\n")

import json
from datetime import datetime

# Results directory (created on first use).
results_dir = "./pegasus_finetuned_results"
os.makedirs(results_dir, exist_ok=True)

# Everything worth keeping from this run: training setup, test-set scores and
# the gap to the hard-coded reference scores.
results = {
    "model": "PEGASUS (175M) fine-tuned",
    "training": {
        "examples": 5000,
        "validation": 1000,
        "epochs": 5,
        "learning_rate": 1e-5,
        # Read back from training_args: these two may have been changed by the
        # out-of-memory fallback.
        "batch_size": training_args.per_device_train_batch_size,
        "gradient_accumulation": training_args.gradient_accumulation_steps,
        # None when no training result is available in this session.
        "training_time_minutes": (
            train_result.metrics['train_runtime'] / 60 if 'train_result' in locals() else None
        ),
    },
    "evaluation": {
        "test_examples": len(pegasus_rouge1),
        "rouge1": float(pegasus_r1),
        "rouge2": float(pegasus_r2),
        "rougeL": float(pegasus_rL),
        "rougeLsum": float(pegasus_rLsum),
        "std_rouge1": float(np.std(pegasus_rouge1) * 100),
        "evaluation_time_minutes": eval_time / 60,
    },
    "comparison_with_article": {
        "article_rouge1": 33.69,
        "article_rouge2": 21.58,
        "article_rougeL": 28.43,
        "article_rougeLsum": 23.76,
        "difference_rouge1": float(difference_rouge1),
    },
    "date": datetime.now().isoformat(),
}

# Serialize first, then write the text in one go.
with open(os.path.join(results_dir, "results.json"), "w") as f:
    f.write(json.dumps(results, indent=2))

print(f"‚úÖ R√©sultats sauvegard√©s dans: {results_dir}/results.json")

# ==============================================
# 13. DOWNLOAD (optional - for Google Colab)
# ==============================================

print("\n" + "="*60)
print("üì¶ PR√âPARATION DU T√âL√âCHARGEMENT")
print("="*60)

# Best-effort packaging: any failure below is reported but does not stop the
# script, since the model and the results are already saved in their own folders.
try:
    import shutil

    # Staging directory holding the model and the results.
    final_dir = "./pegasus_project_final"
    os.makedirs(final_dir, exist_ok=True)

    # Copy the model
    shutil.copytree(model_save_path, os.path.join(final_dir, "model"), dirs_exist_ok=True)
    # Copy the results
    shutil.copy(os.path.join(results_dir, "results.json"), os.path.join(final_dir, "results.json"))

    # Comparative report, only when the results of both other models were loaded
    # (the same condition under which sorted_models and the bart_*/gpt2_* values exist).
    if gpt2_results and bart_results:
        comparative_report = f"""
# RAPPORT COMPARATIF DES MOD√àLES

## R√©sultats ROUGE-1:
- PEGASUS: {pegasus_r1:.2f}% (Diff√©rence article: {difference_rouge1:+.2f}%)
- BART:    {bart_r1:.2f}% (Diff√©rence article: {bart_diff:+.2f}%)
- GPT-2:   {gpt2_r1:.2f}% (Diff√©rence article: {gpt2_diff:+.2f}%)

## Classement:
1. {sorted_models[0][0]}: {sorted_models[0][1]:.2f}%
2. {sorted_models[1][0]}: {sorted_models[1][1]:.2f}%
3. {sorted_models[2][0]}: {sorted_models[2][1]:.2f}%

## Conclusion:
Les r√©sultats confirment la hi√©rarchie g√©n√©rale PEGASUS > BART > GPT-2 pour la summarization,
bien que les performances absolues soient inf√©rieures √† l'article d√ª √† l'entra√Ænement sur
moins de donn√©es (5000 vs 287K exemples).
"""

        # The report contains non-ASCII text: write it with an explicit encoding
        # instead of relying on the platform default.
        with open(os.path.join(final_dir, "comparative_report.md"), "w", encoding="utf-8") as f:
            f.write(comparative_report)

    # Create the ZIP archive
    zip_name = "pegasus_finetuned_project"
    shutil.make_archive(zip_name, 'zip', final_dir)

    # Browser download is only possible inside Google Colab. Only a failed import
    # means "not in Colab"; a failing download is a real error and is reported by
    # the outer handler instead of being mistaken for a missing Colab runtime.
    try:
        from google.colab import files
    except ImportError:
        files = None

    if files is None:
        print(f"‚úÖ Fichier ZIP cr√©√©: {zip_name}.zip (non t√©l√©charg√© - pas en Colab)")
    else:
        files.download(f"{zip_name}.zip")
        print(f"‚úÖ Fichier t√©l√©charg√©: {zip_name}.zip")

except Exception as e:
    print(f"‚ö†Ô∏è  Erreur lors de la cr√©ation du ZIP: {e}")
    print("Les r√©sultats sont quand m√™me sauvegard√©s dans le dossier.")

# ==============================================
# 14. SAMPLE RESULT
# ==============================================

# Section banner (blank line, rule, title, rule).
print("", "=" * 60, "üîç EXEMPLE DE R√âSULTAT", "=" * 60, sep="\n")

# Show one generation next to its reference.
if pegasus_rouge1:
    idx = 0  # first test example
    reference = test_dataset[idx]["highlights"]
    article = test_dataset[idx]["article"][:500] + "..."  # shortened for display only

    # The summary is generated from the full article, not the shortened text.
    model.eval()
    generated = generate_summary_pegasus(test_dataset[idx]["article"], model, tokenizer, device)

    print(
        f"\nüì∞ Article (tronqu√©):",
        article,
        f"\nüìù R√©sum√© de r√©f√©rence:",
        reference,
        f"\nü§ñ R√©sum√© g√©n√©r√©:",
        generated,
        sep="\n",
    )

    # ROUGE scores of this single example.
    example_scores = scorer.score(reference, generated)
    print(
        f"\nüìä Scores pour cet exemple:",
        f"  ROUGE-1: {example_scores['rouge1'].fmeasure*100:.1f}%",
        f"  ROUGE-2: {example_scores['rouge2'].fmeasure*100:.1f}%",
        f"  ROUGE-L: {example_scores['rougeL'].fmeasure*100:.1f}%",
        sep="\n",
    )

# Closing banner followed by the recap of the run.
print(
    f"\n{'='*60}",
    "‚úÖ PROJET PEGASUS TERMIN√â !",
    f"{'='*60}",
    f"üìä R√©sum√© des r√©sultats:",
    f"  ‚Ä¢ ROUGE-1: {pegasus_r1:.2f}%",
    f"  ‚Ä¢ Diff√©rence avec l'article: {difference_rouge1:+.2f}%",
    f"  ‚Ä¢ Exemples √©valu√©s: {len(pegasus_rouge1)}/1000",
    f"  ‚Ä¢ Mod√®le sauvegard√©: {model_save_path}",
    sep="\n",
)

# Final ranking, available only when the results of both other models were loaded.
if gpt2_results and bart_results:
    print(f"\nüéØ CLASSEMENT FINAL:")
    for i, (name, score) in enumerate(sorted_models):
        print(f"  {i+1}. {name}: {score:.2f}% ROUGE-1")

FINE-TUNING PEGASUS - COMME L'ARTICLE
‚úÖ Biblioth√®ques install√©es

üìä CHARGEMENT DU DATASET


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


‚úÖ Dataset pr√™t:
  Training:   5000 exemples
  Validation: 1000 exemples
  Test:       1000 exemples

üî§ TOKENISATION PEGASUS
Tokenisation en cours...


Tokenisation training:   0%|          | 0/5000 [00:00<?, ? examples/s]



Tokenisation validation:   0%|          | 0/1000 [00:00<?, ? examples/s]

‚úÖ Tokenisation termin√©e

üß† CHARGEMENT DE PEGASUS-BASE (175M)


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ PEGASUS-base charg√©
üìä Param√®tres: 570.8M (comme l'article)
üìä Device: cpu

‚öôÔ∏è  CONFIGURATION DU FINE-TUNING
‚úÖ Configuration d√©finie:
  ‚Ä¢ Epochs: 5 (comme l'article)
  ‚Ä¢ Batch size: 4
  ‚Ä¢ Learning rate: 1e-5

üî• D√âBUT DU FINE-TUNING PEGASUS
‚ö†Ô∏è  Cette √©tape prend 1-2 heures


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.


Step,Training Loss,Validation Loss
500,5.9706,6.82929
1000,4.6502,4.528221
1500,3.4523,2.889092


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].



‚úÖ FINE-TUNING R√âUSSI !
‚è±Ô∏è  Temps: 60.4 min
üìâ Training loss: 5.148

üíæ SAUVEGARDE DU MOD√àLE PEGASUS FINE-TUN√â
‚úÖ Mod√®le PEGASUS fine-tun√© sauvegard√© dans: ./pegasus_finetuned_5000

üìä √âVALUATION ROUGE SUR TEST SET
√âvaluation sur 1000 exemples du test set...
  100/1000 (10%) - ROUGE-1: 36.1%
  200/1000 (20%) - ROUGE-1: 36.0%
  300/1000 (30%) - ROUGE-1: 35.8%
  400/1000 (40%) - ROUGE-1: 36.4%
  500/1000 (50%) - ROUGE-1: 36.1%
  600/1000 (60%) - ROUGE-1: 35.7%
  700/1000 (70%) - ROUGE-1: 35.7%
  800/1000 (80%) - ROUGE-1: 35.3%
  900/1000 (90%) - ROUGE-1: 35.1%
  1000/1000 (100%) - ROUGE-1: 34.9%

üìà R√âSULTATS ROUGE - PEGASUS FINE-TUN√â

üéØ TES R√âSULTATS PEGASUS (1000 exemples):
  ROUGE-1:    34.92%
  ROUGE-2:    14.46%
  ROUGE-L:    25.51%
  ROUGE-Lsum: 29.26%

üìä STATISTIQUES:
  √âcart-type ROUGE-1: 13.35%
  Temps d'√©valuation: 24.5 min
  Exemples √©valu√©s: 1000/1000

üìä COMPARAISON AVEC L'ARTICLE (Table 3)

Mod√®le                    ROUGE-1    ROUGE-2 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Fichier t√©l√©charg√©: pegasus_finetuned_project.zip

üîç EXEMPLE DE R√âSULTAT

üì∞ Article (tronqu√©):
(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, includin...

üìù R√©sum√© de r√©f√©rence:
Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June .
Israel and the United States opposed the move, which could open the door to war crimes investigations against Israelis .

ü§ñ R√©sum√© g√©n√©r√©:
The Palestinian Authority formally becomes the 123rd member of the International Criminal Court