In [1]:
# ==============================================
# FINE-TUNING BART SUR CNN/DAILYMAIL (5000 exemples)
# ==============================================

print("="*60)
print("FINE-TUNING BART (140M) - COMME L'ARTICLE")
print("="*60)

# ==============================================
# 1. INSTALLATIONS
# ==============================================

!pip install transformers datasets accelerate rouge-score nltk -q

import torch
import numpy as np
from datasets import load_dataset
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
import gc
import os

# Nettoyage mémoire
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

print("✅ Bibliothèques installées")

# ==============================================
# 2. DATASET (5000 train, 1000 val, 1000 test)
# ==============================================

print("\n" + "="*60)
print("📊 CHARGEMENT DU DATASET")
print("="*60)

dataset = load_dataset("cnn_dailymail", "3.0.0")

# Split comme dans l'article
train_dataset = dataset["train"].select(range(5000))      # 5000 training
val_dataset = dataset["validation"].select(range(1000))   # 1000 validation
test_dataset = dataset["test"].select(range(1000))        # 1000 test

print(f"✅ Dataset prêt:")
print(f"  Training:   {len(train_dataset)} exemples")
print(f"  Validation: {len(val_dataset)} exemples")
print(f"  Test:       {len(test_dataset)} exemples")

# ==============================================
# 3. TOKENISATION
# ==============================================

print("\n" + "="*60)
print("🔤 TOKENISATION")
print("="*60)

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")

def preprocess_function(examples):
    """Prétraitement comme dans l'article"""
    # Input: articles
    inputs = tokenizer(
        examples["article"],
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors=None
    )

    # Labels: summaries
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples["highlights"],
            max_length=128,
            truncation=True,
            padding="max_length",
            return_tensors=None
        )

    inputs["labels"] = labels["input_ids"]
    return inputs

print("Tokenisation en cours...")
tokenized_train = train_dataset.map(
    preprocess_function,
    batched=True,
    batch_size=8,
    remove_columns=train_dataset.column_names,
    desc="Tokenisation training"
)

tokenized_val = val_dataset.map(
    preprocess_function,
    batched=True,
    batch_size=8,
    remove_columns=val_dataset.column_names,
    desc="Tokenisation validation"
)

print("✅ Tokenisation terminée")

# ==============================================
# 4. MODÈLE BART (140M paramètres)
# ==============================================

print("\n" + "="*60)
print("🧠 CHARGEMENT DE BART-BASE (140M)")
print("="*60)

model = BartForConditionalGeneration.from_pretrained(
    "facebook/bart-base",
    use_cache=False  # Important pour gradient checkpointing
)

# Activer gradient checkpointing pour économiser mémoire
model.gradient_checkpointing_enable()

total_params = sum(p.numel() for p in model.parameters())
print(f"✅ BART-base chargé")
print(f"📊 Paramètres: {total_params/1e6:.1f}M")
print(f"📊 Device: {model.device}")

# ==============================================
# 5. CONFIGURATION DU FINE-TUNING
# ==============================================

print("\n" + "="*60)
print("⚙️  CONFIGURATION DU FINE-TUNING")
print("="*60)

training_args = TrainingArguments(
    output_dir="./bart-finetuned-5000",
    overwrite_output_dir=True,
    num_train_epochs=5,  # Comme dans l'article
    per_device_train_batch_size=2,  # Petit pour éviter OOM
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,  # Batch effectif = 16
    learning_rate=1e-5,  # Faible comme dans l'article
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    fp16=True,  # Mixed precision
    report_to="none",
    gradient_checkpointing=True,
    dataloader_pin_memory=False,
)

print("✅ Configuration définie:")
print(f"  • Epochs: 5 (comme l'article)")
print(f"  • Batch size: 2")
print(f"  • Learning rate: 1e-5")
print(f"  • Gradient checkpointing: ACTIVÉ")

# ==============================================
# 6. FINE-TUNING
# ==============================================

print("\n" + "="*60)
print("🔥 DÉBUT DU FINE-TUNING")
print("="*60)
print("⚠️  Cette étape prend 2-3 heures")
print("    Si OOM error, réduis batch_size à 1")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
)

try:
    train_result = trainer.train()
    print(f"\n✅ FINE-TUNING RÉUSSI !")
    print(f"⏱️  Temps: {train_result.metrics['train_runtime']/60:.1f} min")
    print(f"📉 Training loss: {train_result.metrics['train_loss']:.3f}")

except Exception as e:
    print(f"\n❌ ERREUR: {e}")
    print("\n🔄 Tentative avec batch_size=1...")

    # Réessayer avec batch size plus petit
    training_args.per_device_train_batch_size = 1
    training_args.per_device_eval_batch_size = 1
    training_args.gradient_accumulation_steps = 16

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        tokenizer=tokenizer,
    )

    train_result = trainer.train()
    print(f"\n✅ FINE-TUNING RÉUSSI avec batch_size=1")

# ==============================================
# 7. SAUVEGARDE DU MODÈLE FINE-TUNÉ
# ==============================================

print("\n" + "="*60)
print("💾 SAUVEGARDE DU MODÈLE FINE-TUNÉ")
print("="*60)

model_save_path = "./bart_finetuned_5000"
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"✅ Modèle fine-tuné sauvegardé dans: {model_save_path}")

# ==============================================
# 8. ÉVALUATION SUR TEST SET (1000 exemples)
# ==============================================

print("\n" + "="*60)
print("📊 ÉVALUATION ROUGE SUR TEST SET")
print("="*60)

!pip install rouge-score -q
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL', 'rougeLsum'], use_stemmer=True)

# Fonction de génération
def generate_summary(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        summary_ids = model.generate(
            inputs["input_ids"],
            max_length=100,
            min_length=30,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Évaluation sur 1000 exemples
print(f"Évaluation sur 1000 exemples du test set...")

rouge1_scores = []
rouge2_scores = []
rougeL_scores = []
rougeLsum_scores = []

import time
start_time = time.time()

for i in range(1000):
    article = test_dataset[i]["article"]
    reference = test_dataset[i]["highlights"]

    generated = generate_summary(article)
    scores = scorer.score(reference, generated)

    rouge1_scores.append(scores['rouge1'].fmeasure)
    rouge2_scores.append(scores['rouge2'].fmeasure)
    rougeL_scores.append(scores['rougeL'].fmeasure)
    rougeLsum_scores.append(scores['rougeLsum'].fmeasure)

    if (i + 1) % 100 == 0:
        progress = (i + 1) / 1000 * 100
        current_rouge1 = np.mean(rouge1_scores) * 100
        print(f"  {i+1}/1000 ({progress:.0f}%) - ROUGE-1: {current_rouge1:.1f}%")

eval_time = time.time() - start_time

# ==============================================
# 9. RÉSULTATS ROUGE (comme l'article)
# ==============================================

print("\n" + "="*60)
print("📈 RÉSULTATS ROUGE - BART FINE-TUNÉ")
print("="*60)

rouge1 = np.mean(rouge1_scores) * 100
rouge2 = np.mean(rouge2_scores) * 100
rougeL = np.mean(rougeL_scores) * 100
rougeLsum = np.mean(rougeLsum_scores) * 100

print(f"\n🎯 TES RÉSULTATS (1000 exemples):")
print(f"  ROUGE-1:    {rouge1:.2f}%")
print(f"  ROUGE-2:    {rouge2:.2f}%")
print(f"  ROUGE-L:    {rougeL:.2f}%")
print(f"  ROUGE-Lsum: {rougeLsum:.2f}%")

print(f"\n📊 STATISTIQUES:")
print(f"  Écart-type ROUGE-1: {np.std(rouge1_scores)*100:.2f}%")
print(f"  Temps d'évaluation: {eval_time/60:.1f} min")

# ==============================================
# 10. COMPARAISON AVEC L'ARTICLE
# ==============================================

print("\n" + "="*60)
print("📊 COMPARAISON AVEC L'ARTICLE (Table 3)")
print("="*60)

print(f"\n{'Modèle':<25} {'ROUGE-1':<10} {'ROUGE-2':<10} {'ROUGE-L':<10} {'ROUGE-Lsum':<10}")
print("-" * 65)
print(f"{'Article BART':<25} {27.61:<10.2f} {18.37:<10.2f} {28.52:<10.2f} {25.84:<10.2f}")
print(f"{'Ton BART (5000 ex)':<25} {rouge1:<10.2f} {rouge2:<10.2f} {rougeL:<10.2f} {rougeLsum:<10.2f}")
print("-" * 65)

difference_rouge1 = rouge1 - 27.61
print(f"\n📈 Différence ROUGE-1: {difference_rouge1:+.2f}%")

if difference_rouge1 > 0:
    print("✅ Ton modèle performe MIEUX que l'article !")
elif difference_rouge1 > -5:
    print("👍 Performance proche de l'article (normal avec moins de données)")
else:
    print("⚠️  Performance inférieure (normal: 5000 vs 287K exemples dans l'article)")

# ==============================================
# 11. SAUVEGARDE DES RÉSULTATS
# ==============================================

print("\n" + "="*60)
print("💾 SAUVEGARDE FINALE")
print("="*60)

import json
from datetime import datetime

# Créer dossier résultats
results_dir = "./bart_finetuned_results"
os.makedirs(results_dir, exist_ok=True)

# Sauvegarder les résultats
results = {
    "model": "BART-base (140M) fine-tuned",
    "training": {
        "examples": 5000,
        "validation": 1000,
        "epochs": 5,
        "learning_rate": 1e-5,
        "batch_size": 2
    },
    "evaluation": {
        "test_examples": 1000,
        "rouge1": float(rouge1),
        "rouge2": float(rouge2),
        "rougeL": float(rougeL),
        "rougeLsum": float(rougeLsum),
        "std_rouge1": float(np.std(rouge1_scores) * 100)
    },
    "comparison_with_article": {
        "article_rouge1": 27.61,
        "article_rouge2": 18.37,
        "article_rougeL": 28.52,
        "article_rougeLsum": 25.84,
        "difference_rouge1": float(difference_rouge1)
    },
    "date": datetime.now().isoformat()
}

with open(os.path.join(results_dir, "results.json"), "w") as f:
    json.dump(results, f, indent=2)

print(f"✅ Résultats sauvegardés dans: {results_dir}/results.json")

# ==============================================
# 12. TÉLÉCHARGEMENT
# ==============================================

print("\n" + "="*60)
print("📦 PRÉPARATION DU TÉLÉCHARGEMENT")
print("="*60)

import shutil

# Créer ZIP avec modèle + résultats
final_dir = "./bart_project_final"
os.makedirs(final_dir, exist_ok=True)

# Copier modèle
shutil.copytree(model_save_path, os.path.join(final_dir, "model"), dirs_exist_ok=True)
# Copier résultats
shutil.copy(os.path.join(results_dir, "results.json"), os.path.join(final_dir, "results.json"))

# Créer ZIP
zip_name = "bart_finetuned_project"
shutil.make_archive(zip_name, 'zip', final_dir)

# Télécharger
from google.colab import files
files.download(f"{zip_name}.zip")

print(f"\n✅ PROJET TERMINÉ !")
print(f"📦 Fichier: {zip_name}.zip")
print(f"📊 ROUGE-1: {rouge1:.2f}%")
print(f"📈 Comparaison article: {difference_rouge1:+.2f}%")

FINE-TUNING BART (140M) - COMME L'ARTICLE
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
✅ Bibliothèques installées

📊 CHARGEMENT DU DATASET


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

3.0.0/train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

3.0.0/train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

3.0.0/train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

3.0.0/validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

3.0.0/test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

✅ Dataset prêt:
  Training:   5000 exemples
  Validation: 1000 exemples
  Test:       1000 exemples

🔤 TOKENISATION


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

Tokenisation en cours...


Tokenisation training:   0%|          | 0/5000 [00:00<?, ? examples/s]



Tokenisation validation:   0%|          | 0/1000 [00:00<?, ? examples/s]

✅ Tokenisation terminée

🧠 CHARGEMENT DE BART-BASE (140M)


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

✅ BART-base chargé
📊 Paramètres: 139.4M
📊 Device: cpu

⚙️  CONFIGURATION DU FINE-TUNING
✅ Configuration définie:
  • Epochs: 5 (comme l'article)
  • Batch size: 2
  • Learning rate: 1e-5
  • Gradient checkpointing: ACTIVÉ

🔥 DÉBUT DU FINE-TUNING
⚠️  Cette étape prend 2-3 heures
    Si OOM error, réduis batch_size à 1


  trainer = Trainer(


Step,Training Loss,Validation Loss
500,1.1985,0.804495
1000,1.0126,0.778715
1500,0.9659,0.777881


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].



✅ FINE-TUNING RÉUSSI !
⏱️  Temps: 22.6 min
📉 Training loss: 1.746

💾 SAUVEGARDE DU MODÈLE FINE-TUNÉ
✅ Modèle fine-tuné sauvegardé dans: ./bart_finetuned_5000

📊 ÉVALUATION ROUGE SUR TEST SET
Évaluation sur 1000 exemples du test set...




  100/1000 (10%) - ROUGE-1: 32.9%
  200/1000 (20%) - ROUGE-1: 32.3%
  300/1000 (30%) - ROUGE-1: 31.9%
  400/1000 (40%) - ROUGE-1: 32.0%
  500/1000 (50%) - ROUGE-1: 31.4%
  600/1000 (60%) - ROUGE-1: 31.4%
  700/1000 (70%) - ROUGE-1: 31.5%
  800/1000 (80%) - ROUGE-1: 31.2%
  900/1000 (90%) - ROUGE-1: 31.3%
  1000/1000 (100%) - ROUGE-1: 31.3%

📈 RÉSULTATS ROUGE - BART FINE-TUNÉ

🎯 TES RÉSULTATS (1000 exemples):
  ROUGE-1:    31.25%
  ROUGE-2:    11.18%
  ROUGE-L:    21.65%
  ROUGE-Lsum: 28.63%

📊 STATISTIQUES:
  Écart-type ROUGE-1: 11.23%
  Temps d'évaluation: 16.4 min

📊 COMPARAISON AVEC L'ARTICLE (Table 3)

Modèle                    ROUGE-1    ROUGE-2    ROUGE-L    ROUGE-Lsum
-----------------------------------------------------------------
Article BART              27.61      18.37      28.52      25.84     
Ton BART (5000 ex)        31.25      11.18      21.65      28.63     
-----------------------------------------------------------------

📈 Différence ROUGE-1: +3.64%
✅ Ton modèle p

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ PROJET TERMINÉ !
📦 Fichier: bart_finetuned_project.zip
📊 ROUGE-1: 31.25%
📈 Comparaison article: +3.64%
