<a href="https://colab.research.google.com/github/Raniamea/arabic-video-summarisation/blob/main/notebooks/BaseLineSummarisation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =========================================================
# Mount Google Drive and define base path
# =========================================================
from google.colab import drive
import os

# Root folder on Drive under which the project's files are kept.
BASE_PATH = "/content/drive/MyDrive/ArabicVideoSummariser"

# Attach Google Drive only when nothing is mounted at /content/drive yet,
# so re-running this cell does not produce a duplicate-mount warning.
if not os.path.ismount("/content/drive"):
    drive.mount("/content/drive", force_remount=False)

# Create the project folder on first use; a no-op when it already exists.
os.makedirs(BASE_PATH, exist_ok=True)
print("Base path set to:", BASE_PATH)


In [None]:
# Install the metric and model libraries at pinned / bounded versions (quiet mode).
!pip install -q "evaluate>=0.4.2,<0.5.0" "rouge-score==0.1.2" "bert-score==0.3.13" "transformers>=4.44,<4.47"

In [None]:
# ==============================================================
# Baseline Evaluation of Pre-trained Summarization Models
# with ROUGE and BERTScore (fixed version)
# ==============================================================

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch, pandas as pd, evaluate, os

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# ---- Checkpoints to compare: display label -> Hugging Face model id ----
models_to_test = {
    "AraBART": "moussaKam/AraBART",
    "AraBART-finetuned": "ahmeddbahaa/AraBART-finetuned-ar",
    "mT5-XLsum": "csebuetnlp/mT5_multilingual_XLSum",
    "mBART-50": "facebook/mbart-large-50",
}

# ---- Input files: source transcript and its reference summary ----
transcript_path = "/content/drive/MyDrive/ArabicVideoSummariser/transcripts/KhanElkhalili_ar.txt"
reference_path  = "/content/drive/MyDrive/ArabicVideoSummariser/summaries/KhanElkhalili_Summary.txt"

# Read the transcript, trim surrounding whitespace and keep only its
# first 2000 characters as the model input.
with open(transcript_path, "r", encoding="utf-8") as transcript_file:
    transcript_text = transcript_file.read().strip()[:2000]

# The reference summary is used in full.
with open(reference_path, "r", encoding="utf-8") as reference_file:
    reference_summary = reference_file.read().strip()

# ---- Decoding settings shared by every model ----
max_input_tokens, max_new_tokens, num_beams = 512, 150, 4

def generate_summary(model_name, model_id, text):
    """Load a seq2seq checkpoint and return its beam-search summary of *text*.

    Args:
        model_name: Display label of the model. Not used inside the function;
            kept so existing callers keep working.
        model_id: Hugging Face model id passed to ``from_pretrained``.
        text: Source document. It is truncated to ``max_input_tokens`` tokens
            by the tokenizer.

    Returns:
        The decoded first generated sequence with special tokens removed.

    Reads the module-level ``device``, ``max_input_tokens``,
    ``max_new_tokens`` and ``num_beams`` settings.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id).to(device)
    model.eval()

    # Decoding options common to every checkpoint.
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "num_beams": num_beams,
    }

    if "mbart" in model_id.lower():
        # The mBART-50 tokenizer prefixes the encoder input with a source
        # language code and defaults to en_XX; the transcript is Arabic, so
        # the source language must be set before tokenising.
        tokenizer.src_lang = "ar_AR"
        # Force the decoder to start with the Arabic language code.
        generation_kwargs["forced_bos_token_id"] = tokenizer.lang_code_to_id.get("ar_AR", None)
    else:
        generation_kwargs["early_stopping"] = True

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    ).to(device)
    ids = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(ids[0], skip_special_tokens=True)

# ---- Evaluation metrics ----
import re

rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

# Arabic diacritics (U+064B-U+0652), superscript alef (U+0670) and
# tatweel (U+0640); removed so they do not split or alter word tokens.
_ARABIC_MARKS = re.compile(r"[\u064B-\u0652\u0670\u0640]")


def _tokenize_arabic(text):
    """Split *text* into Unicode word tokens after removing Arabic marks."""
    return re.findall(r"\w+", _ARABIC_MARKS.sub("", text))


results = []

for name, mid in models_to_test.items():
    print(f"\nRunning model: {name}")
    try:
        summary = generate_summary(name, mid, transcript_text)

        # Compute ROUGE with a Unicode-aware tokenizer. The default
        # rouge_score tokenizer keeps only [a-z0-9], which discards every
        # Arabic character and yields scores of 0. The English Porter
        # stemmer (use_stemmer) is not applicable to Arabic, so it is
        # not requested.
        rouge_scores = rouge.compute(
            predictions=[summary],
            references=[reference_summary],
            tokenizer=_tokenize_arabic,
        )

        # Compute BERTScore (Arabic) without baseline scaling
        bert = bertscore.compute(
            predictions=[summary],
            references=[reference_summary],
            lang="ar",
        )
        bert_f1 = bert["f1"][0]

        # Scores are stored as percentages rounded to two decimals; the
        # summary text is capped at 400 characters for the results table.
        results.append({
            "Model": name,
            "ROUGE-1": round(rouge_scores["rouge1"] * 100, 2),
            "ROUGE-2": round(rouge_scores["rouge2"] * 100, 2),
            "ROUGE-L": round(rouge_scores["rougeL"] * 100, 2),
            "BERTScore F1": round(bert_f1 * 100, 2),
            "Generated Summary": summary[:400] + ("..." if len(summary) > 400 else ""),
        })

    except Exception as e:
        # Best-effort benchmark: one failing model must not abort the run,
        # so the failure is reported and recorded as a row with empty scores.
        print(f"⚠️ Error running {name}: {e}")
        results.append({
            "Model": name,
            "ROUGE-1": None,
            "ROUGE-2": None,
            "ROUGE-L": None,
            "BERTScore F1": None,
            "Generated Summary": f"Error: {e}",
        })

# ---- Tabulate the per-model results and show them in the notebook ----
df = pd.DataFrame(results)
display(df)

# ---- Persist the table to Drive as CSV (UTF-8 with BOM) ----
save_path = "/content/drive/MyDrive/ArabicVideoSummariser/eval_results.csv"
df.to_csv(save_path, index=False, encoding="utf-8-sig")
print(f"\n✅ Results saved to: {save_path}")

# ---- Dump each stored summary under an 80-character banner ----
banner = "=" * 80
for row in results:
    print(f"\n{banner}\nModel: {row['Model']}\n{banner}")
    print(row["Generated Summary"])
