In [1]:
# Turn on Colab's custom widget manager (google.colab.output) for this session.
from google.colab import output
output.enable_custom_widget_manager()

In [None]:
# Install the third-party libraries (Colab shell commands; versions are not pinned).
!pip install -q transformers datasets accelerate rouge-score nltk rouge
# Upgrade `datasets` on top of whatever version the line above installed.
!pip install --upgrade datasets
!pip install -q bert-score

#import library
import os
import torch
import nltk
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
from rouge import Rouge
from nltk.tokenize import word_tokenize
from nltk.translate.meteor_score import meteor_score
from bert_score import score as bert_score

# Download the NLTK resources needed at evaluation time.
# NOTE(review): 'stopwords' does not appear to be used in the cells shown here — confirm it is needed.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

In [3]:
# Try to keep Colab from disconnecting; this does not seem to help, so the
# JavaScript-console workaround is applied manually instead.
# The line below configures PyTorch's CUDA allocator through its environment
# variable (expandable segments, garbage-collection threshold of 0.8).
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,garbage_collection_threshold:0.8"

def prevent_disconnect(interval=300):
    """Ping google.com forever, sleeping ``interval`` seconds before each request.

    Meant to run in a background thread as a best-effort keep-alive; the
    function never returns.

    Args:
        interval: Number of seconds to sleep before every request.
    """
    import time
    import requests

    while True:
        time.sleep(interval)
        try:
            # Without a timeout a stalled connection would block this thread forever.
            requests.get("https://www.google.com", timeout=10)
        except requests.RequestException:
            # Best-effort ping: a transient network failure must not end the loop.
            continue

import threading
# Run the keep-alive loop in a daemon thread so it does not block the notebook cells.
keep_alive = threading.Thread(target=prevent_disconnect, daemon=True)
keep_alive.start()

In [4]:
import re

def clean_text(text):
    """Return ``text`` with URLs removed and whitespace normalised.

    Args:
        text: Input string.

    Returns:
        The string with every ``http``/``https``/``www`` token removed, each
        run of whitespace collapsed to one space, and no leading or trailing
        whitespace.
    """
    url_pattern = re.compile(r'http\S+|www\S+|https\S+', flags=re.MULTILINE)
    whitespace_run = re.compile(r'\s+')

    without_urls = url_pattern.sub('', text)
    return whitespace_run.sub(' ', without_urls).strip()


In [None]:
# Load the "cnn_dailymail" dataset, configuration "3.0.0".
dataset = load_dataset("cnn_dailymail", "3.0.0")

In [None]:
# Preview: select the first five training rows (the cell displays the resulting object).
dataset["train"].select(range(5))

In [None]:
# Load the pretrained T5 model and its tokenizer from the same checkpoint name.
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

In [None]:
# Print the model's textual representation to inspect its structure.
print(model)

In [None]:
# Trim every split to a fixed number of leading rows
for split_name, row_count in (("train", 10000), ("validation", 3000), ("test", 3000)):
    dataset[split_name] = dataset[split_name].select(range(row_count))

# Preprocessing function
def preprocess_function(examples):
    """Tokenize a batch of articles and highlights for seq2seq fine-tuning.

    Args:
        examples: Batch passed by ``dataset.map(..., batched=True)``; the
            ``'article'`` and ``'highlights'`` columns are read.

    Returns:
        The tokenizer output for the prefixed articles (padded/truncated to
        512 tokens) with an extra ``"labels"`` entry holding the highlight
        token ids (padded/truncated to 128 tokens), where every padding
        position is replaced by -100.
    """
    cleaned_articles = [clean_text(doc) for doc in examples['article']]
    cleaned_highlights = [clean_text(hl) for hl in examples['highlights']]

    inputs = ["summarize: " + doc for doc in cleaned_articles]

    model_inputs = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        padding='max_length'
    )

    labels = tokenizer(
        text_target=cleaned_highlights,
        max_length=128,
        truncation=True,
        padding='max_length'
    )

    # Labels are padded to a fixed length; -100 is the index the loss ignores,
    # so padding positions must carry it instead of the pad token id or the
    # model is also trained (and evaluated) on predicting padding.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [(token_id if token_id != pad_id else -100) for token_id in label_ids]
        for label_ids in labels["input_ids"]
    ]
    return model_inputs

# Apply the preprocessing to every split, using multiprocessing (if it is stable)
encoded_dataset = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=2  # or remove this for now if multiprocessing is not stable yet
)


In [10]:
# Small shuffled subsets to speed up training.
# Note: `test_dataset` is drawn from the *validation* split, not the test split.
train_dataset = encoded_dataset["train"].shuffle(seed=42).select(range(10000))
test_dataset = encoded_dataset["validation"].shuffle(seed=42).select(range(3000))


In [None]:
# Training arguments: evaluate once per epoch, 3 epochs, batch size 2 per
# device for both training and evaluation, no external experiment reporting.
# NOTE(review): the output directory is named "bart_results" although the
# model loaded in this notebook is T5 — confirm the name is intended.
training_args = TrainingArguments(
    output_dir="./bart_results",
    eval_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=3,
    logging_steps=100,
    report_to="none"
)

# Initialize the Trainer with the training subset and the validation-derived
# `test_dataset` as evaluation set. No data collator is passed, so the default is used.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Fine-tune the model (this is the long-running step of the notebook)
trainer.train()


In [None]:
# Evaluation: run the Trainer's evaluation on the eval dataset it was given
# and print the returned metrics.
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")

In [None]:
# Generate summaries: pick the GPU when one is available, otherwise the CPU,
# and move the model onto that device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def generate_summary_batch(batch):
    """Generate one summary per article in ``batch`` using beam search.

    Args:
        batch: Batch passed by ``Dataset.map(..., batched=True)``; only the
            ``"article"`` column is read.

    Returns:
        A dict ``{"summary": [...]}`` holding the decoded summaries, in the
        same order as the input articles.
    """
    # Clean the articles exactly as preprocess_function does, so inference
    # inputs match what the model saw during fine-tuning.
    prompts = ["summarize: " + clean_text(text) for text in batch["article"]]

    with torch.no_grad():
        inputs = tokenizer(
            prompts,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        ).to(device)

        # Plain beam search with 4 beams. The batch is padded, so the attention
        # mask is passed explicitly rather than leaving it to be inferred.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=128,
            num_beams=4,
            length_penalty=2.0,
            no_repeat_ngram_size=3,
            early_stopping=True
        )

        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        # Release cached GPU memory between batches.
        torch.cuda.empty_cache()

        return {"summary": decoded}

# Generate summaries in batches of 4 rows; the result keeps the existing
# columns of `test_dataset` and adds a "summary" column.
summaries = test_dataset.map(generate_summary_batch, batched=True, batch_size=4)

In [None]:
# Show the first three examples: article excerpt, reference and generated summary
for i in range(3):
    source_row = test_dataset[i]
    generated_row = summaries[i]
    print(f"\nExample {i+1}:")
    print("Article:", source_row["article"][:200] + "...")
    print("\nReference Summary:", source_row["highlights"])
    print("\nGenerated Summary:", generated_row["summary"])
    print("-" * 50)

# Calculate ROUGE scores
def calculate_rouge(references, predictions):
    """Compute averaged ROUGE F-scores for predictions against references.

    Args:
        references: Reference summaries.
        predictions: Generated summaries, aligned with ``references``.

    Returns:
        Dict with the averaged "f" value for "rouge-1", "rouge-2" and "rouge-l".
    """
    averaged = Rouge().get_scores(predictions, references, avg=True)
    return {
        metric: averaged[metric]["f"]
        for metric in ("rouge-1", "rouge-2", "rouge-l")
    }

# Calculate METEOR scores
def calculate_meteor_score(references, predictions):
    """Average the sentence-level METEOR score over reference/prediction pairs.

    Args:
        references: Reference summary strings.
        predictions: Generated summary strings, aligned with ``references``.

    Returns:
        Mean METEOR over the pairs that could be scored, or 0.0 when no pair
        could be scored. Pairs whose tokenization or scoring raises an
        ``Exception`` are skipped, and a single ``RuntimeWarning`` reports how
        many were dropped. ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    import warnings

    total_score = 0.0
    count = 0
    skipped = 0
    first_error = None
    for ref, pred in zip(references, predictions):
        try:
            ref_tokens = [word_tokenize(ref)]
            pred_tokens = word_tokenize(pred)
            total_score += meteor_score(ref_tokens, pred_tokens)
            count += 1
        except Exception as exc:
            # Skip the failing pair, but remember it so the caller learns that
            # the average covers fewer examples than were passed in.
            skipped += 1
            if first_error is None:
                first_error = exc

    if skipped:
        warnings.warn(
            f"METEOR: skipped {skipped} example(s) that could not be scored; "
            f"first error: {first_error!r}",
            RuntimeWarning,
            stacklevel=2,
        )
    return total_score / count if count > 0 else 0.0

# Calculate BERTScore
def calculate_bertscore(references, predictions):
    """Compute mean BERTScore precision, recall and F1.

    Args:
        references: Reference summaries.
        predictions: Generated summaries, aligned with ``references``.

    Returns:
        Dict with the mean of each returned score tensor under the keys
        "bertscore_precision", "bertscore_recall" and "bertscore_f1".
    """
    precision, recall, f1 = bert_score(
        predictions, references, lang="en", rescale_with_baseline=True
    )
    per_metric = {
        "bertscore_precision": precision,
        "bertscore_recall": recall,
        "bertscore_f1": f1,
    }
    return {name: values.mean().item() for name, values in per_metric.items()}

# Build the reference and prediction lists once; every metric below reuses
# them instead of iterating over the datasets again for each metric.
references = [ex["highlights"] for ex in test_dataset]
predictions = [ex["summary"] for ex in summaries]

rouge_scores = calculate_rouge(references, predictions)

print("\nROUGE Scores:")
print(f"ROUGE-1: {rouge_scores['rouge-1']:.4f}")
print(f"ROUGE-2: {rouge_scores['rouge-2']:.4f}")
print(f"ROUGE-L: {rouge_scores['rouge-l']:.4f}")

# METEOR Score
meteor = calculate_meteor_score(references, predictions)
print("METEOR Score:", meteor)

# BERTScore
bertscore_result = calculate_bertscore(references, predictions)
print("\nBERTScore:")
print(f"Precision: {bertscore_result['bertscore_precision']:.4f}")
print(f"Recall:    {bertscore_result['bertscore_recall']:.4f}")
print(f"F1 Score:  {bertscore_result['bertscore_f1']:.4f}")