In [1]:
!pip install transformers rouge-score scikit-learn nltk torch --quiet

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for rouge-score (pyproject.toml) ... [?25l[?25hdone


In [2]:
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from rouge_score import rouge_scorer
from transformers import AutoTokenizer, AutoModel
import torch
from tqdm import tqdm

  * **h_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` or


In [3]:
import nltk
# Download the NLTK 'punkt' and 'punkt_tab' tokenizer resources.
# Presumably these are what sent_tokenize needs at runtime; the second call is
# the cell's last expression, so its return value is shown as the cell output.
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [4]:
from google.colab import drive

# Make Google Drive visible to the Colab runtime before reading the dataset.
drive.mount('/content/drive')

# Read the dataset and keep only rows that have both a source text and a
# reference summary (rows with a missing value in either column are dropped).
df = (
    pd.read_csv('/content/drive/My Drive/Percobaan/indonesia_dataset.csv')
    .dropna(subset=['summary', 'text'])
)

Mounted at /content/drive


In [5]:
# Set up the tokenizer and model used to embed text.
# NOTE(review): this cell was labelled "Setup IndoBERT", but the checkpoint
# loaded below is "google/flan-t5-small", not an IndoBERT model. Confirm which
# model is intended; the tokenizer and model checkpoints must stay in sync.
# NOTE(review): flan-t5 is presumably an encoder-decoder model, so calling the
# AutoModel instance with tokenizer output alone may fail for lack of decoder
# inputs -- verify how the embedding code invokes it.
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModel.from_pretrained("google/flan-t5-small").to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

In [6]:
# Embedding cache: maps a raw text string to its pooled embedding so that a
# text seen more than once is only encoded once.
embedding_cache = {}


def embed_text(text):
    """Return a mean-pooled embedding of ``text`` as a 1-D NumPy array.

    The text is tokenized (truncated to 225 tokens), passed through the
    module-level ``model`` without gradient tracking, and the final hidden
    states are averaged over the token dimension. Results are memoized in
    ``embedding_cache`` keyed by the exact input string.

    Args:
        text: The string to embed.

    Returns:
        numpy.ndarray of shape ``(hidden_size,)``.
    """
    if text in embedding_cache:
        return embedding_cache[text]
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=225).to(device)
    # An encoder-decoder checkpoint (e.g. T5) cannot be called with encoder
    # inputs alone: its forward pass also needs decoder inputs and raises
    # otherwise. Only the encoder's hidden states are needed for an embedding,
    # so use the encoder stack directly in that case.
    config = getattr(model, "config", None)
    if getattr(config, "is_encoder_decoder", False):
        encoder = model.get_encoder()
    else:
        encoder = model
    with torch.no_grad():
        outputs = encoder(**inputs)
    # (1, seq_len, hidden) -> mean over tokens -> (1, hidden) -> (hidden,)
    emb = outputs.last_hidden_state.mean(dim=1).squeeze(0).cpu().numpy()
    embedding_cache[text] = emb
    return emb

In [7]:
# Summarization methods
def tfidf_summary(text, top_n=3):
    """Extractive summary that keeps the ``top_n`` highest TF-IDF sentences.

    Each sentence is scored by the sum of its TF-IDF term weights, where the
    vectorizer is fitted on the sentences of ``text`` itself. The selected
    sentences are returned in their original order, joined by single spaces.
    When the text has ``top_n`` sentences or fewer, all of them are returned.
    """
    sents = sent_tokenize(text)
    if len(sents) > top_n:
        vectorizer = TfidfVectorizer()
        weights = vectorizer.fit_transform(sents)
        # Row sums of the sentence-by-term matrix, flattened to 1-D.
        sentence_scores = weights.sum(axis=1).A1
        # Highest-scoring positions, restored to document order.
        chosen = np.sort(np.argsort(sentence_scores)[-top_n:])
        return ' '.join(sents[pos] for pos in chosen)
    return ' '.join(sents)

def indobert_summary(text, top_n=3):
    """Extractive summary that keeps the sentences closest to the document.

    The whole text and each of its sentences are embedded with ``embed_text``;
    the ``top_n`` sentences with the highest cosine similarity to the document
    embedding are returned in their original order, joined by single spaces.
    When the text has ``top_n`` sentences or fewer, all of them are returned.
    """
    sents = sent_tokenize(text)
    if len(sents) <= top_n:
        return ' '.join(sents)

    doc_vec = embed_text(text)
    scores = []
    for sent in sents:
        sent_vec = embed_text(sent)
        scores.append(cosine_similarity([doc_vec], [sent_vec])[0][0])

    # Best-scoring positions, restored to document order.
    keep = sorted(np.argsort(scores)[-top_n:])
    return ' '.join(sents[pos] for pos in keep)

def gpt_like_summary(text, top_n=3):
    """Lead-N baseline: return the first ``top_n`` sentences of ``text``.

    Despite the name, no language model is called here; the text is split into
    sentences and the leading ones are joined by single spaces.
    """
    leading = sent_tokenize(text)[:top_n]
    return ' '.join(leading)

In [8]:
# Evaluator
# Shared ROUGE scorer producing ROUGE-1, ROUGE-2 and ROUGE-L scores.
# NOTE(review): use_stemmer=True presumably applies an English-oriented
# stemmer; confirm this is appropriate if the dataset text is Indonesian.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

def evaluate_summary(reference, generated):
    """Score a generated summary against a reference summary.

    Computes the cosine similarity between the ``embed_text`` embeddings of the
    two strings, plus ROUGE-1, ROUGE-2 and ROUGE-L precision/recall/F1 from the
    module-level ``scorer``.

    Scoring is best-effort: if anything fails, every metric is reported as 0.0
    so a long batch run is not aborted. The failure is surfaced through a
    ``RuntimeWarning`` rather than hidden, because an all-zero result is
    otherwise indistinguishable from a genuinely bad summary. Only ``Exception``
    subclasses are handled; ``KeyboardInterrupt`` and ``SystemExit`` propagate.

    Args:
        reference: The gold summary text.
        generated: The candidate summary text.

    Returns:
        dict with key ``'cosine'`` followed by ``'<rouge>_precision'``,
        ``'<rouge>_recall'`` and ``'<rouge>_f1'`` for ``rouge1``, ``rouge2``
        and ``rougeL``.
    """
    import warnings

    rouge_types = ('rouge1', 'rouge2', 'rougeL')
    try:
        emb_ref = embed_text(reference)
        emb_gen = embed_text(generated)
        result = {'cosine': cosine_similarity([emb_ref], [emb_gen])[0][0]}

        rouge_scores = scorer.score(reference, generated)
        for rouge_type in rouge_types:
            score = rouge_scores[rouge_type]
            result[f'{rouge_type}_precision'] = score.precision
            result[f'{rouge_type}_recall'] = score.recall
            result[f'{rouge_type}_f1'] = score.fmeasure
        return result
    except Exception as exc:
        warnings.warn(
            f"evaluate_summary failed, returning zero scores: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        fallback = {'cosine': 0.0}
        for rouge_type in rouge_types:
            for part in ('precision', 'recall', 'f1'):
                fallback[f'{rouge_type}_{part}'] = 0.0
        return fallback

In [None]:
from tqdm import tqdm

# Rows are kept only when the lead-sentence summary reaches this ROUGE-1 F1
# against the gold summary.
MIN_ROUGE1_F1 = 0.5

# Metric names read from the evaluator's result, in output-column order.
# Each one is stored in the results under a 'gpt_' prefix.
METRIC_KEYS = (
    'cosine',
    'rouge1_precision', 'rouge1_recall', 'rouge1_f1',
    'rouge2_precision', 'rouge2_recall', 'rouge2_f1',
    'rougeL_precision', 'rougeL_recall', 'rougeL_f1',
)

results = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    text = row['text']
    gold = row['summary']

    try:
        # Summarize with the lead-sentence ("GPT-like") method only.
        gpt_sum = gpt_like_summary(text)

        # Score that summary against the gold summary.
        gpt_eval = evaluate_summary(gold, gpt_sum)

        # Keep the row only if its ROUGE-1 F1 reaches the threshold.
        if gpt_eval['rouge1_f1'] >= MIN_ROUGE1_F1:
            record = {
                'index': idx,
                'gold_summary': gold,
                'gpt_summary': gpt_sum,
            }
            # The evaluator's keys carry no 'gpt_' prefix; the prefix belongs
            # to the output column names only. Looking up prefixed keys in
            # gpt_eval raised KeyError and dropped every qualifying row.
            record.update({f'gpt_{key}': gpt_eval[key] for key in METRIC_KEYS})
            results.append(record)

    except Exception as e:
        # Report the failing row and continue with the rest of the dataset.
        print(f"Error pada index {idx}: {e}")


 44%|████▍     | 16758/38240 [1:48:42<2:38:10,  2.26it/s]

In [None]:
def evaluate_summary(reference, prediction):
    """Score ``prediction`` against ``reference`` with rounded ROUGE and cosine.

    Uses ``rouge.compute(...)`` with aggregation and reads the ``mid`` value of
    each aggregated ROUGE score; the cosine similarity comes from
    ``compute_cosine_similarity``. All values are rounded to 4 decimals.

    NOTE(review): this redefines ``evaluate_summary`` and shadows the earlier
    definition once the cell is run. ``rouge`` and ``compute_cosine_similarity``
    are not defined in the visible cells -- confirm they exist before relying
    on this version.

    Args:
        reference: The gold summary text.
        prediction: The candidate summary text.

    Returns:
        dict with key ``'cosine'`` followed by ``'<rouge>_precision'``,
        ``'<rouge>_recall'`` and ``'<rouge>_f1'`` for ``rouge1``, ``rouge2``
        and ``rougeL``.
    """
    scores = rouge.compute(predictions=[prediction], references=[reference], use_aggregator=True)
    cosine = compute_cosine_similarity(reference, prediction)

    result = {'cosine': round(cosine, 4)}
    for rouge_type in ('rouge1', 'rouge2', 'rougeL'):
        mid = scores[rouge_type].mid
        result[f'{rouge_type}_precision'] = round(mid.precision, 4)
        result[f'{rouge_type}_recall'] = round(mid.recall, 4)
        # Unprefixed like every other key; this entry used to be emitted as
        # 'gpt_rougeL_f1' for ROUGE-L, leaving 'rougeL_f1' missing.
        result[f'{rouge_type}_f1'] = round(mid.fmeasure, 4)
    return result

In [None]:
# Save the collected rows to a CSV file on the mounted Drive, without the
# DataFrame's row index.
results_df = pd.DataFrame(data=results)
results_df.to_csv(
    "/content/drive/My Drive/Percobaan/gpt3.csv",
    index=False,
)