In [7]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import re
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
!pip install rouge_score bert_score
from rouge_score import rouge_scorer
from bert_score import score as bertscore




In [8]:
url_english = "https://drive.google.com/uc?export=download&id=1J5zhBA3qvKFPbADziKRv1xBQ9A_jMdRs"
url_indo    = "https://drive.google.com/uc?export=download&id=1_jMC6ImrPz2KJzj4aWFdTiPS3_2-DcG6"


In [9]:
import pandas as pd
import requests

def load_text_url(url):
    text = requests.get(url).text
    lines = [l.strip() for l in text.splitlines() if l.strip()]
    return pd.DataFrame({"text": lines})

df_english = load_text_url(url_english)
df_indo    = load_text_url(url_indo)


In [10]:
from transformers import MarianTokenizer, MarianMTModel
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "Helsinki-NLP/opus-mt-en-id"

tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(device)




In [11]:
def translate_batch(text_batch, max_new_tokens=128):
    inputs = tokenizer(text_batch, return_tensors="pt", padding=True, truncation=True).to(device)

    with torch.no_grad():
        out = model.generate(**inputs, max_length=max_new_tokens)

    decoded = tokenizer.batch_decode(out, skip_special_tokens=True)

    # clean
    cleaned = [d.strip() for d in decoded]
    return cleaned


In [12]:
import numpy as np
!pip install sacrebleu
import sacrebleu
from bert_score import score as bert_score
from rouge_score import rouge_scorer

rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

def compute_bleu(ref, pred):
    return sacrebleu.corpus_bleu([pred], [[ref]]).score

def compute_rougeL(ref, pred):
    return rouge.score(ref, pred)['rougeL'].fmeasure

def compute_bertscore(ref, pred):
    P, R, F1 = bert_score([pred], [ref], lang="id")
    return float(F1[0])


Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Downloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-3.2.0 sacrebleu-2.5.1


In [13]:
from tqdm import tqdm

def run_batched_helsinki(df_en, df_id, batch_size=16):

    outputs = []
    bleu_list, rouge_list, bert_list = [], [], []

    print(f"\n=== HELSINKI TRANSLATION — BATCH SIZE {batch_size} ===\n")

    for start in tqdm(range(0, len(df_en), batch_size)):
        end = min(start + batch_size, len(df_en))

        batch_in = df_en["text"].iloc[start:end].tolist()
        batch_gt = df_id["text"].iloc[start:end].tolist()

        # --- translate ---
        batch_pred = translate_batch(batch_in)

        # store
        outputs.extend(batch_pred)

        # --- eval ---
        for pred, gt in zip(batch_pred, batch_gt):
            bleu_list.append(compute_bleu(gt, pred))
            rouge_list.append(compute_rougeL(gt, pred))
            bert_list.append(compute_bertscore(gt, pred))

        torch.cuda.empty_cache()

    scores = {
        "bleu": float(np.mean(bleu_list)),
        "rougeL": float(np.mean(rouge_list)),
        "bertscore": float(np.mean(bert_list)),
    }

    return outputs, scores


In [14]:
outputs, scores = run_batched_helsinki(df_english, df_indo, batch_size=16)
print(scores)


=== HELSINKI TRANSLATION — BATCH SIZE 16 ===



  0%|          | 0/64 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

100%|██████████| 64/64 [36:23<00:00, 34.11s/it]

{'bleu': 35.48670386400386, 'rougeL': 0.6648491815911443, 'bertscore': 0.8990257721644617}



