In [4]:
import google.generativeai as genai
import time
import re
import os
from google.api_core import exceptions

# 1. CONFIGURATION
# The API key is read from the environment instead of being hard-coded: a key
# embedded in a script or notebook is exposed to everyone who can read the
# file. Any key that was previously committed here should be revoked.
# (On Kaggle, for example, load the value from the notebook's Secrets and
# export it into os.environ before this cell runs.)
API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Missing Gemini API key: set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before running this script."
    )
genai.configure(api_key=API_KEY)

# Model used to grade the translations.
model = genai.GenerativeModel('gemini-2.5-flash')

# Input corpora are read from input_dir; the raw model replies are appended
# to output_file inside output_dir.
input_dir = "/kaggle/input/src-data"
output_dir = "/kaggle/working"
output_file = os.path.join(output_dir, "summary_evaluation_results.txt")

# 2. FILE READING & PROCESSING
def read_file(path):
    """Read a UTF-8 text file and return its non-blank lines, stripped.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line, each with leading and
        trailing whitespace removed. An empty list is returned (after
        printing a warning) when ``path`` does not exist.
    """
    if not os.path.exists(path):
        # Keep the best-effort contract (empty list) but make it visible:
        # otherwise a wrong path makes the whole run evaluate 0 lines
        # without any hint of what went wrong.
        print(f"   [Warning] Không tìm thấy file: {path}")
        return []
    with open(path, 'r', encoding='utf-8') as f:
        # NOTE(review): blank lines are dropped independently per file; if
        # the corpora are meant to be aligned line-by-line, a blank line in
        # only one of them shifts every following pair — confirm inputs
        # never contain blank lines.
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]

# Load the source sentences, the reference translations and the translations
# under evaluation (presumably aligned line-by-line — confirm with the data).
en_lines, ref_lines, trans_lines = (
    read_file(os.path.join(input_dir, filename))
    for filename in (
        "english_original.txt",
        "vietnamese_reference.txt",
        "vietnamese_translated.txt",
    )
)

# Truncate all three lists to the shortest one so that every index j is
# valid in each of them.
min_len = min(map(len, (en_lines, ref_lines, trans_lines)))
en_lines = en_lines[:min_len]
ref_lines = ref_lines[:min_len]
trans_lines = trans_lines[:min_len]

# Number of sentence triples sent to the model per request.
BATCH_SIZE = 128
# Every valid score collected over the whole run.
all_scores = []

def get_scores_with_retry(batch_data, retries=5):
    """Ask the model to score one batch of translations, retrying on errors.

    Args:
        batch_data: Iterable of dicts providing the keys ``id``, ``en``,
            ``ref`` and ``target``; each becomes one data line of the prompt.
        retries: Maximum number of attempts before giving up.

    Returns:
        The model's reply text with surrounding whitespace stripped, or
        ``None`` when every attempt failed.
    """
    header = (
        "Role: Translation QA. Task: Score Target vs Reference based on Source.\n"
        "Scale: 0-10.\n"
        "Format per line: ID: [id] | Score: [number] | Reason: [short text]\n\n"
        "Data:\n"
    )
    # Build the data section in one join instead of repeated `+=`.
    rows = [
        f"ID: {item['id']} | Src: {item['en']} | Ref: {item['ref']} | Tgt: {item['target']}\n"
        for item in batch_data
    ]
    prompt = header + "".join(rows)

    # Linear backoff for quota errors: 20s, 30s, 40s, ...
    wait_time = 20

    for attempt in range(retries):
        # Sleeping after the final attempt would only delay returning None.
        is_last_attempt = attempt == retries - 1
        try:
            response = model.generate_content(prompt)
            # NOTE(review): accessing `.text` may itself raise when the reply
            # carries no text (e.g. a blocked response); that case falls into
            # the generic handler below — confirm against the SDK docs.
            return response.text.strip()
        except exceptions.ResourceExhausted:
            if is_last_attempt:
                print("   [!] Quá hạn mức (429). Đã hết số lần thử lại.")
                break
            print(f"   [!] Quá hạn mức (429). Đang nghỉ {wait_time}s...")
            time.sleep(wait_time)
            wait_time += 10
        except Exception as e:
            # Deliberately broad: any other failure is reported and retried
            # after a short pause rather than aborting the whole run.
            print(f"   [!] Lỗi khác: {e}")
            if not is_last_attempt:
                time.sleep(5)
    return None

# 3. MAIN RUN
# Matches the number that directly follows a "Score" label. Only non-word
# characters (":", spaces, "[", "*", ...) may sit between the label and the
# number, so e.g. "scores for 10 items" is not read as a score of 10.
_SCORE_PATTERN = re.compile(r"Score\W*(\d+(?:\.\d+)?)", re.IGNORECASE)


def _extract_scores(raw_text):
    """Return the in-range (0-10) scores of a model reply, at most one per line."""
    scores = []
    for line in raw_text.splitlines():
        # Only the first labelled number of a line counts; digits mentioned
        # later in the free-text reason must not be counted as extra scores.
        match = _SCORE_PATTERN.search(line)
        if match is None:
            continue
        value = float(match.group(1))
        if 0 <= value <= 10:
            scores.append(value)
    return scores


print("Bắt đầu chấm câu với Gemini 2.5 Flash...")

# Start from a clean results file; each batch below appends to it.
if os.path.exists(output_file):
    os.remove(output_file)

for i in range(0, min_len, BATCH_SIZE):
    batch_end = min(i + BATCH_SIZE, min_len)
    # IDs are 1-based line numbers so they match the "Dòng" range printed below.
    batch = [
        {"id": j + 1, "en": en_lines[j], "ref": ref_lines[j], "target": trans_lines[j]}
        for j in range(i, batch_end)
    ]

    print(f"-> Batch {i//BATCH_SIZE + 1}: Dòng {i+1} - {i+len(batch)}")
    raw_result = get_scores_with_retry(batch)

    if raw_result:
        # Persist the raw reply first so it is kept even if parsing finds nothing.
        with open(output_file, 'a', encoding='utf-8') as f_out:
            f_out.write(raw_result + "\n")

        valid_batch_scores = _extract_scores(raw_result)
        all_scores.extend(valid_batch_scores)

        if valid_batch_scores:
            avg_now = sum(valid_batch_scores)/len(valid_batch_scores)
            print(f"   [OK] Batch Avg: {avg_now:.2f}")
        else:
            print(f"   [Warning] Không thấy điểm. Raw: {raw_result[:50]}...")
    else:
        # Make dropped batches visible instead of skipping them silently.
        print(f"   [!] Batch {i//BATCH_SIZE + 1} không có kết quả, bỏ qua.")

    # Pause between requests; there is nothing to wait for after the last batch.
    if batch_end < min_len:
        time.sleep(5)

print(f"\n--- HOÀN THÀNH. Tổng số điểm thu được: {len(all_scores)}/{min_len} ---")
if all_scores:
    print(f"Điểm trung bình toàn bộ: {sum(all_scores)/len(all_scores):.2f}")

Bắt đầu chấm câu với Gemini 2.5 Flash...
-> Batch 1: Dòng 1 - 128
   [OK] Batch Avg: 7.07
-> Batch 2: Dòng 129 - 256
   [OK] Batch Avg: 5.78
-> Batch 3: Dòng 257 - 384
   [OK] Batch Avg: 6.04
-> Batch 4: Dòng 385 - 512
   [OK] Batch Avg: 6.73
-> Batch 5: Dòng 513 - 640
   [OK] Batch Avg: 5.26
-> Batch 6: Dòng 641 - 768
   [OK] Batch Avg: 6.59
-> Batch 7: Dòng 769 - 896
   [OK] Batch Avg: 6.12
-> Batch 8: Dòng 897 - 1024
   [OK] Batch Avg: 5.86
-> Batch 9: Dòng 1025 - 1152
   [OK] Batch Avg: 6.02
-> Batch 10: Dòng 1153 - 1268
   [OK] Batch Avg: 4.42

--- HOÀN THÀNH. Tổng số điểm thu được: 1268/1268 ---
Điểm trung bình toàn bộ: 6.00
