In [None]:
import os
import google.generativeai as genai
import pandas as pd
import json
import re
import time
from libraries.widget import build_ui, save_state, load_state

# Build the configuration UI. The returned mapping is indexed by widget name
# (e.g. "input_drop", "model_drop") and each entry's `.value` is read in the
# configuration cell below.
widgets_dict = build_ui()

PROMPT

In [None]:
# --- Read the selected values from the widgets ---
input_file   = widgets_dict["input_drop"].value
prompt_file  = widgets_dict["prompt_drop"].value
article_col  = widgets_dict["article_drop"].value
summary_col  = widgets_dict["summary_drop"].value
start_row    = widgets_dict["start_data"].value
end_row      = widgets_dict["end_data"].value
api_key      = widgets_dict["API_drop"].value
model_name   = widgets_dict["model_drop"].value
export_option = widgets_dict["export_drop"].value

# --- Derive the input file name and path ---
file_name    = os.path.splitext(input_file)[0]
input_path   = f"Unique/Evaluation/{file_name}.xlsx"
data         = pd.read_excel(input_path)
prompt       = prompt_file


def _parse_row(raw_value, default=1):
    """Return raw_value as an int when it is a digit string greater than 1, else default.

    The value is converted with str() first so that a widget returning an int
    (rather than text) does not fail on `.isdigit()`.
    """
    text = "" if raw_value is None else str(raw_value).strip()
    if text.isdigit() and int(text) > 1:
        return int(text)
    return default


# --- Resolve the 1-based, inclusive row range to evaluate ---
start = _parse_row(start_row, default=1)
# A missing/invalid end row, or one before the start row, means "only the start
# row"; otherwise data.iloc[start-1:end] would silently be empty.
end   = max(start, _parse_row(end_row, default=start))

# --- Build the output paths ---
output_folder = f"Outputs/Evaluation/{file_name}"
range_label   = f"{start:05}" if end == start else f"{start:05}-{end:05}"
output_json   = f"{output_folder}/{file_name} {range_label}.json"
output_xlsx   = f"{output_folder}/{file_name} {range_label}.xlsx"

# --- Column names for the article and the summary (with fallbacks) ---
article_row = article_col if article_col else "article"
summary_row = summary_col if summary_col else "summary"

# --- Show the effective configuration ---
# Never echo the full API key: notebook outputs are saved and shared with the file.
_key_text  = "" if api_key is None else str(api_key)
masked_key = ("*" * 8 + _key_text[-4:]) if len(_key_text) > 8 else "*" * 8
print(f"API key: {masked_key}")
print(f"Model:   {model_name}")
print(f"Input:   {file_name}")
print(f"Article: {article_row}")
print(f"Summary: {summary_row}")
print(f"Output:  {file_name} {range_label}")

AI MODEL


In [None]:
# Credentials and model id come from the configuration cell.
GEMINI_API_KEY, SCORER_ID = api_key, model_name
genai.configure(api_key=GEMINI_API_KEY)

# Load the evaluation instructions from the selected prompt file.
prompt_path = f"Prompts/{prompt}"
with open(prompt_path, mode="r", encoding="utf-8") as prompt_handle:
    evaluation_prompt = prompt_handle.read()

# The scorer model receives the evaluation instructions as its system instruction.
scorer_model = genai.GenerativeModel(
    SCORER_ID,
    system_instruction=evaluation_prompt,
)

FUNCTIONS


In [None]:
def evaluate_summary(article, summary):
    """Ask the scorer model to evaluate `summary` against `article`.

    Args:
        article: Source text; interpolated into the prompt.
        summary: Summary text to be evaluated; interpolated into the prompt.

    Returns:
        The model's response text, or None when the request (or reading the
        response text) raised an exception. Callers treat None as "retry".
    """
    user_prompt = f"""
    Hãy đánh giá văn bản tóm tắt dưới đây dựa trên văn bản gốc được cung cấp:
    Văn bản gốc:
    {article}
    
    Văn bản tóm tắt:
    {summary}
    """

    # NOTE(review): evaluation_prompt is also passed as system_instruction when
    # scorer_model is built, so it appears to be sent twice per request —
    # confirm whether that is intentional before removing it here.
    overall_prompt = evaluation_prompt + "\n" + user_prompt

    try:
        response = scorer_model.generate_content(
            overall_prompt,
            generation_config=genai.GenerationConfig(
                temperature=0.1,
                top_p=0.9,
                top_k=50,
            ),
        )
        return response.text
    except Exception as e:
        # Best-effort by design (the caller retries on None), but surface the
        # reason instead of discarding it so failures can be diagnosed.
        print(f"Generation error: {type(e).__name__}: {e}")
        return None

In [None]:
def fix_nested_quotes(json_str):
    """Escape unescaped double quotes inside string values of line-oriented JSON.

    The repair works line by line on `"key": value` pairs and therefore targets
    pretty-printed JSON with one key per line.

    Args:
        json_str: JSON text that may contain string values with raw `"` inside.

    Returns:
        `json_str` unchanged when it already parses as JSON; otherwise the text
        with unescaped inner quotes of quoted values replaced by `\\"`.
    """
    # Never touch input that is already valid: the line-based repair below could
    # otherwise corrupt it (e.g. several keys on a single line).
    try:
        json.loads(json_str)
        return json_str
    except ValueError:
        pass

    def replace_nested_quotes(match):
        key, value = match.groups()
        fixed_value = value.strip()

        # Set the trailing comma aside: with it attached the value never ends
        # with a quote, so every pair except the last one would be skipped.
        trailing_comma = ''
        if fixed_value.endswith(','):
            trailing_comma = ','
            fixed_value = fixed_value[:-1].rstrip()

        if fixed_value.startswith('"') and fixed_value.endswith('"'):
            inner_content = fixed_value[1:-1]
            # Escape only quotes that are not already preceded by a backslash,
            # so values mixing escaped and unescaped quotes are repaired too.
            inner_content = re.sub(r'(?<!\\)"', r'\\"', inner_content)
            fixed_value = f'"{inner_content}"'

        return f'{key}: {fixed_value}{trailing_comma}'

    return re.sub(r'(\"[^"]+\"): (.*?)(?=\n|$)', replace_nested_quotes, json_str)

In [None]:
def decode_fix(result, article, summary, max_retries=3):
    """Parse the model output as JSON, re-querying the model when decoding fails.

    Args:
        result: Raw model response text, optionally wrapped in a ```json fence.
        article: Source text, used to re-run the evaluation after a failure.
        summary: Summary text, used to re-run the evaluation after a failure.
        max_retries: Maximum number of decode attempts (default 3).

    Returns:
        The decoded JSON value on success. If every attempt fails, the last
        candidate JSON string that was tried is returned instead.
    """
    json_str = result
    for attempt in range(1, max_retries + 1):
        if result is None:
            # evaluate_summary returns None on API errors; feeding None to
            # re.search would raise TypeError, so count it as a failed attempt.
            print(f"Decode Error! Attempt: {attempt} - no response from model")
        else:
            json_match = re.search(r'```json\n(\{.*?\})\n```', result, flags=re.DOTALL)
            json_str = json_match.group(1) if json_match else result.strip()
            json_str = fix_nested_quotes(json_str)

            print("Attempt:", attempt)
            try:
                return json.loads(json_str)
            except json.JSONDecodeError as e:
                print(f"Decode Error! Attempt: {attempt} - {e}")

        # Only ask the model again if another decode attempt will follow;
        # otherwise the extra API call would be wasted.
        if attempt < max_retries:
            result = evaluate_summary(article, summary)

    print("Failed to decode JSON!")
    return json_str

In [None]:
def main(article_row, summary_row):
    """Evaluate every summary in the configured row range and save the results as JSON.

    Reads the module-level `data`, `start`, `end` and `output_json`. Each row is
    sent to `evaluate_summary` (with up to 6 retries when no response is
    returned), decoded with `decode_fix`, and collected; the collected records
    are written to `output_json`. Processing stops after 3 consecutive rows
    that could not be evaluated.

    Args:
        article_row: Name of the column holding the source article.
        summary_row: Name of the column holding the summary to evaluate.
    """
    output_data = []
    fail_streak = 0

    for index, row in data.iloc[start-1:end].iterrows():
        print(f"EVALUATING SUMMARY {index+1} ...")

        article = row[article_row]
        summary = row[summary_row]

        result = evaluate_summary(article, summary)
        print("Raw result:", result)

        # Retry with a wait that shrinks from 10s down to 5s.
        attempt = 0
        while result is None and attempt < 6:
            attempt += 1
            wait_time = 11 - attempt
            print(f"Attempt: {attempt}. {wait_time}s...")
            time.sleep(wait_time)
            result = evaluate_summary(article, summary)
            print("Raw result:", result)

        # Reset per row: otherwise a failed row would raise NameError (first
        # row) or silently record the previous row's result.
        decoded_result = None

        if result is None:
            print(f"Summary {index+1} Failed to process. Skipping...")
            print("-" * 10)
            print("\n")
            fail_streak += 1
            if fail_streak >= 3:
                break
        else:
            fail_streak = 0
            print(f"Process successfully")
            result = re.sub(r'\*', '', result)
            decoded_result = decode_fix(result, article, summary)
            # Round-trip through a string to collapse runs of whitespace.
            decoded_result_str = json.dumps(decoded_result, ensure_ascii=False)
            decoded_result_str = re.sub(r'\s+', ' ', decoded_result_str).strip()
            decoded_result = json.loads(decoded_result_str)
            if decoded_result is None:
                print("-" * 10)
                print("\n")
            else:
                print(f"Article: {article}")
                print(f"Summary: {summary}")
                print(f"Result: {decoded_result}")
                print("-" * 10)
                print("\n")

        output_data.append({
            "Index": f"{index+1:05}",
            "Article": article,
            "Summary": summary,
            "Result": decoded_result
        })

    output_dir = os.path.dirname(output_json)
    os.makedirs(output_dir, exist_ok=True)

    with open(output_json, "w", encoding="utf-8") as json_file:
        json.dump(output_data, json_file, ensure_ascii=False, indent=4, separators=(",", ": "))

    print(f"{output_json} saved successfully!")

In [None]:
# NOTE: this overrides the summary column resolved from the widget in the
# configuration cell; the evaluation below always runs on "generated_summary".
summary_row = "generated_summary"
# Runs the evaluation over the configured row range and writes output_json.
main(article_row, summary_row)

In [None]:
# summary_row = "reference_summary"
# main(article_row, summary_row)

EXPORT


In [None]:
# Optionally convert the saved JSON results into a flat Excel sheet.
if export_option == "YES":
    print(f"{output_json} Reading...")
    with open(output_json, 'r', encoding='utf-8') as file:
        # Use a dedicated name: rebinding `data` here would replace the input
        # DataFrame that main() reads, breaking any later re-run of main().
        export_records = json.load(file)
    # json_normalize flattens nested "Result" objects into dotted columns.
    df = pd.json_normalize(export_records)
    df.to_excel(output_xlsx, index=False)
    print(f"{output_xlsx} saved successfully!")
else:
    print("Skip...")