In [8]:
import os
import json
import re
import time
from dotenv import load_dotenv 

import google.generativeai as genai 

## Load Dataset

In [None]:
def load_data(file_path: str) -> list:
    """
    Read a JSON dataset from disk and return its parsed contents.

    Args:
        file_path (str): Location of the JSON file; it is opened as UTF-8 text.

    Returns:
        list: The parsed JSON document. A list of dictionaries is expected,
        but the structure is not validated here.
    """
    with open(file_path, encoding="utf-8") as source:
        raw_text = source.read()

    # Parsing happens after the file handle has been released
    return json.loads(raw_text)

# Location of the dataset JSON file (placeholder: fill in before running)
DATA_FILE = " "

# Parse the dataset once, then report how many samples it contains
data = load_data(DATA_FILE)
sample_count = len(data)
print(f"Dataset Loaded: {sample_count} samples.")

## Load API Key & Initialize Client

In [None]:
# Pull variables from a local .env file (if present) into the process environment
load_dotenv()

# Hand the Gemini key to the client library.
# NOTE(review): api_key is None when GEMINI_API_KEY is unset, and the message
# below is printed regardless, so it does not prove a key was found.
api_key = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=api_key)

print("API Key Loaded and Gemini Client Initialized.")

## Load Prompts

In [None]:
def load_prompts(file_path: str, category: str) -> dict:
    """
    Fetch the prompt templates stored under one category of a prompts JSON file.

    Args:
        file_path (str): Location of the prompts JSON file (opened as UTF-8).
        category (str): Top-level key to look up (e.g., 'eng_abbreviations', 'kor_abbreviations').

    Returns:
        dict: The value stored under ``category``, or an empty dict when the
        key is absent.
    """
    with open(file_path, encoding="utf-8") as source:
        all_prompts = json.loads(source.read())

    # An unknown category yields an empty mapping rather than an error
    category_prompts = all_prompts.get(category, {})
    return category_prompts

# Prompts JSON location and the category to read from it
# (placeholders: fill in before running)
PROMPT_FILE = " "
CATEGORY = " "

# Load the templates for the chosen category and list their names
prompts = load_prompts(PROMPT_FILE, CATEGORY)
available_prompt_names = list(prompts.keys())
print(f"Loaded prompts for category '{CATEGORY}': {available_prompt_names}")

## Send Requests to API

In [None]:
def call_gemini_api(prompt_text: str, input_word: str, model_name: str = "gemini-1.5-flash") -> str:
    """
    Call the Gemini API with a specific prompt and input word.

    Args:
        prompt_text (str): The system prompt to use.
        input_word (str): The input word to process; it is sent as the sole
            user content.
        model_name (str): Gemini model identifier. Defaults to
            "gemini-1.5-flash", so existing two-argument calls are unchanged.

    Returns:
        str: The stripped API response text, or the sentinel string "Error"
        when generating or reading the response raises.
    """
    model = genai.GenerativeModel(
        model_name=model_name,
        system_instruction=prompt_text
    )

    try:
        response = model.generate_content(str(input_word))
        return response.text.strip()
    except Exception as e:
        # Best-effort by design: one failed request must not abort a long
        # batch run, so the failure is logged and a sentinel is returned.
        print(f"Error processing {input_word}: {e}")
        return "Error"

print("Gemini API Function Ready.")

## Extract Answer from Response

In [9]:
def extract_answer(response_text: str, category: str) -> str:
    """
    Pull the answer out of a model response using the category's answer marker.

    Categories starting with "eng_" use the marker "Answer:"; every other
    category uses "답변:". The text after the first marker occurrence, up to
    the end of that line, is returned with surrounding whitespace removed.

    Args:
        response_text (str): Full response text from Gemini API.
        category (str): Prompt category (e.g., "eng_abbreviations" or "kor_abbreviations").

    Returns:
        str: The extracted answer, or ``response_text`` unchanged when the
        marker does not appear.
    """
    marker = "Answer:" if category.startswith("eng_") else "답변:"
    found = re.search(marker + r"\s*(.*)", response_text)

    if found is None:
        # No marker present: fall back to the whole response
        return response_text
    return found.group(1).strip()

## Save Processed Results

In [11]:
def save_results(results: list, filename: str):
    """
    Save processed results into a JSON file.

    The parent directory of ``filename`` is created when it is missing. A bare
    filename with no directory component is written to the current working
    directory.

    Args:
        results (list): List of dictionaries containing "word", "response", and "answer".
        filename (str): Path to the output JSON file.
    """
    # os.path.dirname returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # ensure_ascii=False keeps non-ASCII text (e.g. Korean) human-readable
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(results, file, ensure_ascii=False, indent=4)

    print(f"Results saved: {filename}")

## Execute API Calls and Store Results

In [None]:
# Directory to save results
RESULTS_DIR = "results"
CHECKPOINT_DIR = "checkpoint"

# Rate limiting: pause after every REQUESTS_PER_BATCH requests
REQUESTS_PER_BATCH = 15
BATCH_PAUSE_SECONDS = 60

# Ensure checkpoint directory exists
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

for prompt_type, prompt_text in prompts.items():
    progress_file = os.path.join(CHECKPOINT_DIR, f"{prompt_type}_progress.txt")
    # Results gathered so far are stored next to the progress index so that a
    # resumed run still produces a complete output file.
    partial_file = os.path.join(CHECKPOINT_DIR, f"{prompt_type}_partial.json")

    # Load progress checkpoint or create if missing
    if os.path.exists(progress_file):
        with open(progress_file, "r") as f:
            start_index = int(f.read().strip() or "0")  # Default to 0 if file is empty
    else:
        start_index = 0
        with open(progress_file, "w") as f:
            f.write("0")  # Create file with initial index 0

    # Restore results saved by an earlier, interrupted run
    results = []
    if os.path.exists(partial_file):
        with open(partial_file, "r", encoding="utf-8") as f:
            results = json.load(f)

    # Reconcile checkpoint and saved results: never skip an index whose result
    # is missing, and never keep results beyond the recorded index.
    if len(results) < start_index:
        print(
            f"Checkpoint for {prompt_type} is at index {start_index} but only "
            f"{len(results)} saved results were found; resuming from index {len(results)}."
        )
        start_index = len(results)
    results = results[:start_index]

    print(f"Processing {prompt_type} (Starting from index {start_index})...")

    for idx in range(start_index, len(data)):
        input_word = data[idx]["transformed"]
        response_text = call_gemini_api(prompt_text, input_word)

        # Extract answer based on category (eng_ → "Answer:", kor_ → "답변:")
        extracted_answer = extract_answer(response_text, CATEGORY)

        results.append({
            "word": input_word,     # Store original transformed word
            "response": response_text,  # Store full response
            "answer": extracted_answer  # Store extracted answer
        })

        # Persist results before advancing the checkpoint. Writing to a temp
        # file and renaming keeps the partial file intact if interrupted.
        tmp_partial_file = partial_file + ".tmp"
        with open(tmp_partial_file, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False)
        os.replace(tmp_partial_file, partial_file)

        # Save progress checkpoint
        with open(progress_file, "w") as f:
            f.write(str(idx + 1))

        # Pause after each full batch, except after the final item
        if (idx + 1) % REQUESTS_PER_BATCH == 0 and (idx + 1) < len(data):
            print(f"Sleeping for {BATCH_PAUSE_SECONDS} seconds...")
            time.sleep(BATCH_PAUSE_SECONDS)

    # Save results
    output_filename = os.path.join(RESULTS_DIR, f"{prompt_type}_results.json")
    save_results(results, output_filename)

print("All Processing Completed Successfully!")