In [None]:
import os
import re
import json
import time

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

## Load Dataset

In [None]:
def load_data(file_path: str) -> list:
    """
    Read the dataset stored as JSON at ``file_path``.

    Args:
        file_path (str): Location of the UTF-8 encoded JSON dataset file.

    Returns:
        list: The decoded JSON document; expected to be a list of
        sample dictionaries.
    """
    with open(file_path, encoding="utf-8") as dataset_file:
        raw_text = dataset_file.read()

    # Decode after reading so the file handle is released as early as possible.
    return json.loads(raw_text)

# Path to the dataset JSON file. The value below is a blank placeholder and
# must be replaced with a real path before this cell is run.
DATA_FILE = " "

# Load the whole dataset into memory. Later cells iterate over it by index
# and read the "transformed" field of each sample.
data = load_data(DATA_FILE)
print(f"Dataset Loaded: {len(data)} samples.")

## Load API Key & Initialize Client

In [None]:
# Load environment variables via python-dotenv (presumably from a local .env
# file) so the API key does not have to be hardcoded in the notebook.
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is not set.
api_key = os.getenv("OPENAI_API_KEY")
# `client` is the module-level handle used by call_o3_mini below.
client = OpenAI(api_key=api_key)
# NOTE(review): there is no explicit check here that api_key is non-empty;
# presumably the OpenAI constructor rejects a missing key - confirm.
print("API Key Loaded and Client Initialized.")

## Load Prompts

In [None]:
def load_prompts(file_path: str, category: str) -> dict:
    """
    Fetch the prompt templates that belong to one category.

    The prompts file is a JSON object keyed by category name; each value is
    itself a mapping of prompt name to prompt template.

    Args:
        file_path (str): Location of the UTF-8 encoded prompts JSON file.
        category (str): Category key to look up
            (e.g., 'eng_abbreviations', 'kor_abbreviations').

    Returns:
        dict: The prompt templates stored under ``category``, or an empty
        dict when the category is not present in the file.
    """
    with open(file_path, encoding="utf-8") as prompt_file:
        prompts_by_category = json.loads(prompt_file.read())

    selected_prompts = prompts_by_category.get(category, {})
    return selected_prompts

# File path and category for prompts.
# Both values below are blank placeholders and must be filled in before this
# cell is run. CATEGORY selects which group of prompts is loaded and is also
# passed to extract_answer, which checks for an "eng_" prefix to decide which
# answer marker to search for.
PROMPT_FILE = " "  
CATEGORY = " "  

# Load prompts (an empty dict is returned if CATEGORY is not in the file,
# in which case the list printed below is empty).
prompts = load_prompts(PROMPT_FILE, CATEGORY)
print(f"Loaded prompts for category '{CATEGORY}': {list(prompts.keys())}")

## Send Requests to API

In [None]:
# Call O3-Mini API
def call_o3_mini(prompt, max_retries=3):
    """
    Send ``prompt`` to the ``o3-mini`` chat-completions endpoint and return the reply text.

    The request is attempted up to ``max_retries`` times, sleeping 5 seconds
    between attempts. Any exception raised while calling the API or reading
    the reply (including a reply with no text content) counts as a failed
    attempt.

    Args:
        prompt (str): Input text to send as the user message.
        max_retries (int): Maximum number of attempts. With a value below 1
            no request is made and the error sentinel is returned.

    Returns:
        str: The stripped response text, or the sentinel string "Error" when
        no attempt succeeded. Callers compare against this sentinel.
    """
    for attempt in range(max_retries):
        try:
            # No `temperature` argument: o-series reasoning models reject
            # sampling parameters as unsupported, and 1.0 is the API default.
            response = client.chat.completions.create(
                model="o3-mini",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt}
                ],
                reasoning_effort="high"
            )

            content = response.choices[0].message.content
            if content is None:
                # Treat an empty reply as a failed attempt explicitly rather
                # than relying on an AttributeError from None.strip().
                raise ValueError("API returned a message with no text content")

            return content.strip()

        except Exception as e:
            # Deliberately broad: any failure is retried, never propagated.
            if attempt < max_retries - 1:
                print(f"Error on attempt {attempt + 1}: {e}. Retrying...")
                time.sleep(5)  # Retry after a short delay
            else:
                print(f"Final attempt failed: {e}")

    # Reached when every attempt failed, or when max_retries < 1 and the loop
    # never ran; always hand back the sentinel instead of an implicit None.
    return "Error"

## Extract Answer from Response

In [None]:
def extract_answer(response_text: str, category: str) -> str:
    """
    Pull the final answer out of a model response.

    The marker searched for depends on the prompt language: categories whose
    name starts with "eng_" use "Answer:", all others use "답변:". The text
    after the first marker, up to the end of that line, is returned.

    Args:
        response_text (str): Full response text returned by the model API.
        category (str): Prompt category (e.g., "eng_abbreviations" or "kor_abbreviations").

    Returns:
        str: The stripped text following the marker, or ``response_text``
        unchanged when the marker does not occur.
    """
    is_english = category.startswith("eng_")
    marker_pattern = r"Answer:\s*(.*)" if is_english else r"답변:\s*(.*)"

    found = re.search(marker_pattern, response_text)
    if found is None:
        # No marker present: fall back to the whole response.
        return response_text

    return found.group(1).strip()

## Save Processed Results

In [None]:
def save_results(results: list, filename: str):
    """
    Save processed results into a JSON file.

    The parent directory of ``filename`` is created if it does not exist.
    The file is written as UTF-8 with non-ASCII characters kept as-is
    (``ensure_ascii=False``) and 4-space indentation.

    Args:
        results (list): List of result dictionaries (the notebook stores
            'word', 'response' and 'answer' in each one).
        filename (str): Path to save the JSON file. May be a bare filename,
            in which case the file is written to the current directory.
    """
    parent_dir = os.path.dirname(filename)
    # A bare filename has no directory component; os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "w", encoding="utf-8") as file:
        json.dump(results, file, ensure_ascii=False, indent=4)

    print(f"Results saved: {filename}")

## Execute API Calls and Store Results

In [None]:
# Directories for final results and for resume checkpoints
RESULTS_DIR = "results"
CHECKPOINT_DIR = "checkpoint"

# Ensure both directories exist before anything is written into them
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)


def _read_start_index(progress_file: str) -> int:
    """Return the next dataset index recorded in ``progress_file`` (0 if absent or unreadable)."""
    try:
        with open(progress_file, "r") as f:
            # Clamp at 0 so a negative value cannot index from the end of the data.
            return max(0, int(f.read().strip()))
    except FileNotFoundError:
        return 0
    except ValueError:
        print(f"Ignoring unreadable checkpoint {progress_file}; starting from index 0.")
        return 0


def _read_saved_results(results_file: str) -> list:
    """Return the results list already stored in ``results_file`` ([] if absent or unreadable)."""
    try:
        with open(results_file, "r", encoding="utf-8") as f:
            saved = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []
    return saved if isinstance(saved, list) else []


for prompt_type, prompt_text in prompts.items():
    progress_file = os.path.join(CHECKPOINT_DIR, f"{prompt_type}_progress.txt")
    output_filename = os.path.join(RESULTS_DIR, f"{prompt_type}_results.json")

    # Load progress checkpoint
    start_index = _read_start_index(progress_file)

    # When resuming, carry over the answers produced by earlier runs.
    # Starting from an empty list here would make the final save overwrite
    # the results file and lose everything before the checkpoint.
    results = _read_saved_results(output_filename) if start_index > 0 else []
    if start_index > 0 and not results:
        print(
            f"Warning: checkpoint for {prompt_type} is at index {start_index} but no saved "
            f"results were found in {output_filename}; earlier answers cannot be recovered."
        )

    skipped = 0
    print(f"Processing {prompt_type} (Starting from index {start_index})...")

    for idx in range(start_index, len(data)):
        input_word = data[idx]["transformed"]
        prompt = f"{input_word}\n{prompt_text}"
        response_text = call_o3_mini(prompt)

        if response_text == "Error":
            # The checkpoint is not advanced here, but the next successful
            # item moves it past this index, so the item is not retried.
            print(f"Error processing {input_word} at index {idx}. Skipping...")
            skipped += 1
            continue

        # Extract Answer
        extracted_answer = extract_answer(response_text, CATEGORY)

        results.append({
            "word": input_word,
            "response": response_text,
            "answer": extracted_answer
        })

        # Persist results BEFORE advancing the checkpoint so the checkpoint
        # never points past data that is not on disk. Rewriting the file per
        # item is cheap compared with the API call that precedes it.
        with open(output_filename, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=4)

        # Save progress checkpoint
        with open(progress_file, "w") as f:
            f.write(str(idx + 1))

    # Save results (final write; also reports the output path)
    save_results(results, output_filename)
    if skipped:
        print(f"{prompt_type}: {skipped} item(s) skipped because of API errors.")

print("All Processing Completed Successfully!")