In [None]:
import os
import json
import re
import requests
from dotenv import load_dotenv

import anthropic
from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
from anthropic.types.messages.batch_create_params import Request

## Load Dataset

In [None]:
def load_data(file_path: str, limit: "int | None" = 5) -> list:
    """
    Load dataset from a JSON file, optionally truncated to its first records.

    Args:
        file_path (str): Path to the dataset JSON file.
        limit (int | None): Maximum number of leading records to return.
            Defaults to 5, the sample size that used to be hard-coded here.
            Pass None to return every record in the file.

    Returns:
        list: The first `limit` records of the dataset (all of them when
            `limit` is None).

    Raises:
        TypeError: If the top-level JSON value is not a list.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        records = json.load(f)

    # Slicing a non-list (e.g. a JSON object) fails with a confusing error,
    # so report the real problem explicitly.
    if not isinstance(records, list):
        raise TypeError(
            f"Expected a JSON array in {file_path!r}, got {type(records).__name__}."
        )

    return records if limit is None else records[:limit]

# File paths
# NOTE(review): the value below is a single space, which looks like a
# placeholder — set it to the dataset JSON path before running this cell.
DATA_FILE = " "

# Load dataset
# As called here, load_data() returns only the first 5 records of the file.
data = load_data(DATA_FILE)
print(f"Dataset Loaded: {len(data)} samples.")

## Load API Key & Initialize Client

In [None]:
# Pull variables from a local .env file (if present) into the environment,
# then read the Claude API key from it.
load_dotenv()
api_key = os.getenv("CLAUDE_API_KEY")

# Fail fast when the key is missing: it is used both to build the client and,
# later in this notebook, as a raw header when downloading batch results, so a
# missing key would otherwise only surface as an empty results step.
if not api_key:
    raise RuntimeError(
        "CLAUDE_API_KEY is not set. Add it to your environment or .env file."
    )

client = anthropic.Anthropic(api_key=api_key)

print("API Key Loaded and Client Initialized.")

## Load Prompts

In [None]:
def load_prompt_set(file_path: str, category: str) -> dict:
    """
    Read the prompts JSON file and return the entry stored under one category.

    Args:
        file_path (str): Location of the prompts JSON file.
        category (str): Top-level key to look up
            (e.g., 'eng_abbreviations', 'kor_abbreviations').

    Returns:
        dict: The value stored under `category`, or an empty dict when the
            file has no such key.
    """
    with open(file_path, "r", encoding="utf-8") as prompt_file:
        # Missing categories fall back to {} instead of raising.
        return json.load(prompt_file).get(category, {})

# File path and category for prompts
# NOTE(review): both values are a single space, which looks like a
# placeholder — set them before running this cell.
PROMPT_FILE = " "
CATEGORY = " "  # also passed to extract_answer() to pick the answer label

# Load prompts
# An unknown category yields an empty dict here rather than an error.
prompts = load_prompt_set(PROMPT_FILE, CATEGORY)
print(f"Loaded prompts for category '{CATEGORY}': {list(prompts.keys())}")

## Send Requests to API

In [None]:
# Settings shared by every batch submitted from this cell.
PROMPT_TYPES = ("zrs_prompt", "cot_prompt", "icl_prompt")
MODEL_NAME = "claude-3-5-sonnet-20241022"
MAX_TOKENS = 1024

# Check every template exists BEFORE submitting anything: a KeyError halfway
# through the loop would leave some batches already submitted and batch_ids
# only partially filled.
missing_prompts = [name for name in PROMPT_TYPES if name not in prompts]
if missing_prompts:
    raise KeyError(
        f"Prompt templates missing for category '{CATEGORY}': {missing_prompts}"
    )

# Maps prompt type -> ID of the batch submitted for it.
batch_ids = {}
for prompt_type in PROMPT_TYPES:
    batch_requests = [
        Request(
            # The "<prompt_type>-" prefix is stripped again later when results
            # are mapped back to their inputs via "request-N".
            custom_id=f"{prompt_type}-request-{idx + 1}",
            params=MessageCreateParamsNonStreaming(
                model=MODEL_NAME,
                max_tokens=MAX_TOKENS,
                # The prompt template is sent as the system prompt; the
                # sample's "transformed" text is the user message.
                system=[{"type": "text", "text": prompts[prompt_type]}],
                messages=[{"role": "user", "content": item["transformed"]}]
            )
        )
        for idx, item in enumerate(data)
    ]

    # One batch per prompt type, containing one request per data sample.
    batch_response = client.messages.batches.create(requests=batch_requests)
    batch_ids[prompt_type] = batch_response.id
print("Batch Requests Sent.")

## Check API Request Status

In [None]:
def check_batch_status(client, batch_id: str) -> str:
    """
    Look up a batch and report its processing status.

    Args:
        client: Anthropic API client instance.
        batch_id (str): ID of the batch to look up.

    Returns:
        str: The `processing_status` value of the retrieved batch, or
            "Unknown" when the retrieved object has no such attribute.

    NOTE(review): the Anthropic Message Batches docs list "in_progress",
    "canceling" and "ended" as the possible values — confirm against the
    installed SDK version.
    """
    batch = client.messages.batches.retrieve(batch_id)
    return getattr(batch, "processing_status", "Unknown")

# Print the current processing status of every submitted batch.
# Only the batch ID appears in the output; prompt_type is not printed.
for prompt_type, batch_id in batch_ids.items():
    status = check_batch_status(client, batch_id)
    print(f"Batch {batch_id} Status: {status}")

## Retrieve Batch Results & Save to JSON

In [None]:
# Extracting Answer from Response
def extract_answer(response_text: str, category: str) -> str:
    """
    Pull the labelled answer out of a model response.

    Categories starting with "eng_" are searched for an "Answer:" label; every
    other category is searched for a "답변:" label. The first occurrence of
    the label wins, and the captured text stops at the end of its line.

    Args:
        response_text (str): Full response text from Claude API.
        category (str): Prompt category (e.g., "eng_abbreviations" or "kor_abbreviations").

    Returns:
        str: The stripped text following the label, or `response_text`
            unchanged when the label does not occur.
    """
    pattern = r"Answer:\s*(.*)" if category.startswith("eng_") else r"답변:\s*(.*)"
    found = re.search(pattern, response_text)

    # No label present: hand back the whole response untouched.
    if found is None:
        return response_text

    return found.group(1).strip()

# Fetch Batch Results from API
def fetch_batch_results(client, batch_id: str, api_key: str) -> list:
    """
    Retrieves batch results from the Claude API.

    Looks up the batch, then downloads and parses the JSON Lines file found
    at its `results_url`.

    Args:
        client: Anthropic API client.
        batch_id (str): Batch request ID.
        api_key (str): API authentication key, sent as the `x-api-key` header.

    Returns:
        list: One parsed JSON object per non-blank result line, or None when
            the batch has no results URL yet or the download does not
            return HTTP 200.
    """
    batch_result = client.messages.batches.retrieve(batch_id)

    # The attribute can be present but still None (the Anthropic docs say it
    # is only populated once processing has ended), so test the value rather
    # than the attribute's existence; requests.get(None) would raise.
    results_url = getattr(batch_result, "results_url", None)
    if not results_url:
        return None

    response = requests.get(results_url, headers={
        "x-api-key": api_key,
        "Content-Type": "application/json",
        "anthropic-version": "2023-06-01"
    })
    if response.status_code != 200:
        return None

    # JSON is UTF-8 by specification; decode explicitly instead of relying on
    # charset guessing, which can garble non-ASCII (e.g. Korean) text.
    response.encoding = "utf-8"

    # Skip blank lines (such as a trailing newline) that json.loads rejects.
    return [json.loads(line) for line in response.text.splitlines() if line.strip()]

# Mapping Custom ID to Original Input
def map_custom_id_to_input(data: list) -> dict:
    """
    Build a lookup from "request-N" keys to each item's "transformed" value.

    N is the item's 1-based position in `data`.

    Args:
        data (list): Input items, each providing a "transformed" entry.

    Returns:
        dict: "request-N" -> the "transformed" value of the N-th item.
    """
    lookup = {}
    for position, entry in enumerate(data, start=1):
        lookup[f"request-{position}"] = entry["transformed"]
    return lookup

# Built once from the loaded dataset; the results loop later looks inputs up
# by the "request-N" part of each custom_id.
id_to_input_map = map_custom_id_to_input(data)

# Processing and Saving Results
def save_results_to_json(results: list, filename: str) -> None:
    """
    Saves processed results into a JSON file.

    Creates the parent directory of `filename` when it has one, writes the
    results as indented UTF-8 JSON without ASCII-escaping, and prints the
    path that was written.

    Args:
        results (list): List of dictionaries containing "word", "response", and "answer".
        filename (str): Path to the output JSON file. A bare filename (no
            directory part) is written to the current working directory.
    """
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "w", encoding="utf-8") as file:
        # ensure_ascii=False keeps non-ASCII text readable in the output file.
        json.dump(results, file, ensure_ascii=False, indent=4)

    print(f"Results saved: {filename}")

# Fetch, Process, and Store Results
RESULTS_DIR = "results"

# Output files actually written, used for the final summary message.
saved_files = []

for prompt_type, batch_id in batch_ids.items():
    results = fetch_batch_results(client, batch_id, api_key)

    # None/empty means the results could not be fetched (e.g. the batch is
    # still processing); say so instead of skipping silently.
    if not results:
        print(f"No results available for {prompt_type} (batch {batch_id}); skipping.")
        continue

    formatted_results = []
    for item in results:
        custom_id = item["custom_id"]
        result = item["result"]

        # Only successful results carry a "message"; other result types
        # (errored, canceled, expired) would raise KeyError below.
        if result.get("type") != "succeeded":
            print(f"Skipping {custom_id}: result type is {result.get('type')!r}.")
            continue

        response_text = result["message"]["content"][0]["text"]

        # Retrieve original word from input map (custom IDs are
        # "<prompt_type>-request-N"; the map is keyed by "request-N").
        input_text = id_to_input_map.get(custom_id.replace(f"{prompt_type}-", ""), "Unknown")

        # Extract answer using appropriate format
        extracted_answer = extract_answer(response_text, CATEGORY)

        formatted_results.append({
            "word": input_text,         # Store original transformed word
            "response": response_text,  # Store full response
            "answer": extracted_answer  # Store extracted answer
        })

    # Save results to JSON file in "results/" directory
    output_file = os.path.join(RESULTS_DIR, f"{prompt_type}_results.json")
    save_results_to_json(formatted_results, output_file)
    saved_files.append(output_file)

# Report success only when every submitted batch produced a results file.
if len(saved_files) == len(batch_ids):
    print("Batch Processing Completed Successfully!")
else:
    print(f"Batch processing incomplete: saved {len(saved_files)} of {len(batch_ids)} result files.")