In [None]:
import os
import numpy as np
import pandas as pd
import time
from datetime import datetime
import json
import glob
from openai import OpenAI, RateLimitError, APIError, APIConnectionError

In [None]:
# Build the shared OpenAI client from the OPENAI_API_KEY environment variable,
# stopping immediately with a ValueError when the variable is not set.
if "OPENAI_API_KEY" not in os.environ:
    raise ValueError("could not find OPENAI_API_KEY in your path/environment variables")
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [None]:
# Defaults for the API wrapper defined below; each one can be overridden per call.

# What is sent with every request
model = "gpt-5.1"
reasoning_effort = "medium"
max_completion_tokens = 10000

# How failures are handled
max_retries = 1        # upper bound of the wrapper's attempt loop
initial_backoff = 1.0  # starting delay in seconds; the wrapper doubles it after each failure

# Wrapper function w/ retries and exponential backoff
def call_chat_completion(system_prompt: str, user_prompt: str, model: str = model,
                          max_completion_tokens: int = max_completion_tokens,
                          max_retries: int = max_retries, initial_backoff: float = initial_backoff,
                          reasoning_effort: str = reasoning_effort) -> str:
    """
    Call the OpenAI Chat Completions endpoint, retrying failed attempts.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        model: Model identifier passed to the API.
        max_completion_tokens: Forwarded to the API as ``max_completion_tokens``.
        max_retries: Total number of attempts made before giving up.
        initial_backoff: Seconds to wait after the first failed attempt; the
            wait doubles after every further failure.
        reasoning_effort: Forwarded to the API as ``reasoning_effort``.

    Returns:
        The text of the first choice, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If no attempt succeeded (including when ``max_retries``
            is less than 1). The last underlying exception, if any, is
            attached as ``__cause__``.
    """
    # Work on a local copy so the caller-visible parameter keeps its meaning.
    backoff = initial_backoff
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                max_completion_tokens=max_completion_tokens,
                reasoning_effort=reasoning_effort,
            )
            choice = response.choices[0]

            # Warn when the output was cut off by the token budget.
            finish_reason = getattr(choice, "finish_reason", None)
            if finish_reason == "length":
                print("Warning: finish_reason='length'. Consider increasing max_completion_tokens.")

            content = choice.message.content
            if content is None:
                # Surface a readable reason instead of the AttributeError that
                # calling .strip() on None would produce. Still handled as a
                # failed attempt by the generic handler below.
                raise ValueError(
                    f"model returned no message content (finish_reason={finish_reason!r})"
                )
            return content.strip()

        except (RateLimitError, APIError, APIConnectionError) as e:
            last_error = e
            failure = f"API error ({type(e).__name__}: {e})"

        except Exception as e:
            last_error = e
            failure = f"Unexpected error: {e}"

        # Only wait when another attempt will actually follow.
        if attempt >= max_retries:
            print(f"{failure}. Attempt {attempt}/{max_retries} failed; giving up.")
            break
        print(f"{failure}. Attempt {attempt}/{max_retries} failed; retrying in {backoff:.1f}s...")
        time.sleep(backoff)
        backoff *= 2  # Exponential backoff

    raise RuntimeError(
        f"Failed to get response after {max_retries} attempt(s); last error: {last_error!r}"
    ) from last_error


In [None]:
# Load the system prompt sent with every request.
# os.path.join keeps the path valid on every OS; a literal backslash only
# resolves on Windows.
system_prompt_path = os.path.join("prompts", "system_prompt.txt")
if not os.path.exists(system_prompt_path):
    # Stop here with a clear message rather than printing and then failing
    # inside open() a moment later.
    raise FileNotFoundError(f"Prompt file '{system_prompt_path}' not found.")

with open(system_prompt_path, "r", encoding="utf-8") as pf:
    system_prompt = pf.read().strip()
print(system_prompt)

In [None]:
# Smoke test: push a trivial request through the wrapper and show the reply.
test_response = call_chat_completion(
    system_prompt="",
    user_prompt="say test",
)
print(test_response)

In [None]:
# Directory layout used by process_files.
input_dir = "inputs"    # holds one .txt file per question to generate exemplars for
output_dir = "outputs"  # receives the generated responses
os.makedirs(output_dir, exist_ok=True)
prompts_dir = "prompts"  # holds the system prompt and the per-level prompt files
# Derived from prompts_dir with os.path.join so the path resolves on any OS;
# a literal backslash only works on Windows.
system_prompt_path = os.path.join(prompts_dir, "system_prompt.txt")

# Read in the system prompt, failing immediately with a clear message when the
# file is missing instead of printing and then crashing inside open().
if not os.path.exists(system_prompt_path):
    raise FileNotFoundError(f"Prompt file '{system_prompt_path}' not found.")
with open(system_prompt_path, "r", encoding="utf-8") as pf:
    system_prompt = pf.read().strip()

def process_files(
    performance_level: str,
    input_dir: str = input_dir,
    output_dir: str = output_dir,
    prompts_dir: str = prompts_dir,
):
    """
    Generate one response per input file for a given performance level.

    Reads every .txt file in ``input_dir``, wraps its content in the
    performance-level prompt found in ``prompts_dir``, sends the result to
    ``call_chat_completion`` together with the module-level ``system_prompt``,
    and writes each response to ``output_dir`` as
    '<performance_level>_<input filename>'.

    The prompt file must be named '<performance_level>.txt' (e.g. 'poor.txt',
    'excellent.txt'). It is split at the first occurrence of '---': the text
    before it becomes the prefix, the text after it the suffix. Without a
    separator the whole file is used as the prefix and the suffix is empty.

    Args:
        performance_level: Selects the prompt file and prefixes output names.
        input_dir: Directory searched for '*.txt' input files.
        output_dir: Directory the responses are written to; created if missing.
        prompts_dir: Directory containing the performance-level prompt files.

    Returns:
        None. A missing prompt file or an input directory without .txt files
        is reported with a printed message and ends the call early. A failed
        API call is reported and an '[ERROR] ...' line is written to that
        file's output instead of a response.
    """
    # Load performance-level-specific prompt
    prompt_path = os.path.join(prompts_dir, f"{performance_level}.txt")
    if not os.path.exists(prompt_path):
        print(f"Prompt file '{prompt_path}' not found.")
        return

    with open(prompt_path, "r", encoding="utf-8") as pf:
        prompt_content = pf.read().strip()

    if "---" not in prompt_content:
        print(f"Warning: No separator found in {prompt_path}. Using entire file as prefix.")
    # partition splits at the first '---' and yields an empty suffix when absent.
    raw_prefix, _, raw_suffix = prompt_content.partition("---")
    per_file_prefix = raw_prefix.strip()
    per_file_suffix = raw_suffix.strip()

    print(f"Using performance level: {performance_level}")
    print(f"  Prefix length: {len(per_file_prefix)} chars")
    print(f"  Suffix length: {len(per_file_suffix)} chars")

    # Process input files
    txt_paths = sorted(glob.glob(os.path.join(input_dir, "*.txt")))
    if not txt_paths:
        print(f"No .txt files found in {input_dir}.")
        return

    # A caller-supplied output_dir may not exist yet; create it before writing.
    os.makedirs(output_dir, exist_ok=True)

    for path in txt_paths:
        filename = os.path.basename(path)
        out_path = os.path.join(output_dir, f"{performance_level}_{filename}")
        print(f"\nProcessing {filename} -> {out_path}")

        with open(path, "r", encoding="utf-8") as f:
            content = f.read().strip()
        if not content:
            print(" Input file is empty — skipping.")
            continue

        # Build the user prompt. All three pieces were stripped above, so join
        # them with blank lines; otherwise the last word of one piece fuses
        # with the first word of the next. Empty pieces are left out.
        user_prompt = "\n\n".join(
            piece for piece in (per_file_prefix, content, per_file_suffix) if piece
        )

        try:
            output_text = call_chat_completion(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
            )
        except Exception as e:
            # Best effort: record the failure in the output file and move on
            # to the next input instead of aborting the whole batch.
            print(f"Failed to process {filename}: {e}")
            output_text = f"[ERROR] Could not get response: {e}"

        # Save output_text to output file
        with open(out_path, "w", encoding="utf-8") as outf:
            outf.write(output_text)

        print(f" Saved response ({len(output_text)} chars).")

In [None]:
# Generate the "poor" exemplars for every input file.
if __name__ == "__main__":
    process_files("poor")

In [None]:
# Generate the "sufficient" exemplars for every input file.
if __name__ == "__main__":
    process_files("sufficient")

In [None]:
# Generate the "good" exemplars for every input file.
if __name__ == "__main__":
    process_files("good")

In [None]:
# Generate the "excellent" exemplars for every input file.
if __name__ == "__main__":
    process_files("excellent")