In [3]:
import pandas as pd
import time
import os
from openai import OpenAI, RateLimitError
from secret import apikey

# Model identifier and API client used for every request in this script
gpt_model = "gpt-4o"
client = OpenAI(api_key=apikey)

# System prompt sent with every question; the sentences are joined with single
# spaces, and the last one restricts the reply to a bare option letter.
instructions = " ".join(
    [
        "You will be shown a prompt with a sentence containing a placeholder [ ].",
        "The prompt will be followed by four options labeled A, B, C, and D.",
        "Select the single best-fitting option for the placeholder.",
        "Respond with ONLY the letter: A, B, C, or D.",
    ]
)

# GPT-4o call with retry logic
def call_gpt4(prompt: str, instructions: str) -> str:
    """Ask the chat model one multiple-choice question and return its choice.

    The request is sent with ``instructions`` as the system message and
    ``prompt`` as the user message, at temperature 0. A rate-limit error is
    retried, for at most three attempts in total; any other exception aborts
    immediately.

    Args:
        prompt: The user message (question plus the four labelled options).
        instructions: The system message describing the expected reply format.

    Returns:
        ``"A"``, ``"B"``, ``"C"`` or ``"D"`` when the reply starts with that
        letter as a standalone token (case-insensitive, e.g. ``"b"``, ``"B."``,
        ``"B) text"``); ``"Invalid"`` for any other reply; ``"Error"`` when a
        non-rate-limit exception was raised; ``"Failed after 3 retries"`` when
        every attempt hit the rate limit.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            response = client.chat.completions.create(
                model=gpt_model,
                messages=[
                    {"role": "system", "content": instructions},
                    {"role": "user", "content": prompt}
                ],
                temperature=0
            )
            # A missing (None) content raises here and is reported as "Error"
            # by the generic handler below.
            content = response.choices[0].message.content.strip()
            answer = content.upper()
            # Accept the leading letter only when it is not the first character
            # of a longer word: a plain startswith() would read "Answer: B" as
            # "A" and "Both" as "B".
            if answer[:1] in ("A", "B", "C", "D") and not answer[1:2].isalnum():
                return answer[0]
            return "Invalid"
        except RateLimitError:
            print("Rate limit hit. Retrying...")
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts - 1:
                time.sleep(5)
        except Exception as e:
            print(f"Error: {e}")
            return "Error"
    return "Failed after 3 retries"

# Process one shuffled file
def process_file(input_file: str) -> None:
    """Query the model for every row of one CSV and save the answers.

    The input CSV is read once. For each row a prompt is built from the
    ``Domain``, ``Prompt``, ``A``, ``B``, ``C`` and ``D`` columns and sent
    through ``call_gpt4``. After every answer, the rows processed so far are
    written to the output CSV with an added ``GPT-4o Choice`` column, so an
    interrupted run can be resumed.

    If the output file already exists, its ``GPT-4o Choice`` values are kept
    and processing continues from the first row without an answer. Rows whose
    stored answer is an error marker are not retried.

    Args:
        input_file: Path of the input CSV. The output path is derived by
            replacing ``".csv"`` with ``"_Shfl_gpt4o_output.csv"``.
    """
    source_df = pd.read_csv(input_file)
    output_file = input_file.replace(".csv", "_Shfl_gpt4o_output.csv")

    if os.path.exists(output_file):
        existing = pd.read_csv(output_file)
        results = list(existing["GPT-4o Choice"])
        start_index = len(results)
        print(f"🔄 Resuming from row {start_index} of {input_file}...")
    else:
        results = []
        start_index = 0

    for _, row in source_df.iloc[start_index:].iterrows():
        # Remap shuffled columns back to A–D for LLM: the labels shown to the
        # model are the reverse of the CSV column names (label A shows column
        # D, ..., label D shows column A).
        # NOTE(review): the stored letter refers to the labels shown in the
        # prompt, not to the CSV column names — confirm scoring expects this.
        prompt_text = (
            f"Domain: {row['Domain']}\n"
            f"Prompt: {row['Prompt']}\n"
            f"A: {row['D']}\n"
            f"B: {row['C']}\n"
            f"C: {row['B']}\n"
            f"D: {row['A']}\n"
            "Which option best completes the sentence?"
        )
        results.append(call_gpt4(prompt_text, instructions))

        # Save incrementally. `results` already includes the answers carried
        # over from a previous run, so its length alone is the number of rows
        # completed; adding start_index again would count the resumed prefix
        # twice and make the column assignment fail on length.
        completed = source_df.iloc[:len(results)].copy()
        completed["GPT-4o Choice"] = results
        completed.to_csv(output_file, index=False, encoding="utf-8-sig")

    print(f"✅ Done: {output_file}")

# Process every shuffled input file, one after another, in the listed order
shuffled_inputs = ("High - Shfl.csv", "Mid - Shfl.csv", "Low - Shfl.csv")
for file in shuffled_inputs:
    process_file(file)


✅ Done: High - Shfl_Shfl_gpt4o_output.csv
✅ Done: Mid - Shfl_Shfl_gpt4o_output.csv
✅ Done: Low - Shfl_Shfl_gpt4o_output.csv
