In [None]:
import json

# Step 1: read the complete per-paper JSON document.
with open('new_sample_output_contradiction.json', 'r', encoding='utf-8') as src:
    full_data = json.load(src)

# Step 2: keep only each paper's 'agent_response'.
# Papers whose 'agent_response' is absent or None are left out of the result
# (to keep them instead, they could be mapped to an empty list).
filtered = {
    pid: response
    for pid, response in (
        (pid, record.get('agent_response')) for pid, record in full_data.items()
    )
    if response is not None
}

# Step 3: write the reduced mapping to its own file.
with open('agent_responses_only.json', 'w', encoding='utf-8') as dst:
    json.dump(filtered, dst, indent=4, ensure_ascii=False)

print(f"Extracted agent_response for {len(filtered)} papers into 'agent_responses_only.json'")

In [None]:
import os
import json
import time
from openai import OpenAI, OpenAIError

# ——————————————————————
# 1. Setup
# ——————————————————————

# Credentials: read the key from the environment so it never has to be written
# into the notebook. For backward compatibility, an `API_KEY` variable defined
# by an earlier cell is still honoured as a fallback; looking it up through
# globals() avoids a NameError on a fresh kernel where no such variable exists.
api_key = os.getenv("OPENAI_API_KEY") or globals().get("API_KEY")

if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set")

client = OpenAI(api_key=api_key)

# File read in step 2, file written in step 4, and the chat model queried in step 3.
INPUT_FILE   = 'agent_responses_only.json'
OUTPUT_FILE  = 'agent_responses_with_predicted_aspects.json'
MODEL        = 'gpt-4o-mini'

# Earlier prompt variant, kept for reference only (not used):
# SYSTEM_PROMPT = (
#     "You will be given a statement of contradiction between two reviews "
#     "and some evidence text. Identify and return the single best aspect "
#     "that this contradiction is about, choosing from: "
#     "Substance, Motivation, Clarity, Meaningful comparison, Originality, Soundness, or Replicability. "
#     "Return *only* the aspect name."
# )

# Active system prompt: asks the model to answer with exactly one aspect name
# from a fixed list and nothing else.
SYSTEM_PROMPT = (
    "You will receive two contradictory review statements and their evidence. "
    "Determine which single aspect is at the root of their disagreement. "
    "Select exactly one aspect from this list—Substance, Motivation, Clarity, Meaningful comparison, "
    "Originality, Soundness, Replicability—and return only that aspect name, with no additional text."
)
# ——————————————————————
# 2. Load existing data
# ——————————————————————

# `data` maps each paper id to its list of entry dicts. Entries are updated in
# place in step 3 and the whole structure is written back out in step 4.
with open(INPUT_FILE, 'r', encoding='utf-8') as f:
    data = json.load(f)

# ——————————————————————
# 3. Iterate and call the API
# ——————————————————————

# Pause between consecutive requests (seconds), applied after failures as well
# as successes so an error is never followed by an immediate new request.
PAUSE_SECONDS = 0.3

for paper_id, entries in data.items():
    print(f"Processing {paper_id} ({len(entries)} entries)…")
    for entry in entries:
        # Skip only entries that already hold a real prediction. A None value
        # marks an earlier failed call and must be retried, not skipped.
        if entry.get('predicted_aspect') is not None:
            continue

        prompt = (
            f"Contradiction:\n{entry['contradiction_statement']}\n\n"
            f"Evidence:\n{entry['evidence']}\n\n"
            "Which aspect does this primarily concern?"
        )

        try:
            resp = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system",  "content": SYSTEM_PROMPT},
                    {"role": "user",    "content": prompt}
                ],
                temperature=0.1,
                max_tokens=32,
            )
            # The message content may be None; guard it so `.strip()` cannot
            # raise an AttributeError that the handler below would not catch.
            # An empty answer is stored as None, i.e. "no prediction".
            answer = resp.choices[0].message.content
            entry['predicted_aspect'] = (answer or '').strip() or None

        except OpenAIError as e:
            print(f"  → API error for entry: {e}")
            entry['predicted_aspect'] = None

        finally:
            time.sleep(PAUSE_SECONDS)

# ——————————————————————
# 4. Save augmented data
# ——————————————————————

with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=4, ensure_ascii=False)

print(f"\nAll done! Results saved to '{OUTPUT_FILE}'.")

In [None]:
import json
from collections import defaultdict

INPUT_FILE = 'agent_responses_with_predicted_aspects.json'


def _normalize_aspect(value):
    """Return a canonical form of an aspect label for comparison.

    Non-string values (None, lists, numbers, ...) yield '' so the caller can
    treat them as "missing" instead of crashing on a string method. Strings
    lose surrounding whitespace and wrapping punctuation (periods, quotes,
    asterisks, backticks) and are case-folded, so that e.g. "Soundness." and
    "**soundness**" both compare equal to "Soundness".
    """
    if not isinstance(value, str):
        return ''
    return value.strip(" \t\r\n.\"'*`").casefold()


# Load the data
with open(INPUT_FILE, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Containers for stats
paper_stats = {}
total = 0
correct = 0
skipped = 0  # entries left out because a label or a prediction is missing

for paper_id, entries in data.items():
    paper_total = 0
    paper_correct = 0

    for entry in entries:
        id_aspect = _normalize_aspect(entry.get('identified_aspect'))
        pred_aspect = _normalize_aspect(entry.get('predicted_aspect'))

        # Only count if we have both values
        if not (id_aspect and pred_aspect):
            skipped += 1
            continue

        paper_total += 1
        total       += 1

        if id_aspect == pred_aspect:
            paper_correct += 1
            correct       += 1

    # Avoid division by zero
    accuracy = (paper_correct / paper_total) if paper_total else 0.0
    paper_stats[paper_id] = {
        'total_pairs': paper_total,
        'correct':     paper_correct,
        'accuracy':    accuracy
    }

# Global accuracy
global_accuracy = (correct / total) if total else 0.0

# Print per-paper results
print("Per-paper aspect match summary:")
for pid, stats in paper_stats.items():
    print(f"  {pid}: {stats['correct']}/{stats['total_pairs']} correct — accuracy {stats['accuracy']:.2%}")

# Print overall
print(f"\nOverall: {correct}/{total} correct — global accuracy {global_accuracy:.2%}")

# Make the size of the excluded set visible so the accuracy is not over-read.
if skipped:
    print(f"Skipped {skipped} entries with a missing identified or predicted aspect")