In [5]:
from dotenv import load_dotenv
load_dotenv()

True

In [13]:
import instructor
import openai
from atomic_agents.lib.components.agent_memory import AgentMemory
from atomic_agents.agents.base_agent import BaseAgent, BaseAgentConfig
from atomic_agents.lib.components.system_prompt_generator import SystemPromptGenerator

# System prompts for each agent
reader_prompt = SystemPromptGenerator(
    background=["You are a clinical trial reader.", "..."],
    steps=["Extract relevant sections from the data."],
    output_instructions=["Output extracted text."]
)

classifier_prompt = SystemPromptGenerator(
    background=["You are a medical reasoning classifier.", "..."],
    steps=["Decide entailment or contradiction."],
    output_instructions=["Output 'Entailment' or 'Contradiction' only."]
)

# Memory for each agent
reader_memory = AgentMemory()
classifier_memory = AgentMemory()

# Model clients
client = instructor.from_openai(openai.OpenAI())

# Agents
reader_agent = BaseAgent(config=BaseAgentConfig(
    client=client,
    model="gpt-4o-mini",
    system_prompt_generator=reader_prompt,
    memory=reader_memory
))

classifier_agent = BaseAgent(config=BaseAgentConfig(
    client=client,
    model="gpt-4o-mini",
    system_prompt_generator=classifier_prompt,
    memory=classifier_memory
))

In [2]:
import json
import sys
import os

def load_CT(name:str)-> dict:
    with open(os.path.join("training_data","CT json", name+".json"), "r") as f:
        data = json.load(f)
    return data

In [22]:
def multiagent_pipeline(statement, primary_id, secondary_id=None):
    # Load trial data
    primary_data = None
    secondary_data = None
    try:
        primary_data = load_CT(primary_id) if primary_id else None
    except Exception as e:
        primary_data = {"error": f"Could not load primary trial {primary_id}: {e}"}
    if secondary_id:
        try:
            secondary_data = load_CT(secondary_id)
        except Exception as e:
            secondary_data = {"error": f"Could not load secondary trial {secondary_id}: {e}"}

    # Build reader input payload
    reader_input = {
        "statement": statement,
        "primary_id": primary_id,
        "secondary_id": secondary_id,
        "primary_trial": primary_data,
        "secondary_trial": secondary_data
    }
    reader_output = reader_agent.run(reader_input).chat_message

    # Classifier Agent receives extracted data
    classifier_input = {
        "statement": statement,
        "extracted_data": reader_output,
        "primary_id": primary_id,
        "secondary_id": secondary_id
    }
    classifier_output = classifier_agent.run(classifier_input).chat_message
    return classifier_output



In [28]:
# Example usage
result = multiagent_pipeline(
    statement="there is a 13.2% difference between the results from the two the primary trial cohorts",
    primary_id="NCT00066573"
)
print(result)

Contradiction


In [25]:
import json
from tqdm import tqdm
train_path = "training_data/train.json"

with open(train_path, "r", encoding="utf-8") as f:
    raw = f.read().strip()

train_data = {}
if raw:
    try:
        train_data = json.loads(raw)
    except json.JSONDecodeError as e:
        print("Failed to parse train.json as single JSON document:", e)
        train_data = {}

# Expect train_data as dict[id] = example
examples = list(train_data.items())[:100]
print(f"Loaded {len(examples)} keyed training examples")

results = []
correct = 0
total = len(examples)

for ex_id, example in tqdm(examples):
    if not isinstance(example, dict):
        continue
    statement = example.get("Statement") or example.get("statement")
    primary_id = example.get("Primary_id") or example.get("primary_id")
    secondary_id = example.get("Secondary_id") or example.get("secondary_id")
    expected = example.get("Label") or example.get("label")
    if statement is None or primary_id is None:
        results.append({"id": ex_id, "statement": statement, "primary_id": primary_id, "secondary_id": secondary_id, "expected": expected, "output": "SKIPPED_MISSING_FIELDS"})
        continue
    try:
        output = multiagent_pipeline(statement, primary_id, secondary_id)
    except Exception as e:
        output = f"ERROR: {e}"
    results.append({
        "id": ex_id,
        "statement": statement,
        "primary_id": primary_id,
        "secondary_id": secondary_id,
        "expected": expected,
        "output": output
    })
    if expected is not None and str(output).strip().lower() == str(expected).strip().lower():
        correct += 1

accuracy = correct / total if total > 0 else 0
print(f"Accuracy: {accuracy:.2%} ({correct}/{total})")

Loaded 100 keyed training examples


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [05:00<00:00,  3.01s/it]

Accuracy: 49.00% (49/100)





In [None]:
# Show a sample of incorrect predictions for error analysis
incorrect = [r for r in results if r.get("expected") is not None and str(r.get("output")).strip().lower() != str(r.get("expected")).strip().lower()]
print(f"Number of incorrect predictions: {len(incorrect)} (out of {len(results)})")
for i, r in enumerate(incorrect[:5]):
    print(f"Example {i+1} / ID: {r['id']}")
    print(f"Statement: {r['statement']}")
    print(f"Primary ID: {r['primary_id']}  Secondary ID: {r.get('secondary_id')}")
    print(f"Expected: {r['expected']}")
    print(f"Output: {r['output']}")
    print("-")

Number of incorrect predictions: 51 (out of 100)
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Entailment
Output: Contradiction
-
Expected: Entailment
Output: Contradiction
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Entailment
Output: Contradiction
-
Expected: Entailment
Output: Contradiction
-
Expected: Entailment
Output: Contradiction
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expected: Entailment
Output: Contradiction
-
Expected: Entailment
Output: Contradiction
-
Expected: Entailment
Output: Contradiction
-
Expected: Contradiction
Output: Entailment
-
Expected: Contradiction
Output: Entailment
-
Expect