In [1]:
# All imports live together at the top of the notebook: third-party first,
# then project-local modules.
import pandas as pd

from main import process_sample
from utils import load_environment

# NOTE(review): presumably loads the environment/config the pipeline needs
# (e.g. provider credentials) — confirm in utils.load_environment.
load_environment()

In [2]:
# Half-open positional window [START_SAMPLE_ID, END_SAMPLE_ID) of example rows to load.
START_SAMPLE_ID, END_SAMPLE_ID = 0, 1

# Read the examples file and keep only the rows inside the window.
df_examples = pd.read_csv("../data/examples.csv").iloc[START_SAMPLE_ID:END_SAMPLE_ID]

# NOTE(review): not referenced by the cells visible here (process_sample is given
# the file path instead); kept in case later cells rely on it.
df_evaluation_prompts = pd.read_csv("../data/evaluation_prompts.csv")

# One dict per selected row, keyed by column name.
records = df_examples.to_dict(orient="records")

# Single sample through pipeline (Anthropic)

In [3]:
# Send the first selected record through the pipeline: the local Ollama model is
# the model under test, and both Anthropic models act as judges.
# NOTE(review): despite its name, this variable holds the results for both judge
# models (Claude 3.7 Sonnet and Claude 3.5 Sonnet), not just the 3.7 one.
claude_sonnet_3_7 = process_sample(
    sample_id=0,
    record=records[0],
    test_models=["ollama:llama3.2"],
    judge_models=[
        "anthropic:claude-3-7-sonnet-20250219",
        "anthropic:claude-3-5-sonnet-20240620",
    ],
    evaluation_prompts_file_path="../data/evaluation_prompts.csv",
)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:pipeline:Successfully called : ollama:llama3.2 | Intention : test.
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
INFO:pipeline:Successfully called : anthropic:claude-3-7-sonnet-20250219 | Intention : judge.
INFO:pipeline:Inaccurate
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
INFO:pipeline:Successfully called : anthropic:claude-3-5-sonnet-20240620 | Intention : judge.
INFO:pipeline:Accurate


In [7]:
# The pipeline result unpacks into two parts: test-model outputs and judge outputs.
test, judge = claude_sonnet_3_7

# Show the answer text stored on the first test-model entry.
print(test[0]["test model response"])

Based on the context document, your risk factors for dementia include:

1. High blood pressure: Having consistent high blood pressure in mid-life (ages 45 to 65) increases your risk of developing dementia.
2. Diabetes: Having type 2 diabetes in mid-life (ages 45 to 65) increases your risk of developing dementia.
3. Obesity: Being obese in mid-life (ages 45 to 65) increases your risk of developing dementia.
4. Lack of physical activity: Physical inactivity in later life (ages 65 and up) increases your risk of developing dementia.
5. Poor diet: Eating an unhealthy diet, high in saturated fat, sugar, and salt, can increase your risk of developing dementia.

Regarding cognitive engagement, it is thought to support the development of a "cognitive reserve". This means that actively using your brain throughout your life may help protect against brain cell damage caused by dementia.


In [8]:
# Take the judge outputs straight from the Anthropic run instead of the shared
# `judge` global: the Google section rebinds `judge` to its own results, so
# re-running this cell out of order would otherwise print Gemini output here.
_anthropic_test, anthropic_judge = claude_sonnet_3_7

# Response of the first listed judge model for the first entry.
# NOTE(review): index meaning inferred from how the Google cells label
# judge[0][0] / judge[0][1] — confirm against process_sample.
print(anthropic_judge[0][0]["judge model response"])

Sentence 1: Based on the context document, your risk factors for dementia include:
Sentence 1 label: Accurate

Sentence 2: 1. High blood pressure: Having consistent high blood pressure in mid-life (ages 45 to 65) increases your risk of developing dementia.
Sentence 2 label: Accurate

Sentence 3: 2. Diabetes: Having type 2 diabetes in mid-life (ages 45 to 65) increases your risk of developing dementia.
Sentence 3 label: Inaccurate

Sentence 4: 3. Obesity: Being obese in mid-life (ages 45 to 65) increases your risk of developing dementia.
Sentence 4 label: Inaccurate

Sentence 5: 4. Lack of physical activity: Physical inactivity in later life (ages 65 and up) increases your risk of developing dementia.
Sentence 5 label: Inaccurate

Sentence 6: 5. Poor diet: Eating an unhealthy diet, high in saturated fat, sugar, and salt, can increase your risk of developing dementia.
Sentence 6 label: Accurate

Sentence 7: Regarding cognitive engagement, it is thought to support the development of a "co

# Single sample through pipeline (Google)

In [3]:
# Same record and test model as the Anthropic run, but judged by two Gemini
# models served through Vertex AI.
google_gemini = process_sample(
    sample_id=0,
    record=records[0],
    test_models=["ollama:llama3.2"],
    judge_models=[
        "google_vertexai:gemini-1.5-pro",
        "google_vertexai:gemini-2.5-pro-exp-03-25",
    ],
    evaluation_prompts_file_path="../data/evaluation_prompts.csv",
)



INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:pipeline:Successfully called : ollama:llama3.2 | Intention : test.
INFO:pipeline:Successfully called : google_vertexai:gemini-1.5-pro | Intention : judge.
INFO:pipeline:Accurate
INFO:pipeline:Successfully called : google_vertexai:gemini-2.5-pro-exp-03-25 | Intention : judge.
INFO:pipeline:Accurate


In [9]:
# Unpack the Google run; from here on `test` and `judge` refer to these results,
# replacing whatever an earlier cell bound to the same names.
test, judge = google_gemini

# gemini-1.5-pro output (first entry in judge_models)
print(judge[0][0]["judge model response"])

```json
{"sentence": "Based on the context document, your risk factors for dementia include:", "label": "no_rad", "rationale": "This sentence is introductory and does not make a factual claim.", "excerpt": null}
{"sentence": "High blood pressure: Having consistent high blood pressure in mid-life (ages 45 to 65) increases your risk of developing dementia.", "label": "supported", "rationale": "The context states that high blood pressure in mid-life increases the risk of dementia.", "excerpt": "People who have consistent high blood pressure (hypertension) in mid-life (ages 45 to 65) are more likely to develop dementia compared to those with normal blood pressure."}
{"sentence": "Diabetes: Having type 2 diabetes in mid-life (ages 45 to 65) increases your risk of developing dementia.", "label": "supported", "rationale": "The context states that type 2 diabetes in mid-life increases the risk of dementia.", "excerpt": "People with type 2 diabetes in mid-life (ages 45 to 65) are at an increase

In [10]:
# gemini-2.5-pro-exp-03-25 output (second entry in judge_models).
# Relies on `judge` having been unpacked from `google_gemini` in the previous cell.
print(judge[0][1]["judge model response"])

{"sentence": "Based on the context document, your risk factors for dementia include:", "label": "no_rad", "rationale": "This sentence is an introductory phrase setting up the list that follows and does not require factual attribution from the context.", "excerpt": null}
{"sentence": "High blood pressure: Having consistent high blood pressure in mid-life (ages 45 to 65) increases your risk of developing dementia.", "label": "supported", "rationale": "The context explicitly states that high blood pressure in mid-life (45-65) increases the likelihood of developing dementia.", "excerpt": "People who have consistent high blood pressure (hypertension) in mid-life (ages 45 to 65) are more likely to develop dementia compared to those with normal blood pressure."}
{"sentence": "Diabetes: Having type 2 diabetes in mid-life (ages 45 to 65) increases your risk of developing dementia.", "label": "supported", "rationale": "The context explicitly states that type 2 diabetes in mid-life (45-65) is lin