In [1]:
import sys

sys.path.append("..")

from methods.simple_rag import SimpleRAG
from dataloader.load_datasets import *
from helpers import llm
from helpers import pubmed
from tqdm import tqdm
import time
import os

Load claims

In [2]:
# Read the claims table; the first CSV column becomes the DataFrame index.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# arrives through `from dataloader.load_datasets import *` — confirm.
df = pd.read_csv(
    filepath_or_buffer="../dataloader/scifact_medical_causal_claims.csv",
    index_col=0,
)
# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0_level_0,id,claim,evidence_doc_id,evidence_label,evidence_sentences,cited_doc_ids,causal_result_raw,is_medical_causal
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7,12,40mg/day dosage of folic acid and 2mg/day dosa...,33409100,SUPPORT,[8],[33409100],Yes,True
24,30,A breast cancer patient's capacity to metaboli...,24341590,SUPPORT,[10],[24341590],Yes,True
29,34,A deficiency of folate increases blood levels ...,11705328,SUPPORT,[4],[11705328],Yes,True
32,39,A diminished ovarian reserve does not solely i...,13497630,SUPPORT,[7],[13497630],Yes,True
42,41,A high microerythrocyte count protects against...,18174210,SUPPORT,[1 9],[18174210],Yes,True


Load models and set up APIs

In [3]:
# Runtime configuration for the LLM backend and the PubMed helper.
default_model = "deepseek-r1:32b"  # used for keywords (not referenced by the zero-shot / CoT cells in this section)
host = "localhost"
port = 11434  # NOTE(review): looks like Ollama's usual default port — confirm it matches the local server

# Client handle that the experiment cells pass as `client=` to llm.call_ollama(...).
client = llm.setup_ollama_client(host=host, port=port)
# Called with no arguments; presumably loads the PubMed API key from the
# environment or a config file — TODO confirm in helpers/pubmed.
pubmed.set_api_key()

Prompts:

In [None]:
# Chain-of-thought prompt template. It has a single `{claim}` placeholder and is
# filled with `cot_prompt.format(claim=claim)` in the CoT experiment cell.
# The model is asked to reason step by step and then emit a "Final Answer:" line
# containing SUPPORTED or CONTRADICT.
# NOTE(review): the claims table preview shows `evidence_label` values spelled
# "SUPPORT", while this prompt asks for "SUPPORTED" — any scoring code must
# normalise the two spellings; confirm.
cot_prompt = """
You are a biomedical expert specializing in causal inference and evidence-based reasoning.
Carefully analyze the following medical causal claim before making your decision.
Think step-by-step through the scientific and clinical mechanisms, known studies, and plausible causal pathways.
After reasoning through the evidence, decide whether the claim is SUPPORTED or CONTRADICT based on established knowledge.
Respond only after completing your reasoning.

Claim: "{claim}"

Reasoning: [Your step-by-step analysis here]
Final Answer: [SUPPORTED or CONTRADICT]
"""

# Zero-shot prompt template. It has a single `{claim}` placeholder and is filled
# with `zero_shot_prompt.format(claim=claim)` in the zero-shot experiment cell.
# The model is asked for a one-word verdict: SUPPORTED or CONTRADICT.
# Fix: the instruction sentence previously read "You task is ..." (typo).
zero_shot_prompt = """
You are a biomedical expert specializing in causal inference and evidence-based reasoning. 
Your task is to assess whether the following medical causal claim is SUPPORTED or CONTRADICT based on general scientific and clinical knowledge.
Respond with only one word: SUPPORTED or CONTRADICT.

Claim: "{claim}"
Answer:
"""

# Retrieval-augmented prompt template. It has two placeholders: `{documents}`
# (the retrieved context, inserted under the CONTEXT heading) and `{claim}`.
# The model is asked to reason through three guiding questions and then emit a
# "Final Answer:" line containing SUPPORTED or CONTRADICT.
# NOTE(review): this template is not used by the zero-shot / CoT cells in this
# section; presumably a later RAG experiment formats it — confirm.
rag_prompt = """
You are a biomedical expert specializing in causal inference. Classify the following medical causal claim as either SUPPORTED or CONTRADICT, based on the given well-established scientific and clinical knowledge.

Evaluate the medical causal claim using the provided CONTEXT documents:

CONTEXT
{documents}

Before providing your final answer, think through the following steps:
1. What is the proposed causal relationship in this claim?
2. What does established scientific literature say about this relationship?
3. Are there well-documented mechanisms that support or refute this claim?

After your analysis, provide your final classification.

Claim: "{claim}"

Reasoning: 
[Provide your step-by-step reasoning here]

Final Answer: [SUPPORTED or CONTRADICT]
"""

Experiments - Zero Shot

In [None]:
# Zero-shot experiment: send every claim to every model with the zero-shot
# prompt and append the raw answers to a CSV file, one batch per model.
claims = df["claim"].tolist()
output_file = "zero_shot_results.csv"

# Decide whether a header row still has to be written. A zero-byte file (for
# example one created by `touch` or left by an aborted run) is treated as
# missing: appending to it without a header would make a later
# `pd.read_csv(output_file)` interpret the first data row as column names.
file_exists = os.path.isfile(output_file) and os.path.getsize(output_file) > 0

models = [
    "deepseek-r1:32b",
    "mistral:7b",
    "llama3.1:8b",
    "llama3.1:70b",
    "qwen3:30b",
]

for model in models:
    # Column-oriented accumulator for this model's rows.
    result = {
        "model": [],
        "method": [],
        "claim": [],
        "documents": [],
        "answer": [],
    }

    for claim in tqdm(claims, desc=f"Zero Shot - {model}"):
        response = llm.call_ollama(
            model=model,
            prompt=zero_shot_prompt.format(claim=claim),
            client=client,
        )
        # Fall back to the "NAN" placeholder when the reply has no "response" key.
        output = response.get("response", "NAN")

        result["model"].append(model)
        result["method"].append("zero_shot")
        result["claim"].append(claim)
        result["documents"].append("NAN")  # no retrieved documents in this setting
        result["answer"].append(output)

    # Persist this model's rows before moving on, so a failure in a later model
    # does not discard results that were already collected.
    result_df = pd.DataFrame(result)
    result_df.to_csv(
        output_file,
        mode="a" if file_exists else "w",
        header=not file_exists,
        index=False,
    )
    file_exists = True  # After first write, file exists

    # Short pause between models (reason not stated in the notebook).
    time.sleep(2)

In [None]:
# Load and display final results
final_results = pd.read_csv(output_file)
print(f"Total results saved: {len(final_results)}")
final_results.tail()

Experiments - CoT

In [None]:
# Chain-of-thought experiment: send every claim to every model with the CoT
# prompt and append the raw answers to a CSV file, one batch per model.
claims = df["claim"].tolist()
output_file = "cot_results.csv"

# Decide whether a header row still has to be written. A zero-byte file (for
# example one created by `touch` or left by an aborted run) is treated as
# missing: appending to it without a header would make a later
# `pd.read_csv(output_file)` interpret the first data row as column names.
file_exists = os.path.isfile(output_file) and os.path.getsize(output_file) > 0

models = [
    "deepseek-r1:32b",
    "mistral:7b",
    "llama3.1:8b",
    "llama3.1:70b",
    "qwen3:30b",
]

for model in models:
    # Column-oriented accumulator for this model's rows.
    result = {
        "model": [],
        "method": [],
        "claim": [],
        "documents": [],
        "answer": [],
    }

    for claim in tqdm(claims, desc=f"CoT - {model}"):
        response = llm.call_ollama(
            model=model,
            prompt=cot_prompt.format(claim=claim),
            client=client,
        )
        # Fall back to the "NAN" placeholder when the reply has no "response" key.
        output = response.get("response", "NAN")

        result["model"].append(model)
        result["method"].append("cot")
        result["claim"].append(claim)
        result["documents"].append("NAN")  # no retrieved documents in this setting
        result["answer"].append(output)

    # Persist this model's rows before moving on, so a failure in a later model
    # does not discard results that were already collected.
    result_df = pd.DataFrame(result)
    result_df.to_csv(
        output_file,
        mode="a" if file_exists else "w",
        header=not file_exists,
        index=False,
    )
    file_exists = True  # After first write, file exists

    # Short pause between models (reason not stated in the notebook).
    time.sleep(2)

In [None]:
# Load and display final results
final_results = pd.read_csv(output_file)
print(f"Total results saved: {len(final_results)}")
final_results.tail()