In [19]:
import sys

sys.path.append("..")

from methods.simple_rag import SimpleRAG
from dataloader.load_datasets import *
from helpers import llm
from helpers import pubmed
# Explicit imports are kept after the star import so it cannot shadow them.
import re
import pandas as pd
from tqdm import tqdm
from time import sleep, time

Load claims

In [5]:
# Read the claims CSV, using its first column as the DataFrame index,
# then preview the first rows.
claims_path = "../dataloader/scifact_medical_causal_claims.csv"
df = pd.read_csv(claims_path, index_col=0)
df.head()

Unnamed: 0_level_0,id,claim,evidence_doc_id,evidence_label,evidence_sentences,cited_doc_ids,causal_result_raw,is_medical_causal
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7,12,40mg/day dosage of folic acid and 2mg/day dosa...,33409100,SUPPORT,[8],[33409100],Yes,True
24,30,A breast cancer patient's capacity to metaboli...,24341590,SUPPORT,[10],[24341590],Yes,True
29,34,A deficiency of folate increases blood levels ...,11705328,SUPPORT,[4],[11705328],Yes,True
32,39,A diminished ovarian reserve does not solely i...,13497630,SUPPORT,[7],[13497630],Yes,True
42,41,A high microerythrocyte count protects against...,18174210,SUPPORT,[1 9],[18174210],Yes,True


Load models and set up APIs

In [7]:
# Connection settings for the Ollama server and the notebook-level default model.
host, port = "localhost", 11434
model = "deepseek-r1:32b"

# Shared Ollama client used by the LLM calls in this notebook.
client = llm.setup_ollama_client(host=host, port=port)

# NOTE(review): presumably registers the PubMed API key for later requests;
# confirm against helpers.pubmed.
pubmed.set_api_key()

In [12]:
def get_keywords(claim, n_keywords=5, model="deepseek-r1:32b", client=None):
    """Ask an Ollama model for search keywords related to ``claim``.

    Args:
        claim: Claim text inserted into the prompt.
        n_keywords: Number of keywords requested in the prompt and the maximum
            number returned.
        model: Model name forwarded to ``llm.call_ollama``.
        client: Client forwarded to ``llm.call_ollama``. When ``None``, the
            notebook-level ``client`` is looked up at call time, so re-running
            the setup cell does not leave this function holding a stale client.

    Returns:
        A list of at most ``n_keywords`` non-empty, whitespace-stripped
        keywords. The list is empty when the response carries no text.

    Raises:
        NameError: If ``client`` is omitted and no notebook-level ``client``
            has been defined yet.
    """
    if client is None:
        client = globals().get("client")
        if client is None:
            raise NameError(
                "No Ollama client available: run the setup cell or pass client=..."
            )

    response = llm.call_ollama(
        model=model,
        prompt=f"Suggest me a set of keywords to search for finding scientific articles about the following claim: {claim}. Give just a simple list of {n_keywords} keywords, separated by commas.",
        client=client,
    )

    # A missing/empty response yields no keywords instead of a sentinel string
    # that callers would mistake for a real search term.
    output = response.get("response") or ""

    # Reasoning models (e.g. deepseek-r1) may emit a <think>...</think> block
    # before the answer; drop it so its commas are not parsed as keywords.
    # This is a no-op when the helper already strips it or the model emits none.
    output = re.sub(r"<think>.*?</think>", "", output, flags=re.DOTALL)

    # Discard empty fragments (trailing or doubled commas) before truncating,
    # so they do not use up the n_keywords budget.
    keywords = [kw.strip() for kw in output.split(",")]
    return [kw for kw in keywords if kw][:n_keywords]

Prompts:

In [18]:
# Chain-of-thought template: the model is asked to reason through three steps
# before giving its final SUPPORTED / CONTRADICT label.
# Fill with `.format(claim=...)`.
cot_prompt = """
You are a biomedical expert specializing in causal inference. Classify the following medical causal claim as either SUPPORTED or CONTRADICT, based on well-established scientific and clinical knowledge.

Before providing your final answer, think through the following steps:
1. What is the proposed causal relationship in this claim?
2. What does established scientific literature say about this relationship?
3. Are there well-documented mechanisms that support or refute this claim?

After your analysis, provide your final classification.

Claim: "{claim}"

Reasoning: 
[Provide your step-by-step reasoning here]

Final Answer: [SUPPORTED or CONTRADICT]
"""

# Zero-shot template: the model is asked for a single-word label only.
# Fill with `.format(claim=...)`.
zero_shot_prompt = """
You are a biomedical expert specializing in causal inference and evidence-based reasoning. 
Your task is to assess whether the following medical causal claim is SUPPORTED or CONTRADICT based on general scientific and clinical knowledge.
Respond with only one word: SUPPORTED or CONTRADICT.

Claim: "{claim}"
Answer:
"""

Experiment - Zero Shot

In [None]:
# Zero-shot baseline: every model in `models` answers every claim using
# `zero_shot_prompt`; the raw model output is stored unparsed in `answer`.
claims = df["claim"].tolist()

models = ["deepseek-r1:32b", "mistral:7b", "llama3.1:8b", "llama3.1:70b", "qwen3:30b"]

RESULT_COLUMNS = ["model", "method", "claim", "documents", "answer"]
PAUSE_BETWEEN_MODELS_S = 2  # seconds to wait after finishing each model

records = []

# The loop variable is `model_name`, not `model`, so the notebook-level `model`
# setting from the setup cell is not overwritten by this experiment.
for model_name in models:
    for claim in tqdm(claims, desc=f"Zero Shot - {model_name}"):
        response = llm.call_ollama(
            model=model_name,
            prompt=zero_shot_prompt.format(claim=claim),
            client=client,
        )

        records.append(
            {
                "model": model_name,
                "method": "zero_shot",
                "claim": claim,
                # No documents are retrieved in the zero-shot setting.
                "documents": "NAN",
                "answer": response.get("response", "NAN"),
            }
        )

    # `time` is bound to the function time.time by the import cell, so the
    # original `time.sleep(2)` raised AttributeError after the first model.
    sleep(PAUSE_BETWEEN_MODELS_S)

# Explicit columns keep the schema stable even when no records were collected.
result = pd.DataFrame(records, columns=RESULT_COLUMNS)

In [None]:
# Write the zero-shot answers to the current working directory,
# leaving out the DataFrame index.
result.to_csv("zero_shot_results.csv", index=False)

Experiment - CoT

Experiment - Simple RAG

In [None]:
# Config
num_keywords = 5
full_retrieve = True # retrieve the full papers
top_k = 3 # number of papers to retrieve for each keyword