In [1]:
import sys

sys.path.append("..")

from methods.simple_rag import SimpleRAG
from dataloader.load_datasets import *
from helpers import llm
from helpers import pubmed
from tqdm import tqdm
import time
import os

Load claims

In [2]:
# Load the claims table from CSV, using the file's first column as the index.
# NOTE(review): `pd` is not imported explicitly in the import cell; presumably it
# arrives through `from dataloader.load_datasets import *` — confirm.
df = pd.read_csv("../dataloader/scifact_medical_causal_claims.csv", index_col=0)
# Preview the first rows.
df.head()

Unnamed: 0_level_0,id,claim,evidence_doc_id,evidence_label,evidence_sentences,cited_doc_ids,causal_result_raw,is_medical_causal
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7,12,40mg/day dosage of folic acid and 2mg/day dosa...,33409100,SUPPORT,[8],[33409100],Yes,True
24,30,A breast cancer patient's capacity to metaboli...,24341590,SUPPORT,[10],[24341590],Yes,True
29,34,A deficiency of folate increases blood levels ...,11705328,SUPPORT,[4],[11705328],Yes,True
32,39,A diminished ovarian reserve does not solely i...,13497630,SUPPORT,[7],[13497630],Yes,True
42,41,A high microerythrocyte count protects against...,18174210,SUPPORT,[1 9],[18174210],Yes,True


Load models and set up APIs

In [3]:
# Model tag and Ollama server address used by the cells below.
default_model = "deepseek-r1:32b"  # used for keywords
host = "localhost"
port = 11434

# Client object created by the project helper; later cells pass it to llm.call_ollama.
client = llm.setup_ollama_client(host=host, port=port)
# Project helper for PubMed API access.
# NOTE(review): where the key is read from is not visible here — confirm.
pubmed.set_api_key()

Prompts:

In [4]:
# Prompt templates; each is filled with str.format before being sent to a model.
# NOTE(review): the claims table preview shows the label "SUPPORT", while these
# prompts ask for "SUPPORTED" — confirm that downstream scoring normalizes this.

# Chain-of-thought template: asks for step-by-step reasoning followed by a
# "Final Answer" line. Placeholder: {claim}.
cot_prompt = """
You are a biomedical expert specializing in causal inference. Classify the following medical causal claim as either SUPPORTED or CONTRADICT, based on well-established scientific and clinical knowledge.

Before providing your final answer, think through the following steps:
1. What is the proposed causal relationship in this claim?
2. What does established scientific literature say about this relationship?
3. Are there well-documented mechanisms that support or refute this claim?

After your analysis, provide your final classification.

Claim: "{claim}"

Reasoning: 
[Provide your step-by-step reasoning here]

Final Answer: [SUPPORTED or CONTRADICT]
"""

# Zero-shot template: asks for a one-word answer. Placeholder: {claim}.
# NOTE(review): "You task" below is a typo for "Your task". It is left untouched
# because the text is sent verbatim to the models; fix it only if the results
# will be regenerated.
zero_shot_prompt = """
You are a biomedical expert specializing in causal inference and evidence-based reasoning. 
You task is to assess whether the following medical causal claim is SUPPORTED or CONTRADICT based on general scientific and clinical knowledge.
Respond with only one word: SUPPORTED or CONTRADICT.

Claim: "{claim}"
Answer:
"""

# Retrieval-augmented template: same reasoning steps as the chain-of-thought
# template, preceded by a CONTEXT section. Placeholders: {documents} and {claim}.
rag_prompt = """
You are a biomedical expert specializing in causal inference. Classify the following medical causal claim as either SUPPORTED or CONTRADICT, based on the given well-established scientific and clinical knowledge.

Evaluate the medical causal claim using the provided CONTEXT documents:

CONTEXT
{documents}

Before providing your final answer, think through the following steps:
1. What is the proposed causal relationship in this claim?
2. What does established scientific literature say about this relationship?
3. Are there well-documented mechanisms that support or refute this claim?

After your analysis, provide your final classification.

Claim: "{claim}"

Reasoning: 
[Provide your step-by-step reasoning here]

Final Answer: [SUPPORTED or CONTRADICT]
"""

Experiments - Zero Shot

In [None]:
claims = df["claim"].tolist()
output_file = "zero_shot_results.csv"

# An existing file is appended to without repeating the header row.
file_exists = os.path.isfile(output_file)

models = [
    "deepseek-r1:32b",
    "mistral:7b",
    "llama3.1:8b",
    "llama3.1:70b",
    "qwen3:30b",
]

result_columns = ["model", "method", "claim", "documents", "answer"]

for model in models:
    # One row per claim; zero-shot uses no documents, hence the "NAN" placeholder.
    rows = []
    for claim in tqdm(claims, desc=f"Zero Shot - {model}"):
        response = llm.call_ollama(
            model=model,
            prompt=zero_shot_prompt.format(claim=claim),
            client=client,
        )
        rows.append(
            (model, "zero_shot", claim, "NAN", response.get("response", "NAN"))
        )

    # Persist this model's rows once all of its claims have been answered.
    result_df = pd.DataFrame(rows, columns=result_columns)
    if file_exists:
        result_df.to_csv(output_file, mode="a", header=False, index=False)
    else:
        result_df.to_csv(output_file, mode="w", header=True, index=False)
        file_exists = True  # every later write appends

    time.sleep(2)

In [None]:
# Load and display final results
final_results = pd.read_csv(output_file)
print(f"Total results saved: {len(final_results)}")
final_results.tail()

Experiments - CoT

In [None]:
claims = df["claim"].tolist()
output_file = "cot_results.csv"

# An existing file is appended to without repeating the header row.
file_exists = os.path.isfile(output_file)

models = [
    "deepseek-r1:32b",
    "mistral:7b",
    "llama3.1:8b",
    "llama3.1:70b",
    "qwen3:30b",
]

result_columns = ["model", "method", "claim", "documents", "answer"]

for model in models:
    # One row per claim; chain-of-thought uses no documents, hence the "NAN" placeholder.
    rows = []
    for claim in tqdm(claims, desc=f"CoT - {model}"):
        response = llm.call_ollama(
            model=model,
            prompt=cot_prompt.format(claim=claim),
            client=client,
        )
        rows.append((model, "cot", claim, "NAN", response.get("response", "NAN")))

    # Persist this model's rows once all of its claims have been answered.
    result_df = pd.DataFrame(rows, columns=result_columns)
    if file_exists:
        result_df.to_csv(output_file, mode="a", header=False, index=False)
    else:
        result_df.to_csv(output_file, mode="w", header=True, index=False)
        file_exists = True  # every later write appends

    time.sleep(2)

In [None]:
# Load and display final results
final_results = pd.read_csv(output_file)
print(f"Total results saved: {len(final_results)}")
final_results.tail()

### RAG

Store RAG results

In [5]:
def get_keywords(claim, n_keywords=5, model="deepseek-r1:32b", client=None):
    """Ask an LLM for search keywords describing ``claim``.

    Args:
        claim: Claim text to derive keywords from.
        n_keywords: Maximum number of keywords to keep.
        model: Model tag passed to ``llm.call_ollama``.
        client: Client object passed to ``llm.call_ollama``. When omitted, the
            notebook-level ``client`` variable is used. (The previous default
            was the model-name string, which is not a client.)

    Returns:
        The keywords joined with " + ". If the reply has no "response" entry,
        the literal text "Nothing!!" is parsed instead.

    Raises:
        ValueError: If no client is passed and no notebook-level ``client`` exists.
    """
    if client is None:
        # The parameter shadows the notebook-level name, so look it up explicitly.
        client = globals().get("client")
        if client is None:
            raise ValueError(
                "get_keywords needs an Ollama client: pass client=... or define "
                "a notebook-level `client` first."
            )

    response = llm.call_ollama(
        model=model,
        prompt=f"Suggest me a set of keywords to search for finding scientific articles about the following claim: {claim}. Give just a simple list of {n_keywords} keywords, separated by commas with no further explanation.",
        client=client,
    )

    output = response.get("response", "Nothing!!")

    # Extract keywords after </think> tag if present
    if "</think>" in output:
        output = output.split("</think>")[-1].strip()

    # Some models ignore "no further explanation" and open with a line such as
    # "Here is a list of 4 keywords ...:". Drop such leading lines (and blank
    # lines) so the introduction does not end up inside the first keyword.
    # The last remaining line is never dropped.
    lines = output.strip().splitlines()
    while len(lines) > 1 and (
        not lines[0].strip() or lines[0].rstrip().endswith(":")
    ):
        lines.pop(0)
    output = "\n".join(lines)

    # Split by comma and clean up
    keywords = [kw.strip() for kw in output.split(",") if kw.strip()][:n_keywords]

    return " + ".join(keywords)


def retrieve(keywords, full=False, top_k=5):
    """Fetch papers for ``keywords`` and concatenate their text into one string.

    Args:
        keywords: Query passed to ``pubmed.get_papers``.
        full: When False, only abstracts are used. When True, the full text is
            requested for papers that have a ``pmc_id``; the abstract is used
            when there is no ``pmc_id`` or the full-text fetch returns None.
        top_k: Passed to ``pubmed.get_papers`` as ``top_k``.

    Returns:
        One labelled section per paper, concatenated. Papers with neither usable
        full text nor an abstract are skipped (previously a ``None`` abstract
        raised ``TypeError``). Returns "" when nothing is usable.
    """
    papers = pubmed.get_papers(keywords, top_k=top_k)
    # The abstract label has a trailing space in abstract-only mode and none in
    # full-text mode; both are kept exactly as before.
    abstract_label = "\nAbstract Paper {}:" if full else "\nAbstract Paper {}: "

    sections = []
    for i, paper in enumerate(papers):
        text = None
        if full and paper.pmc_id is not None:
            text = pubmed.fetch_pmc_fulltext(paper.pmc_id)

        if text is not None:
            sections.append(f"\nFull Paper {i + 1}:" + text)
        elif paper.abstract is not None:
            sections.append(abstract_label.format(i + 1) + paper.abstract)
        # else: nothing to add for this paper; numbering still follows its rank.
    return "".join(sections)

In [None]:
claims = df["claim"].tolist()

retrieved_rows = []
for claim in tqdm(claims):
    keywords = get_keywords(claim, n_keywords=4, model=default_model, client=client)

    # Two contexts per claim: up to 10 abstracts, and up to 3 papers with full text.
    content_abs = retrieve(keywords, full=False, top_k=10)
    content_full = retrieve(keywords, full=True, top_k=3)

    print(f"content_abs: {content_abs}")
    # Empty retrievals are replaced by a fixed placeholder sentence.
    if not content_abs:
        print("No relevant documents found for claim, abstract")
        content_abs = "No relevant documents found."
    if not content_full:
        print("No relevant documents found for claim, full text")
        content_full = "No relevant documents found."

    retrieved_rows.append(
        {
            "claim": claim,
            "content_abs": content_abs,
            "content_full": content_full,
            "keywords": keywords,
        }
    )

# NOTE(review): the retry cell further down writes this same path, while the
# cell that follows reads "rag_retrieved.csv" — confirm the intended file names.
results = pd.DataFrame(
    retrieved_rows, columns=["claim", "content_abs", "content_full", "keywords"]
)
results.to_csv("rag_retrieved_2.csv", index=False)

In [18]:
# Load the cached retrieval table.
# NOTE(review): this rebinds `df`, which earlier held the claims table, and it
# reads "rag_retrieved.csv" although the retrieval cell above writes
# "rag_retrieved_2.csv" — confirm both are intended.
df = pd.read_csv("rag_retrieved.csv")
df.head()

Unnamed: 0,claim,content_abs,content_full,keywords
0,40mg/day dosage of folic acid and 2mg/day dosa...,\nAbstract Paper 1: In patients with chronic k...,\nAbstract Paper 1:In patients with chronic ki...,folic acid + vitamin B12 + chronic kidney dise...
1,A breast cancer patient's capacity to metaboli...,\nAbstract Paper 1: The development of acquire...,\nFull Paper 1:ABSTRACT:\nObjective The develo...,breast cancer + tamoxifen + metabolism + treat...
2,A deficiency of folate increases blood levels ...,\nAbstract Paper 1: Gestational diabetes melli...,\nFull Paper 1:ABSTRACT:\nGestational diabetes...,folate deficiency + homocysteine + vitamin B9 ...
3,A diminished ovarian reserve does not solely i...,\nAbstract Paper 1: To evaluate the predictive...,\nAbstract Paper 1:To evaluate the predictive ...,diminished ovarian reserve + infertility + ova...
4,A high microerythrocyte count protects against...,No relevant documents found.,No relevant documents found.,Microerythrocyte count + Anemia protection + A...


In [21]:
# (rows, columns) of the retrieval table loaded above.
df.shape

(200, 4)

In [22]:
# "Hard" claims: rows whose abstract context is the no-results placeholder.
no_abstract_hits = df["content_abs"].eq("No relevant documents found.")
hard_claims = df[no_abstract_hits]

In [23]:
# Preview the claims for which abstract retrieval returned nothing.
hard_claims.head()

Unnamed: 0,claim,content_abs,content_full,keywords
4,A high microerythrocyte count protects against...,No relevant documents found.,No relevant documents found.,Microerythrocyte count + Anemia protection + A...
7,A strong bias in the phage genome locations wh...,No relevant documents found.,No relevant documents found.,CRISPR subtype + Phage genome location + Space...
9,AMP-activated protein kinase (AMPK) activation...,No relevant documents found.,No relevant documents found.,AMP-activated protein kinase (AMPK) + Inflamma...
10,ART substantially reduces infectiveness of HIV...,No relevant documents found.,No relevant documents found.,Here is a list of 4 keywords to search for sci...
13,Activation of the Rac1 homolog CED-10 kills vi...,No relevant documents found.,No relevant documents found.,CED-10 + Rac1 homolog + SRGP-1 mutant + Caenor...


In [27]:
# Show the keyword strings of the first four hard claims.
hard_claims["keywords"].head(4).tolist()

['Microerythrocyte count + Anemia protection + Alpha-thalassemia trait + Hematological mechanisms',
 'CRISPR subtype + Phage genome location + Spacer origin bias + CRISPR immunity',
 'AMP-activated protein kinase (AMPK) + Inflammation-related fibrosis + Pulmonary fibrosis + Anti-inflammatory effects',
 'Here is a list of 4 keywords to search for scientific articles about the claim:\n\nAntiretroviral Therapy (ART) + HIV Infectiousness + Treatment as Prevention + Transmission Reduction']

In [None]:
retried_rows = []

# Re-run retrieval for the hard claims, reusing the keywords already stored for them.
claim_keyword_pairs = zip(hard_claims["claim"], hard_claims["keywords"])
for claim, keywords in tqdm(
    claim_keyword_pairs, total=len(hard_claims), desc="Processing rows"
):
    content_abs = retrieve(keywords, full=False, top_k=10)
    content_full = retrieve(keywords, full=True, top_k=3)

    print(f"content_abs: {content_abs}")
    # Empty retrievals are replaced by a fixed placeholder sentence.
    if not content_abs:
        print("No relevant documents found for claim, abstract")
        content_abs = "No relevant documents found."
    if not content_full:
        print("No relevant documents found for claim, full text")
        content_full = "No relevant documents found."

    retried_rows.append(
        {
            "claim": claim,
            "content_abs": content_abs,
            "content_full": content_full,
            "keywords": keywords,
        }
    )

# NOTE(review): the first retrieval cell writes this same path, so running both
# leaves only the output of whichever ran last — confirm the intended file name.
results = pd.DataFrame(
    retried_rows, columns=["claim", "content_abs", "content_full", "keywords"]
)
results.to_csv("rag_retrieved_2.csv", index=False)