# RAG Experiments

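This notebook runs retrieval-augmented generation (RAG) experiments: for each medical causal claim, several LLMs are given abstracts retrieved from PubMed and asked to judge whether those abstracts support or contradict the claim.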

In [1]:
import sys

sys.path.append("..")

from helpers import llm
import pandas as pd
from tqdm import tqdm
import time
import os

## Load RAG Documents

In [2]:
# Read the table of claims with their retrieved abstracts, report how many
# rows were loaded, and preview the first few.
# NOTE(review): the file appears to carry a saved index that loads as an
# "Unnamed: 0" column — consider index_col=0 if that column is not needed.
rag_df = pd.read_csv(filepath_or_buffer="rag_documents.csv")
print(f"Loaded {rag_df.shape[0]} claims with retrieved documents")
rag_df.head()

Loaded 200 claims with retrieved documents


Unnamed: 0,claim,keywords,paper_ids,concatenated_abstracts,num_papers
0,40mg/day dosage of folic acid and 2mg/day dosa...,folic acid AND vitamin B12 AND chronic kidney ...,"40331388,36839219,36757031,31866227,31629573,3...",[PMID: 40331388] In patients with chronic kidn...,10
1,A breast cancer patient's capacity to metaboli...,breast cancer AND tamoxifen AND metabolism AND...,"41031251,40983817,40967344,40808956,40776023,4...",[PMID: 41031251] <b>Background:</b> Tamoxifen ...,10
2,A deficiency of folate increases blood levels ...,folate deficiency AND homocysteine AND relatio...,"41111878,40960431,40948861,40362647,40354985,4...",[PMID: 41111878] Cerebral venous sinus thrombo...,10
3,A diminished ovarian reserve does not solely i...,diminished ovarian reserve AND infertility AND...,"40801481,40801036,40773998,40725960,40691107,4...",[PMID: 40801481] Diminished ovarian reserve (D...,10
4,A high microerythrocyte count protects against...,Microerythrocyte AND Thalassemia AND Anemia se...,"41187351,41187335,41187318,41187268,41187246,4...",[PMID: 41187351] Epidemiological studies of la...,10


## Setup Ollama Client

In [3]:
# Connection settings for the Ollama server; the client built here is reused
# for every model call later in the notebook.
host, port = "localhost", 11434

client = llm.setup_ollama_client(
    host=host,
    port=port,
)

## Define RAG Prompt

In [4]:
# Prompt template for the RAG evaluation. It contains two named placeholders,
# {documents} (the abstracts shown to the model) and {claim} (the statement to
# judge), which are filled in with str.format() when the prompt is sent.
# The model is asked to reason, cite PMIDs, and end with a
# "Final Answer:" line that is either SUPPORTED or CONTRADICT.
rag_prompt = """
You are a biomedical expert specializing in causal inference. 

Evaluate the following medical causal claim based ONLY on the provided scientific abstracts.

ABSTRACTS:
{documents}

CLAIM: "{claim}"

Carefully analyze the evidence in the abstracts. If the abstracts support the claim, respond with SUPPORTED. If they contradict the claim, respond with CONTRADICT.

Provide your reasoning, cite relevant papers by PMID, and then give your final answer.

Final Answer: [SUPPORTED or CONTRADICT]
"""

## Run RAG Experiments

In [None]:
# Run every model over every claim and append the answers to a CSV file.
#
# The cell is safe to re-run: models whose rows are already in the output file
# are skipped, so a Restart & Run All (or a re-run after a crash) neither
# repeats finished work nor duplicates rows.
output_file = "rag_results.csv"

# Column order of the results file; passed explicitly so the header is written
# correctly even if there are no rows.
result_columns = [
    "model",
    "method",
    "claim",
    "keywords",
    "paper_ids",
    "num_papers",
    "documents",
    "answer",
]

models = [
    "deepseek-r1:32b",
    "mistral:7b",
    "llama3.1:8b",
    "qwen3:30b",
    "qwen3:8b",
    "llama3.1:70b",
]

# A header row is needed when the file is missing OR present but empty;
# otherwise new rows are appended below the existing ones.
file_exists = os.path.isfile(output_file) and os.path.getsize(output_file) > 0

# Rows for a model are only written once that model has processed every claim,
# so any model name found in a file produced by this cell is a finished model.
if file_exists:
    completed_models = set(pd.read_csv(output_file, usecols=["model"])["model"])
else:
    completed_models = set()

for model in models:
    if model in completed_models:
        print(f"Skipping {model} (results already in {output_file})")
        continue

    records = []

    for _, row in tqdm(rag_df.iterrows(), total=len(rag_df), desc=f"RAG - {model}"):
        claim = row["claim"]
        documents = row["concatenated_abstracts"]

        # Call LLM with RAG prompt
        response = llm.call_ollama(
            model=model,
            prompt=rag_prompt.format(claim=claim, documents=documents),
            client=client,
        )

        records.append(
            {
                "model": model,
                "method": "rag",
                "claim": claim,
                "keywords": row["keywords"],
                "paper_ids": row["paper_ids"],
                "num_papers": row["num_papers"],
                "documents": documents,
                # Falls back to the literal string "NAN" when the reply has no
                # "response" entry.
                "answer": response.get("response", "NAN"),
            }
        )

    # Convert to DataFrame and append to CSV
    result_df = pd.DataFrame(records, columns=result_columns)
    result_df.to_csv(
        output_file,
        mode="a" if file_exists else "w",
        header=not file_exists,
        index=False,
    )
    file_exists = True  # After first write, file exists and has a header

    print(f"Completed {model}")
    time.sleep(2)  # short pause before moving on to the next model

print("\nAll experiments complete!")

## Load and Display Results

In [None]:
# Load and display final results
final_results = pd.read_csv(output_file)
print(f"Total results saved: {len(final_results)}")
final_results.tail()

## Summary Statistics

In [None]:
# Group the results by model once and reuse the grouping for both summaries:
# the number of rows per model and the mean number of papers per model.
per_model = final_results.groupby("model")

print("Results by model:")
print(per_model.size())

print("\nAverage number of papers used:")
print(per_model["num_papers"].mean())

## Example: View One Result

In [None]:
# Display the first result
example_idx = 0
print(f"Model: {final_results.iloc[example_idx]['model']}")
print(f"\nClaim: {final_results.iloc[example_idx]['claim']}")
print(f"\nKeywords: {final_results.iloc[example_idx]['keywords']}")
print(f"\nNumber of papers: {final_results.iloc[example_idx]['num_papers']}")
print(f"\nPaper IDs: {final_results.iloc[example_idx]['paper_ids']}")
print(f"\nAnswer:\n{final_results.iloc[example_idx]['answer']}")