In [14]:
import dspy

llama3b = dspy.OllamaLocal(model='phi4:latest', temperature=0.7)
gpt4o = dspy.OllamaLocal(model='deepseek-v2:latest', temperature=0.7)

dspy.configure(lm=llama3b)

In [15]:
import random
from dspy.datasets import DataLoader

kwargs = dict(fields=("claim", "supporting_facts", "hpqa_id", "num_hops"), input_keys=("claim",))
hover = DataLoader().from_huggingface(dataset_name="hover-nlp/hover", split="train", trust_remote_code=True, **kwargs)

hpqa_ids = set()
hover = [
    dspy.Example(claim=x.claim, titles=list(set([y["key"] for y in x.supporting_facts]))).with_inputs("claim")
    for x in hover
    if x["num_hops"] == 3 and x["hpqa_id"] not in hpqa_ids and not hpqa_ids.add(x["hpqa_id"])
]

random.Random(0).shuffle(hover)
trainset, devset, testset = hover[:100], hover[100:200], hover[650:]

In [16]:
example = trainset[0]

print("Claim:", example.claim)
print("Pages that must be retrieved:", example.titles)

Claim: This director is known for his work on Miss Potter. The Academy of Motion Picture Arts and Sciences presents the award in which he was nominated for his work in "Babe".
Pages that must be retrieved: ['Miss Potter', 'Chris Noonan', 'Academy Award for Best Director']


In [17]:
DOCS = {}

def search(query: str, k: int) -> list[str]:
    results = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')(query, k=k)
    results = [x['text'] for x in results]

    for result in results:
        title, text = result.split(" | ", 1)
        DOCS[title] = text

    return results

In [18]:
def search_wikipedia(query: str) -> list[str]:
    """Returns top-5 results and then the titles of the top-5 to top-30 results."""

    topK = search(query, 30)
    titles, topK = [f"`{x.split(' | ')[0]}`" for x in topK[5:30]], topK[:5]
    return topK + [f"Other retrieved pages have titles: {', '.join(titles)}."]

def lookup_wikipedia(title: str) -> str:
    """Returns the text of the Wikipedia page, if it exists."""

    if title in DOCS:
        return DOCS[title]

    results = [x for x in search(title, 10) if x.startswith(title + " | ")]
    if not results:
        return f"No Wikipedia page found for title: {title}"
    return results[0]

In [19]:
instructions = "Find all Wikipedia titles relevant to verifying (or refuting) the claim."
signature = dspy.Signature("claim -> titles: list[str]", instructions)
react = dspy.ReAct(signature, tools=[search_wikipedia, lookup_wikipedia], max_iters=20)

In [20]:
react(claim="David Gregory was born in 1625.")

Prediction(
    trajectory={'thought_0': 'Claim: David Gregory was born in 1625.\n\nTrajectory:\n\n1. **Next Thought:** To verify the claim about David Gregory\'s birth year, I need to find reliable sources that mention his birth date or provide a biography. Wikipedia is a good starting point for historical figures like him.\n\n2. **Next Tool Name:** search_wikipedia\n\n3. **Next Tool Args:**\n\n```json\n{"query": "David Gregory"}\n```\n\n---\n\n**Observation:** The top-5 results from the Wikipedia search might include titles such as "David Gregory (mathematician)," along with other related entries. These will help determine if there\'s a specific page dedicated to David Gregory that mentions his birth year.\n\n4. **Next Thought:** I\'ll', 'tool_name_0': '', 'tool_args_0': 'Claim: David Gregory was born in 1625.\n\nTrajectory:\n\n1. **Next Thought:** To verify the claim about David Gregory\'s birth year, I need to find reliable sources that mention his birth date or provide a biography

In [21]:
def top5_recall(example, pred, trace=None):
    gold_titles = example.titles
    recall = sum(x in pred.titles[:5] for x in gold_titles) / len(gold_titles)

    # If we're "bootstrapping" for optimization, return True if and only if the recall is perfect.
    if trace is not None:
        return recall >= 1.0
    
    # If we're just doing inference, just measure the recall.
    return recall

evaluate = dspy.Evaluate(devset=devset, metric=top5_recall, num_threads=16, display_progress=True, display_table=5)

In [24]:
def safe_react(claim: str):
    try:
        return react(claim=claim)
    except Exception as e:
        return dspy.Prediction(titles=[])

evaluate(safe_react)

Average Metric: 0.00 / 100 (0.0%): 100%|██████████| 100/100 [15:21<00:00,  9.22s/it]

2025/01/18 19:11:31 INFO dspy.evaluate.evaluate: Average Metric: 0.0 / 100 (0.0%)





Unnamed: 0,claim,example_titles,pred_titles,top5_recall
0,The Church of England's movement that inspired the Trinity Episcop...,"[Samuel Rickards, Oxford Movement, Trinity Episcopal Church (Hough...",[],
1,"Red, White & Crüe and this athlete both fight. The french fighter ...","[Bobby Stewart, Red, White &amp; Crüe, Mike Tyson]",[],
2,The writer/director/actor from Glen or Glenda and Fernand Rivers s...,"[Ed Wood, Glen or Glenda, Fernand Rivers]",[],
3,The film by Sandi Sissel was released before The End of Suburbia.,"[Chicken Ranch (film), Sandi Sissel, The End of Suburbia]",[],
4,The actor who played captain hook in the live production with Tayl...,"[Peter Pan Live!, Taylor Louderman, Christopher Walken]",[],


0.0

In [None]:
kwargs = dict(teacher_settings=dict(lm=gpt4o), prompt_model=gpt4o, max_errors=999)

tp = dspy.MIPROv2(metric=top5_recall, auto="medium", num_threads=16, **kwargs)
optimized_react = tp.compile(react, trainset=trainset, max_bootstrapped_demos=3, max_labeled_demos=0)

2025/01/18 19:11:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:
num_trials: 25
minibatch: True
num_candidates: 9
valset size: 80

