In [42]:
import dspy
import asyncio
import re
import pandas as pd

DSPy backbone: signatures and modules.
dspy.Signature: defines the input/output contract of a DSPy program.
dspy.Module: defines the logic for interacting with the LLM. It takes the user input and a signature.

In [43]:
import mcp
from mcp import ClientSession , StdioServerParameters
from mcp.client.stdio import stdio_client

In [44]:
from helper import openai_api_key
import os
# Publish the key imported from the local `helper` module as the
# OPENAI_API_KEY environment variable for this process.
# NOTE(review): presumably read by the OpenAI-backed LM configured below — confirm.
os.environ["OPENAI_API_KEY"] = openai_api_key

In [45]:
import dspy
# Register gpt-4o-mini as the default language model for every DSPy module
# created in this notebook.
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))


In [46]:
# Load the healthcare records and discard the billing/location columns that
# the search examples below never look at.
# NOTE(review): the absolute path is specific to one machine; adjust it before
# running the notebook elsewhere.
df = pd.read_csv(
    "/Users/priyanka./Documents/ai-agents/DSPy/healthcare_data.csv"
).drop(
    columns=["Billing Amount", "Room Number", "Insurance Provider", "Doctor", "Hospital"]
)

In [47]:
# Preview the first rows to check which columns remain after the drop above.
df.head()

Unnamed: 0,Name,Age,Gender,Blood Type,Medical Condition,Date of Admission,Admission Type,Discharge Date,Medication,Test Results
0,Bobby JacksOn,30,Male,B-,Cancer,2024-01-31,Urgent,2024-02-02,Paracetamol,Normal
1,LesLie TErRy,62,Male,A+,Obesity,2019-08-20,Emergency,2019-08-26,Ibuprofen,Inconclusive
2,DaNnY sMitH,76,Female,A-,Obesity,2022-09-22,Emergency,2022-10-07,Aspirin,Normal
3,andrEw waTtS,28,Female,O+,Diabetes,2020-11-18,Elective,2020-12-18,Ibuprofen,Abnormal
4,adrIENNE bEll,43,Female,AB+,Cancer,2022-09-19,Urgent,2022-10-09,Penicillin,Abnormal


In [48]:
# Natural-language questions that are sent to the ReAct agent one at a time
# further down in the notebook.
user_queries = [
    "Find patients with abnormal results in 2024",
    "List all cancer patients above age 40",
    "Who was admitted for diabetes in 2022?",
    "List female patients whose test results are abnormal",
]

In [49]:
import dspy

def search_dataset(query: str, k: int = 3) -> list[str]:
    """
    Search the patient records for a literal phrase and summarise the hits.

    The searchable columns of the module-level ``df`` are joined into one
    space-separated string per row, and a row matches when ``query`` occurs
    anywhere in that string, ignoring case. The query is matched as a single
    literal phrase: it is not split into keywords, and regex metacharacters
    (for example the ``+`` in blood type ``A+``) have no special meaning.

    Args:
        query: Text to look for, e.g. ``"Abnormal"``, ``"2024"`` or ``"A+"``.
        k: Maximum number of matching rows to return, in dataset order.

    Returns:
        Up to ``k`` summary strings, one per matching row, each listing the
        patient's name, medical condition, medication and test result. The
        list is empty when nothing matches.
    """
    cols_to_search = [
        "Name", "Age", "Gender", "Blood Type", "Medication", "Test Results",
        "Medical Condition", "Date of Admission", "Admission Type", "Discharge Date",
    ]
    # Build the per-row search text as a local Series so the shared DataFrame
    # does not gain a "search_blob" column as a side effect of every call.
    search_blob = df[cols_to_search].astype(str).agg(' '.join, axis=1)
    # regex=False treats the query as plain text; with the default regex=True
    # a query like "A+" would match any run of "a" and "(" would raise re.error.
    is_match = search_blob.str.contains(query, case=False, na=False, regex=False)
    matches = df[is_match]
    # Summarise the first k matching rows.
    return [
        f'Name: {row["Name"]}, Condition: {row["Medical Condition"]}, Medication: {row["Medication"]}, Test Result: {row["Test Results"]}'
        for _, row in matches.head(k).iterrows()
    ]

In [50]:
# Expose the dataset search function to the agent as a callable tool.
search_tool = dspy.Tool(search_dataset, name="search_dataset")

# ReAct agent that maps a question to an answer and may call the search tool
# along the way.
react = dspy.ReAct("question -> answer", tools=[search_tool])


In [51]:
# Send every sample question through the agent and show the question next to
# the full prediction object it produced, with a blank line after each pair.
for query in user_queries:
    response = react(question=query)
    print(f"Query: {query}", f"Agent Response: {response}\n", sep="\n")

Query: Find patients with abnormal results in 2024
Agent Response: Prediction(
    trajectory={'thought_0': 'I need to search for patients who have abnormal results specifically in the year 2024. This will likely involve querying the dataset for any records that indicate abnormal results within that timeframe.', 'tool_name_0': 'search_dataset', 'tool_args_0': {'query': 'abnormal results 2024', 'k': 3}, 'observation_0': [], 'thought_1': 'I did not receive any results from the initial search. I should refine my query to ensure I capture any relevant records. I will search for patients with "abnormal" results in the year 2024 more explicitly.', 'tool_name_1': 'search_dataset', 'tool_args_1': {'query': 'abnormal results', 'k': 5}, 'observation_1': [], 'thought_2': 'I still did not receive any results from the previous searches. It may be beneficial to broaden my search parameters further or check for any specific conditions that define "abnormal results." I will search for any records from