## Cell 1: Import Dependencies and Load Environment Variables

In [2]:
import os
import re
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv
from langchain.tools import tool
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI
from sqlalchemy import create_engine

# Load environment variables
# Later cells read GITHUB_TOKEN and TAVILY_API_KEY through os.getenv().
load_dotenv()

# NOTE(review): both messages are printed unconditionally; the return value of
# load_dotenv() is not inspected, so they do not prove a .env file was found.
print("All dependencies imported successfully!")
print("Environment variables loaded!")

All dependencies imported successfully!
Environment variables loaded!


## Cell 2: Setup LLM with GitHub Models

In [3]:
# GitHub Models Configuration
# The GitHub token is passed to the client below as its API key.
token = os.getenv("GITHUB_TOKEN")
endpoint = "https://models.github.ai/inference"
model_name = "openai/gpt-4.1-mini"

# Stop here with a readable message when the token is missing.
if not token:
    raise ValueError("GITHUB_TOKEN not found in .env file!")

# Shared chat model: the database tools use it to turn questions into SQL and
# the agent uses it for tool selection and the final answer.
# NOTE(review): a non-zero temperature presumably lets the generated SQL vary
# between runs - consider 0 if reproducible query results matter.
llm = ChatOpenAI(
    model_name=model_name,
    openai_api_key=token,
    openai_api_base=endpoint,
    temperature=0.5,
)

print("LLM initialized successfully!")
print(f"Model: {model_name}")
print(f"Endpoint: {endpoint}")

LLM initialized successfully!
Model: openai/gpt-4.1-mini
Endpoint: https://models.github.ai/inference


## Cell 3: Convert CSVs to SQLite Databases

In [4]:
# Create data directory
# Relative path, so it is resolved against the kernel's current working directory.
data_dir = Path("../data")
# exist_ok=True keeps the cell re-runnable when the directory is already there.
data_dir.mkdir(exist_ok=True)

def csv_to_sqlite(csv_name: str, db_name: str, table_name: str) -> "str | None":
    """Convert CSV to SQLite database.

    Reads ``data_dir / csv_name`` with pandas and writes it to ``table_name``
    in the SQLite file ``data_dir / db_name``, replacing the table if it
    already exists.

    Args:
        csv_name: File name of the CSV inside ``data_dir``.
        db_name: File name of the SQLite database to create inside ``data_dir``.
        table_name: Name of the table that receives the rows.

    Returns:
        The database path as a string, or ``None`` when anything went wrong
        (the error is printed, not raised).
    """
    engine = None
    try:
        csv_path = data_dir / csv_name
        df = pd.read_csv(csv_path)
        print(f"{csv_name}: {len(df)} rows")

        db_path = data_dir / db_name
        engine = create_engine(f"sqlite:///{db_path}")
        df.to_sql(table_name, engine, if_exists='replace', index=False)

        return str(db_path)
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        print(f"Error with {csv_name}: {e}")
        return None
    finally:
        # Release the engine's pooled connection so the database file is not
        # kept open after the one-off import.
        if engine is not None:
            engine.dispose()

# Convert datasets
# Each call returns the database path as a string, or None when the conversion
# failed (csv_to_sqlite prints the error instead of raising).
print("Converting CSVs to SQLite...\n")

heart_db = csv_to_sqlite("heart.csv", "heart_disease.db", "heart_disease")
cancer_db = csv_to_sqlite("cancer.csv", "cancer.db", "cancer")
diabetes_db = csv_to_sqlite("diabetes.csv", "diabetes.db", "diabetes")

# NOTE(review): printed unconditionally, even when one of the results above is None.
print("\nDatabase conversion completed!")

Converting CSVs to SQLite...

heart.csv: 1025 rows
cancer.csv: 1500 rows
diabetes.csv: 768 rows

Database conversion completed!


## Cell 4: Create Database Query Tools

In [5]:
# Connect to databases
def _connect_sqlite(db_path, label):
    """Open a SQLDatabase on ``db_path``; ``label`` names the dataset in errors.

    csv_to_sqlite returns None when a conversion fails. Interpolating that into
    the URI would yield "sqlite:///None" and connect to an unrelated database
    named "None", so a missing path is rejected here instead.
    """
    if not db_path:
        raise RuntimeError(
            f"No database available for the {label} dataset; "
            "the CSV to SQLite conversion did not succeed."
        )
    return SQLDatabase.from_uri(f"sqlite:///{db_path}")


heart_db_conn = _connect_sqlite(heart_db, "heart disease")
cancer_db_conn = _connect_sqlite(cancer_db, "cancer")
diabetes_db_conn = _connect_sqlite(diabetes_db, "diabetes")


# Keywords that write data or change the schema. A generated statement that
# contains any of them is refused before it reaches the database.
# ("replace" is deliberately absent because REPLACE() is also a scalar string
# function; REPLACE INTO / INSERT INTO are caught through "into".)
_WRITE_SQL_KEYWORDS = frozenset({
    "insert", "update", "delete", "into", "drop", "alter", "create",
    "truncate", "attach", "detach", "pragma", "vacuum",
})


def _extract_sql(llm_text: str) -> str:
    """Return the SQL statement from an LLM reply, without Markdown fences."""
    # Prefer the content of a fenced block so prose around it is discarded.
    fenced = re.search(r"```(.*?)```", llm_text, flags=re.DOTALL)
    body = fenced.group(1) if fenced else llm_text.replace("```", "")
    # Drop a leading language tag such as "sql" or "sqlite".
    body = re.sub(r"^\s*(?:sqlite|sql)\b", "", body, flags=re.IGNORECASE)
    return body.strip()


def _ensure_read_only(sql_query: str) -> None:
    """Raise ValueError unless ``sql_query`` looks like a plain read-only query."""
    # Blank out quoted literals so words inside them are not taken for keywords.
    without_literals = re.sub(r"'(?:[^']|'')*'", "''", sql_query)
    words = re.findall(r"[a-z_][a-z0-9_]*", without_literals.lower())
    if not words or words[0] not in ("select", "with"):
        raise ValueError(f"only read-only SELECT queries are allowed, got: {sql_query!r}")
    forbidden = sorted(_WRITE_SQL_KEYWORDS.intersection(words))
    if forbidden:
        raise ValueError(
            f"generated SQL contains write keyword(s) {forbidden}: {sql_query!r}"
        )


def _answer_from_db(db_conn, dataset_label: str, question: str) -> str:
    """Translate ``question`` to SQL with the LLM, run it on ``db_conn``, format the result.

    Args:
        db_conn: Database wrapper exposing ``get_table_info()`` and ``run(sql)``.
        dataset_label: Human-readable dataset name used in the prompt and in errors.
        question: Natural-language question to answer.

    Returns:
        "Query Result: ...\\n\\nSQL Used: ..." on success, otherwise a string
        starting with "Error querying <dataset_label> database:". Never raises.
    """
    try:
        # Get table info
        table_info = db_conn.get_table_info()

        # Create a prompt for the LLM to generate SQL
        prompt = (
            f"Given the following database schema for {dataset_label} data:\n"
            f"{table_info}\n"
            f"Generate a SQL query to answer this question: {question}\n"
            "Return ONLY the SQL query, nothing else."
        )

        # Generate SQL query using LLM and clean it (remove markdown formatting if present)
        sql_query = _extract_sql(llm.invoke(prompt).content)

        # The statement is model-generated from user text: refuse anything that is not a read.
        _ensure_read_only(sql_query)

        # Execute query
        result = db_conn.run(sql_query)

        return f"Query Result: {result}\n\nSQL Used: {sql_query}"

    except Exception as e:
        return f"Error querying {dataset_label} database: {str(e)}"


# The docstrings of the three tools below are kept verbatim: the @tool decorator
# uses them as the tool descriptions the agent reads when choosing a tool.

@tool
def query_heart_disease_db(question: str) -> str:
    """
    Query the Heart Disease database for statistics, counts, averages, or any data-related questions.
    Use this for questions about heart disease patient data, age, cholesterol, blood pressure, etc.
    """
    return _answer_from_db(heart_db_conn, "heart disease", question)

@tool
def query_cancer_db(question: str) -> str:
    """
    Query the Cancer database for statistics, counts, averages, or any data-related questions.
    Use this for questions about cancer patient data, diagnosis, treatment outcomes, etc.
    """
    return _answer_from_db(cancer_db_conn, "cancer", question)

@tool
def query_diabetes_db(question: str) -> str:
    """
    Query the Diabetes database for statistics, counts, averages, or any data-related questions.
    Use this for questions about diabetes patient data, glucose levels, insulin, BMI, etc.
    """
    return _answer_from_db(diabetes_db_conn, "diabetes", question)

print("Database query tools created!")

Database query tools created!


## Cell 5: Create Web Search Tool

In [6]:
from tavily import TavilyClient

# Set Tavily API key
# The key is read from the environment by medical_web_search. Assigning
# os.getenv(...) back into os.environ did nothing when the key was set and
# raised an opaque TypeError when it was not, so check for it explicitly.
if not os.getenv("TAVILY_API_KEY"):
    raise ValueError("TAVILY_API_KEY not found in .env file!")

# The docstring is kept verbatim: the @tool decorator uses it as the tool
# description the agent reads when choosing a tool.
@tool
def medical_web_search(query: str) -> str:
    """
    Search the web for general medical knowledge like definitions, symptoms, treatments, and cures.
    Use this for questions about what something is, symptoms of diseases, treatment options, etc.
    """
    try:
        tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
        result = tavily_client.search(
            query=f"medical information: {query}",
            search_depth="advanced",
            max_results=3
        )

        # Format results: one bullet per hit followed by its source URL.
        # "\u2022" is the bullet character; the previous literal was a
        # mis-encoded copy of it and rendered as three garbage characters.
        entries = [
            f"\u2022 {item.get('content', '')}\n  Source: {item.get('url', '')}\n\n"
            for item in result.get('results', [])
        ]
        if not entries:
            # Give the agent something explicit instead of a bare header.
            return "Web Search Result:\n\nNo results found."

        return "Web Search Result:\n\n" + "".join(entries)

    except Exception as e:
        return f"Error performing web search: {str(e)}"

print("Tavily web search tool created!")

Tavily web search tool created!


## Cell 6: Create Main Multi-Tool Agent

In [7]:
# Create the main medical AI agent with all tools
# The call below (model plus tool list, no prompt) and the {"messages": [...]}
# payload used by ask_medical_agent follow the langgraph.prebuilt API.
# langchain.agents.create_react_agent takes a required prompt argument and a
# different input format, so it cannot stand in as a fallback; fail with a
# clear message instead of a confusing error further down.
try:
    from langgraph.prebuilt import create_react_agent
except ImportError as exc:
    raise ImportError(
        "langgraph is required to build the medical agent "
        "(install it with `pip install langgraph`)."
    ) from exc
agent_source = "langgraph.prebuilt"

# Create medical AI agent
medical_agent = create_react_agent(
    llm,
    [
        query_heart_disease_db,
        query_cancer_db,
        query_diabetes_db,
        medical_web_search
    ]
)

# Report only after the agent object actually exists.
print(f"Created React Agent from: {agent_source}")
print("Medical AI Agent is ready!")
print("\nTools:")
# NOTE(review): these labels carry a "_tool" suffix that the functions passed
# to create_react_agent above do not have.
print("query_heart_disease_db_tool")
print("query_cancer_db_tool")
print("query_diabetes_db_tool")
print("medical_web_search_tool")

Created React Agent from: langgraph.prebuilt
Medical AI Agent is ready!

Tools:
query_heart_disease_db_tool
query_cancer_db_tool
query_diabetes_db_tool
medical_web_search_tool


## Cell 7: Helper Function to Query Agent

In [8]:
def ask_medical_agent(question: str):
    """Send one question to ``medical_agent`` and print the exchange.

    The question is echoed first, then the content of the last message in the
    agent's response is printed as the answer. Any exception raised along the
    way is printed as ``Error: ...`` rather than propagated.

    Args:
        question: The user question, sent as a single ("user", question) message.

    Returns:
        Always ``None``; the answer is only printed.
    """
    print(f"Question: {question}\n")

    try:
        agent_state = medical_agent.invoke({"messages": [("user", question)]})
        # The agent's reply is the last entry of the returned message list.
        reply_text = agent_state['messages'][-1].content
        print(f"Answer: {reply_text} \n")
    except Exception as err:
        print(f"Error: {err}\n")

print("Helper function is ready!")

Helper function is ready!


## Cell 8: Test with Data Queries

In [9]:
# Test with dataset-specific queries
print("Testing Data Queries:\n")

for data_question in (
    # Heart Disease Query
    "How many patients have heart disease (target=1) in the dataset?",
    # Cancer Query
    "What is the average age of patients in the cancer dataset?",
    # Diabetes Query
    "How many patients have diabetes outcome of 1 in the dataset?",
    # Complex query
    "What is the average BMI of diabetic patients versus non-diabetic patients?",
):
    ask_medical_agent(data_question)


# Answer Verification using Pandas
# Recompute each figure straight from the CSVs so it can be compared with the
# agent's answers printed above.
print("Answer verification using Pandas:\n")
df_heart = pd.read_csv(data_dir / 'heart.csv')
df_cancer = pd.read_csv(data_dir / 'cancer.csv')
df_diabetes = pd.read_csv(data_dir / 'diabetes.csv')

heart_positive_count = (df_heart['target'] == 1).sum()
print(f"Heart disease count: {heart_positive_count}")

cancer_mean_age = df_cancer['Age'].mean()
print(f"Cancer avg age: {cancer_mean_age:.2f}")

diabetic_rows = df_diabetes['Outcome'] == 1
print(f"Diabetes count: {diabetic_rows.sum()}")

non_diabetic_rows = df_diabetes['Outcome'] == 0
bmi_diabetic = df_diabetes.loc[diabetic_rows, 'BMI'].mean()
bmi_non_diabetic = df_diabetes.loc[non_diabetic_rows, 'BMI'].mean()
print(f"Diabetic BMI: {bmi_diabetic:.2f} | Non-diabetic BMI: {bmi_non_diabetic:.2f}")

Testing Data Queries:

Question: How many patients have heart disease (target=1) in the dataset?

Answer: There are 526 patients in the dataset who have heart disease (target=1). 

Question: What is the average age of patients in the cancer dataset?

Answer: The average age of patients in the cancer dataset is approximately 50.32 years. 

Question: How many patients have diabetes outcome of 1 in the dataset?

Answer: There are 268 patients in the dataset who have a diabetes outcome of 1. 

Question: What is the average BMI of diabetic patients versus non-diabetic patients?

Answer: The average BMI of diabetic patients is approximately 35.14, while the average BMI of non-diabetic patients is about 30.30. 

Answer verification using Pandas:

Heart disease count: 526
Cancer avg age: 50.32
Diabetes count: 268
Diabetic BMI: 35.14 | Non-diabetic BMI: 30.30


## Cell 9: Test with Web Search Queries

In [10]:
# Test with general medical knowledge queries
print("Testing the Web Search queries:\n")

for knowledge_question in (
    # Symptom query
    "What are the main symptoms of diabetes? Tell me briefly.",
    # Definition query
    "What is coronary heart disease? Tell me briefly.",
):
    ask_medical_agent(knowledge_question)

Testing the Web Search queries:

Question: What are the main symptoms of diabetes? Tell me in very short.

Answer: The main symptoms of diabetes in short are: excessive thirst, frequent urination (often at night), increased appetite, unexplained weight loss, blurry vision, tiredness, numbness or tingling in hands or feet, slow wound healing, frequent infections, and dry skin. 

Question: What is coronary heart disease? Tell me in very short.

Answer: Coronary heart disease is a condition where the arteries supplying blood to the heart muscle become narrowed or blocked due to the buildup of fatty plaques, reducing blood flow and oxygen to the heart. This can cause chest pain, heart attacks, or other heart problems. 

