In [6]:
import json
import os
import re
from typing import TypedDict, Annotated, List, Dict, Any
from typing import Any, List, Dict

import requests
from dotenv import load_dotenv
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field

# Load environment variables from .env file
load_dotenv()

True

In [14]:
# Structured-output schema for the LLM: SimpleSearchAgent binds the Gemini
# model to this class via `with_structured_output(SearchResponse)`, and the
# answer node serialises the returned instance with `model_dump()`.
# Every field is required (`...` as the default) and carries a description.
class SearchResponse(BaseModel):
    # Echo of the user's query.
    query: str = Field(..., description="The original search query")
    # Typed as Any, so no particular structure is enforced on this field.
    search_results: Any = Field(..., description="The search results from the SERP API")
    # The generated natural-language answer.
    answer: str = Field(..., description="The final answer generated by the AI")
    # Count of sources, as reported by the model.
    source_count: int = Field(..., description="The number of sources found")
    # One string-to-string dict per source.
    all_links: List[Dict[str, str]] = Field(..., description="List of sources in the format 'title': 'URL'")

In [15]:
class AgentState(TypedDict):
    """State dictionary handed from node to node in the search graph.

    `SimpleSearchAgent.search` builds the initial value; the search node
    supplies `search_results`, and the answer node supplies `final_answer`
    and adds an `AIMessage` to `messages`.

    The second argument of each `Annotated[...]` below is a plain
    human-readable description string.
    """
    messages: Annotated[List[Any], "The conversation messages"]
    query: Annotated[str, "The user's search query"]
    search_results: Annotated[Dict, "Search results from SERP API"]
    final_answer: Annotated[str, "Final answer to the user"]

class SimpleSearchAgent:
    """Search-then-answer agent built on LangGraph, SerpApi and Gemini.

    The compiled graph has two nodes that run in sequence:

    1. ``search`` - queries SerpApi (Google engine) for the user's query.
    2. ``answer`` - renders the results as text and asks Gemini to produce a
       ``SearchResponse`` from them.

    Environment variables read at construction time:
        GEMINI_API_KEY: API key passed to the Gemini chat model.
        SERP_API_KEY: API key sent with every SerpApi request.
    """

    # Upper bound (seconds) on one SerpApi request. requests applies no
    # timeout by default, so without this a stalled connection blocks forever.
    REQUEST_TIMEOUT_SECONDS = 30

    # Heuristic trigger words. They are matched as whole words (optionally
    # pluralised), so "know" does not trigger "now" and "Newton" does not
    # trigger "new".
    NEWS_KEYWORDS = ('news', 'latest', 'recent', 'breaking', 'today', 'current', 'update')
    TIME_KEYWORDS = ('recent', 'latest', 'current', 'today', 'now', 'new', '2024', '2025')

    def __init__(self, model: str = "gemini-1.5-flash"):
        """Create the LLM client, read the SerpApi settings and build the graph.

        Args:
            model: Gemini model name to use for answer generation.
        """
        # Initialize Gemini LLM, constrained to return a SearchResponse
        self.llm = ChatGoogleGenerativeAI(
            model=model,
            api_key=os.getenv("GEMINI_API_KEY"),
            max_output_tokens=8100,
        ).with_structured_output(SearchResponse)

        # SERP API configuration
        self.serp_api_key = os.getenv("SERP_API_KEY")
        self.serp_base_url = "https://serpapi.com/search"

        # Create the graph
        self.graph = self._create_graph()

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------
    def _build_search_params(self, query: str) -> Dict[str, Any]:
        """Build the SerpApi query-string parameters for ``query``."""
        params = {
            "engine": "google",
            "q": query,
            "api_key": self.serp_api_key,
            "num": 10,  # Number of results (1-100)
            "start": 0,  # Starting position
            "hl": "en",  # Language
            "gl": "us",  # Country
            "google_domain": "google.com",
            "safe": "active",  # Safe search
            "device": "desktop",
            "no_cache": "false",
            "include_html": "true",
            "output": "json",
        }

        # Optional filters are only added when they apply, instead of being
        # sent as None and relying on requests to drop them.
        if self._is_news_query(query):
            params["tbm"] = "nws"  # News search
        if self._is_time_sensitive(query):
            params["tbs"] = "qdr:w"  # Past week (qdr:d day, qdr:m month, qdr:y year)
        return params

    def _search_error(self, message: str) -> Dict:
        """Return the error-shaped result dict used when a search fails.

        The API key is stripped from ``message`` first: HTTP errors raised by
        requests include the full request URL (query string and all), and the
        error text is later placed in the LLM prompt and printed.
        """
        if self.serp_api_key:
            message = message.replace(self.serp_api_key, "[REDACTED]")
        return {
            "error": f"Search error: {message}",
            "organic_results": [],
            "answer_box": {},
            "knowledge_graph": {}
        }

    def _serp_search(self, query: str) -> Dict:
        """Search using SERP API with optimized parameters.

        Never raises: any failure is reported as a dict containing an
        ``"error"`` key plus empty ``organic_results``/``answer_box``/
        ``knowledge_graph`` entries.
        """
        if not self.serp_api_key:
            # Fail fast with a clear message rather than sending a request
            # that can only come back unauthorised.
            return self._search_error("SERP_API_KEY is not set")

        try:
            response = requests.get(
                self.serp_base_url,
                params=self._build_search_params(query),
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberately broad: the graph treats search as best-effort and
            # lets the answer node report the failure.
            return self._search_error(str(e))

    @staticmethod
    def _contains_keyword(query: str, keywords) -> bool:
        """Return True if any keyword occurs in ``query`` as a whole word.

        Matching is case-insensitive and tolerates a trailing plural "s"
        (so "updates" matches "update").
        """
        alternatives = "|".join(re.escape(keyword) for keyword in keywords)
        pattern = r"\b(?:" + alternatives + r")s?\b"
        return re.search(pattern, query.lower()) is not None

    def _is_news_query(self, query: str) -> bool:
        """Determine if query should use news search"""
        return self._contains_keyword(query, self.NEWS_KEYWORDS)

    def _is_time_sensitive(self, query: str) -> bool:
        """Determine if query needs recent results"""
        return self._contains_keyword(query, self.TIME_KEYWORDS)

    @staticmethod
    def _result_items(search_results: Dict) -> List[Dict]:
        """Return the list of individual results in a SerpApi response.

        Regular searches put them under ``organic_results``; news searches
        (``tbm=nws``) put them under ``news_results``.
        """
        return (
            search_results.get("organic_results")
            or search_results.get("news_results")
            or []
        )

    # ------------------------------------------------------------------
    # Graph nodes
    # ------------------------------------------------------------------
    def _search_node(self, state: AgentState) -> AgentState:
        """Execute SERP API search and return the state with the results set.

        The incoming ``state`` is not modified; a new dict is returned.
        """
        return {**state, "search_results": self._serp_search(state["query"])}

    def _format_search_results(self, search_results: Dict) -> str:
        """Render a SerpApi response as the plain text placed in the prompt."""
        # Check for errors first
        if "error" in search_results:
            return f"Search Error: {search_results['error']}"

        parts = []

        # Answer box (Google's direct answer)
        answer_box = search_results.get("answer_box")
        if answer_box:
            parts.append(f"Google Direct Answer: {answer_box.get('answer', '')}\n")
            if answer_box.get("snippet"):
                parts.append(f"Snippet: {answer_box['snippet']}\n")

        # Knowledge graph
        kg = search_results.get("knowledge_graph")
        if kg:
            parts.append(f"Knowledge Graph: {kg.get('title', '')} - {kg.get('description', '')}\n")

        # Organic (or news) results: first 10, snippets cut to 300 characters
        items = self._result_items(search_results)
        if items:
            parts.append("Search Results:\n")
            for i, result in enumerate(items[:10], 1):
                title = result.get('title', 'No title')
                link = result.get('link', 'No URL')
                # `or` also covers a snippet that is present but None/empty.
                snippet = (result.get('snippet') or 'No content')[:300]

                parts.append(f"{i}. {title}\n")
                parts.append(f"   URL: {link}\n")
                parts.append(f"   Content: {snippet}...\n\n")

        # Related questions (first 3)
        related = search_results.get("related_questions")
        if related:
            parts.append("Related Questions:\n")
            parts.extend(f"- {q.get('question', '')}\n" for q in related[:3])

        return "".join(parts)

    def _answer_node(self, state: AgentState) -> AgentState:
        """Generate final answer using Gemini with search results.

        On success ``final_answer`` is the ``SearchResponse`` serialised as
        indented JSON; on failure it is an ``"Error generating answer: ..."``
        message. Either way the same text is added to ``messages`` as an
        ``AIMessage``. The incoming ``state`` is not modified.
        """
        query = state["query"]
        results_text = self._format_search_results(state["search_results"])

        # Create prompt for answer generation
        answer_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a helpful AI assistant. Based on the search results provided, give a comprehensive and accurate answer to the user's question.

                    Guidelines:
                    1. Use the search results as your primary source of information
                    2. Be factual and provide specific details from the search results
                    3. If there's a Google Direct Answer, incorporate it but add additional context from other results
                    4. Structure your answer clearly with relevant details
                    5. Mention sources when citing specific information
                    6. If search results are insufficient, clearly state what information is missing
                    7. Provide a well-rounded answer that addresses the user's query comprehensively

                Search Results:
                {search_results}"""),
            ("human", "Question: {query}"),
        ])

        answer_chain = answer_prompt | self.llm

        try:
            response = answer_chain.invoke({
                "query": query,
                "search_results": results_text
            })
            final_answer = json.dumps(response.model_dump(), indent=2, ensure_ascii=False)
        except Exception as e:
            # Best-effort: surface the failure as the answer instead of
            # aborting the graph run.
            final_answer = f"Error generating answer: {str(e)}"

        return {
            **state,
            "final_answer": final_answer,
            "messages": [*state["messages"], AIMessage(content=final_answer)],
        }

    def _create_graph(self) -> Any:
        """Create the simple LangGraph workflow: search -> answer -> END.

        Returns:
            The compiled graph (the result of ``StateGraph.compile()``).
        """
        workflow = StateGraph(AgentState)

        # Add nodes
        workflow.add_node("search", self._search_node)
        workflow.add_node("answer", self._answer_node)

        # Add edges
        workflow.set_entry_point("search")
        workflow.add_edge("search", "answer")
        workflow.add_edge("answer", END)

        return workflow.compile()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def search(self, query: str) -> Dict[str, Any]:
        """
        Search and get answer

        Args:
            query: The search query

        Returns:
            Dictionary with keys ``query``, ``search_results`` (raw SerpApi
            response), ``structured_answer`` (JSON string, or an error
            message), ``serp_answer`` (answer-box text, ``""`` if none),
            ``source_count`` and ``all_links`` (``title``/``link`` dicts).
        """
        initial_state = {
            "messages": [HumanMessage(content=query)],
            "query": query,
            "search_results": {},
            "final_answer": ""
        }

        final_state = self.graph.invoke(initial_state)

        search_json = final_state.get("search_results", {})
        result_items = self._result_items(search_json)
        # `or {}` also covers an answer_box that is present but None.
        answer_box = search_json.get("answer_box") or {}

        all_links = [
            {"title": item.get("title", ""), "link": item.get("link", "")}
            for item in result_items
        ]

        return {
            "query": query,
            "search_results": search_json,
            "structured_answer": final_state.get("final_answer", ""),
            "serp_answer": answer_box.get("answer", ""),
            "source_count": len(result_items),
            "all_links": all_links,
        }

    def get_raw_search(self, query: str) -> Dict:
        """
        Get raw SERP API search results without LLM processing

        Args:
            query: The search query

        Returns:
            Raw SERP API response (or the error-shaped dict on failure)
        """
        return self._serp_search(query)


In [16]:

# Example usage: run each query through the agent, then list the sources.
if __name__ == "__main__":
    # Initialize the agent
    agent = SimpleSearchAgent()

    # Example searches
    queries = [
        "I need complete details about the healthcare professional Dr Carlo Mario Lombardi DAY Hospital Cardiologia Brescia",
    ]

    for query in queries:
        banner = "=" * 60
        print(f"\n{banner}")
        print(f"Query: {query}")
        print(banner)

        result = agent.search(query)

        print(f"\n Structured Answer: {result['structured_answer']}")
        print(f"Sources found: {result['source_count']}")
        print("-" * 60)

        # Preview of the raw SERP response. `search()` already returns it, so
        # it is reused here; `agent.get_raw_search(query)` would fetch the same
        # kind of payload at the cost of a second API request.
        print("\n--- Raw SERP Response Preview ---")
        raw_result = result["search_results"]
        if raw_result.get("organic_results"):
            print("All Search Results:")
            # Distinct loop names so the outer `result` is not overwritten.
            for position, item in enumerate(raw_result["organic_results"], 1):
                print(f"{position}. {item.get('title', 'No title')}")
                print(f"   URL: {item.get('link', 'No URL')}")
                print()  # Empty line for readability
        print("-" * 40)


Query: I need complete details about the healthcare professional Dr Carlo Mario Lombardi DAY Hospital Cardiologia Brescia

 Strucutred Answer: {
  "query": "I need complete details about the healthcare professional Dr Carlo Mario Lombardi DAY Hospital Cardiologia Brescia",
  "search_results": "[{\"title\": \"CARLO MARIO LOMBARDI - Brescia\", \"url\": \"https://www.unibs.it/sites/default/files/ugov_cvfiles/cv/ugov_cvpersona_en_000006009.pdf\", \"content\": \"Teaching activities. • 2018- Professor of Clinical cardiology; Postgraduate school of cardiology. University of Brescia....\"}, {\"title\": \"Carlo LOMBARDI | Associate Professor of Cardiology\", \"url\": \"https://www.researchgate.net/profile/Carlo-Lombardi\", \"content\": \"Carlo LOMBARDI, Associate Professor of Cardiology | Cited by 7861 | of University of Brescia, Brescia (UNIBS) | Read 170 publications | Contact Carlo ......\"}, {\"title\": \"Carlo Mario Lombardi\", \"url\": \"https://loop.frontiersin.org/people/1081360/overvi