# Setting up dependencies

## Installing Dependencies

In [139]:
# Install the LangChain/LangGraph stack, the Tavily client and FAISS into the
# notebook runtime; --quiet applies to the whole command.
# NOTE(review): versions are unpinned, and langchain-tavily is installed although
# the import cell pulls TavilySearchResults from langchain.tools.tavily_search —
# confirm which integration is intended.
!pip install \
  langchain \
  langchain-core \
  langchain-tavily \
  langchain-community \
  tavily-python \
  langgraph \
  faiss-cpu \
  --quiet

## Import Statements

In [140]:
import os
import json
import time
import requests
from typing import Any, Dict, List, Optional, Mapping, Tuple, Union
from langchain_core.tools import Tool
from langchain.llms.base import BaseLLM
from langchain.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain_core.language_models import LLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from langgraph.graph import StateGraph, END
from typing import TypedDict, Literal, Annotated
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain_community.vectorstores import FAISS
from langchain_core.vectorstores import VectorStore
from langchain_core.embeddings import Embeddings
from langchain_core.documents import Document
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from google.colab import userdata

# Setting up agents

### SETUP

In [141]:
# Callback manager built around a single stdout streaming handler.
# NOTE(review): neither name is passed to an LLM or chain in the cells visible
# here — confirm they are still needed.
callbacks = [StreamingStdOutCallbackHandler()]
callback_manager = CallbackManager(callbacks)

### Constants

In [142]:
# Retry budget for the search loop: decide_next_step compares retry_count against it,
# and fetch_snippets prints it as the attempt total (MAX_RETRIES + 1).
MAX_RETRIES = 3
# NOTE(review): the two constants below are not referenced by any cell visible
# here — confirm their intended use.
MAX_SNIPPETS_PER_SOURCE = 5
RELEVANCE_THRESHOLD = 0.7

### ENVs

In [143]:
# Credentials come from Colab's userdata store instead of being hard-coded.
TAVILY_API_KEY = userdata.get('TAVILY_API_KEY')
# Also exported to the process environment — presumably where the Tavily tool
# looks for its key; verify against the Tavily integration.
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

# Groq key and chat-completions endpoint; both are read by GroqLLM._call.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

## Embeddings

In [144]:
class SimpleEmbeddings(Embeddings):
    """Deterministic placeholder embeddings (they carry no semantic meaning).

    Each text is mapped to a fixed-length vector of floats in [0, 1) derived
    from the SHA-256 digest of its UTF-8 bytes, so the same text always yields
    the same vector — within a session and across kernel restarts. The built-in
    ``hash()`` is not used because it is salted per process for strings, which
    made earlier vectors irreproducible, and because filling every dimension
    with one number collapses all texts onto a single line.

    These vectors only make the FAISS index constructible; use a real
    embedding model when similarity search has to be meaningful.
    """

    # Vector length; unchanged from the value the notebook has always used.
    dimension: int = 1536

    def _embed(self, text: str) -> List[float]:
        """Return the reproducible pseudo-random vector for ``text``."""
        # Local imports: the notebook's import cell does not provide these modules.
        import hashlib
        import random

        digest = hashlib.sha256(text.encode("utf-8", errors="replace")).digest()
        # Seeding with the digest makes the sequence a pure function of the text.
        rng = random.Random(int.from_bytes(digest, "big"))
        return [rng.random() for _ in range(self.dimension)]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed every document in ``texts``, preserving order."""
        return [self._embed(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query string with the same scheme used for documents."""
        return self._embed(text)

embeddings = SimpleEmbeddings()

## Tavily setup

In [145]:
# Tavily search wrapped in a generic LangChain Tool; fetch_snippets calls
# research_tool.run(query) to perform the web search.
# NOTE(review): confirm that TavilySearchResults honours `k`; its result-count
# option may be named differently.
tavily_search = TavilySearchResults(k=5)
research_tool = Tool(
    name="tavily_search",
    description="Use for factual web search via Tavily",
    func=tavily_search.run,
)

## Groq Setup

In [181]:
class GroqLLM(LLM):
    """Minimal LangChain LLM wrapper around Groq's chat-completions endpoint.

    Each call posts the prompt as a single user message to ``GROQ_API_URL``
    using ``GROQ_API_KEY`` as the bearer token.
    """

    # Model identifier sent in the request body.
    model_name: str = "llama-3.1-8b-instant"
    # Default sampling temperature; can be overridden per call through kwargs.
    temperature: float = 0.7
    # Default completion-token limit; can be overridden per call through kwargs.
    max_tokens: int = 1024
    # Seconds to wait for the HTTP call; without a timeout requests can block
    # indefinitely on a stalled connection.
    request_timeout: float = 60.0

    @property
    def _llm_type(self) -> str:
        """Short identifier for this LLM implementation."""
        return "groq"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to Groq and return the reply text.

        Args:
            prompt: Text sent as the only (user) message.
            stop: Optional stop sequences, forwarded when non-empty.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: ``temperature`` and ``max_tokens`` override the
                instance defaults for this call.

        Returns:
            The content of the first choice. On any failure (network error,
            timeout, non-2xx status, unexpected payload) the string
            ``"Error: <details>"`` is returned instead of raising, so callers
            have to check for that prefix.
        """
        headers = {
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json"
        }

        data = {
            "model": self.model_name,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens)
        }

        if stop:
            data["stop"] = stop

        try:
            response = requests.post(
                GROQ_API_URL,
                headers=headers,
                json=data,
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]
        except Exception as e:
            # Best-effort contract kept on purpose: failures are reported as text.
            return f"Error: {str(e)}"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Parameters that identify this LLM configuration.

        Declared as a property to match the LangChain base class, which reads
        it as an attribute; a plain method here breaks the standard
        invoke/generate path.
        """
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }

### Creating instances

In [182]:
# One GroqLLM instance per pipeline role.

# Judges snippet relevance in evaluate_snippets.
research_llm = GroqLLM(
    model_name="llama-3.3-70b-versatile",
    temperature=0.5,
    max_tokens=4096,
)

# Writes the draft answer in draft_answer.
answer_drafting_llm = GroqLLM(
    model_name="llama-3.1-8b-instant",
    temperature=0.7,
    max_tokens=2048
)

# Produces the JSON claim report that fact_check parses.
# A general instruction-following model is required here: Llama Guard is a
# content-safety classifier and does not return the structured JSON that
# fact_check_prompt asks for, which made every fact check fall back to defaults.
fact_checking_llm = GroqLLM(
    model_name="llama-3.1-8b-instant",
    temperature=0.2,
    max_tokens=512,
)

## Fact-check result models

In [183]:
# Verdict for a single claim taken from the draft answer.
# (Comments are used instead of class docstrings so that the JSON schema fed
# into the prompt through get_format_instructions() stays unchanged.)
# NOTE(review): `evidence` is Optional but has no default; under Pydantic v2 that
# still makes the field required — confirm whether default=None is intended.
class Citation(BaseModel):
    claim: str = Field(description="A claim made in the draft answer")
    supported: bool = Field(description="Whether the claim is supported by the evidence")
    evidence: Optional[str] = Field(description="The snippet that supports or contradicts the claim")
    confidence: float = Field(description="Confidence score from 0-1")

# Full fact-check report: one Citation per checked claim plus an overall score.
class FactCheckResult(BaseModel):
    checked_claims: List[Citation] = Field(description="Results of fact checking the draft answer")
    overall_accuracy: float = Field(description="Overall accuracy score from 0-1")

# Supplies the {format_instructions} text for fact_check_prompt. The fact_check
# node parses the reply with json.loads rather than with this parser.
fact_check_parser = PydanticOutputParser(pydantic_object=FactCheckResult)

## State Definitions

In [184]:
# Required keys of the graph state shared by the node functions.
class MyGraphState(TypedDict):
    user_query: str               # question being researched
    snippets: List[Dict]          # results of the most recent fetch_snippets call
    all_snippets: List[Dict]      # snippets accumulated over every fetch
    verdict: str                  # "yes" / "no", set by fetch_snippets and evaluate_snippets
    vector_store: Optional[Any]   # FAISS index written by ingest_snippets
    sources: List[Dict]           # {"title", "url"} dicts collected by fetch_snippets

# Optional keys (total=False) that are filled in as the pipeline progresses.
class DraftState(MyGraphState, total=False):
    retry_count: int              # read by decide_next_step against MAX_RETRIES
    answer: str                   # NOTE(review): not written by any node visible here
    draft_answer: str             # written by draft_answer
    fact_check_result: Dict       # written by fact_check
    final_answer: str             # presumably set by compile_final_answer — confirm

### Model Prompts

In [185]:
# Asks the model to call 'tavily_search'. Placeholder: {query}.
# NOTE(review): the fetch_snippets node calls research_tool.run directly, so this
# prompt is only reachable through fetch_chain — confirm it is still needed.
fetch_prompt = """
You are a Research Assistant.
Call the function 'tavily_search' with this question: {query}
"""

# Yes/no relevance check over the latest snippets. Placeholders: {query}, {snippets}.
evaluate_prompt = """
You are a Quality Evaluator.
Given these snippets for the question: {query}

Snippets:
{snippets}

Your task is to determine if ANY of these snippets contain relevant information that could help answer the question.
Respond with ONLY 'yes' if ANY snippet contains relevant information.
Respond with ONLY 'no' if NONE of the snippets contain relevant information.
"""

# Drafts a prose answer restricted to the snippets. Placeholders: {query}, {snippets}.
draft_prompt = """
You are an Answer Drafting Agent.

USER QUERY: {query}

INFORMATION SNIPPETS:
{snippets}

INSTRUCTIONS:
1. Use ONLY the information in the snippets above to answer the query
2. Write a comprehensive answer in paragraph form and not in point wise form.
3. Include the most important facts and recent breakthroughs mentioned in the snippets
4. Keep your answer factual and concise
5. Do NOT include any JSON formatting in your answer
"""

# Claim-by-claim verification of the draft. Placeholders: {query}, {snippets},
# {draft_answer}, {format_instructions} (the last one is pre-filled from
# fact_check_parser when the template is built).
fact_check_prompt = """
You are a Fact Checking Agent.

USER QUERY: {query}

ORIGINAL SNIPPETS:
{snippets}

DRAFT ANSWER:
{draft_answer}

INSTRUCTIONS:
1. Analyze the draft answer and identify the main claims or statements
2. For each claim, verify if it is supported by the snippets provided
3. Format your response according to the following structure:
{format_instructions}

Focus on accuracy and provide specific evidence from the snippets that supports or contradicts each claim.
"""

# Merges draft, fact-check output and sources into the cited final answer.
# Placeholders: {query}, {draft_answer}, {fact_check_results}, {sources}.
final_answer_prompt = """
You are a Final Answer Compiler.

USER QUERY: {query}

DRAFT ANSWER:
{draft_answer}

FACT CHECK RESULTS:
{fact_check_results}

SOURCES:
{sources}

INSTRUCTIONS:
1. Review the draft answer and the fact check results
2. Create a final comprehensive answer that:
   a. Is accurate and factual, removing or modifying any unsupported claims
   b. Maintains a coherent and well-structured flow
   c. Includes citations to the original sources where appropriate using [Source X] notation
3. End the answer with a "Sources:" section that lists the sources used
4. Make the answer helpful, comprehensive, and accurate
"""

## Prompt Templates

In [186]:
# PromptTemplate objects built from the raw prompt strings.
fetch_prompt_template = PromptTemplate.from_template(fetch_prompt)
evaluate_prompt_template = PromptTemplate.from_template(evaluate_prompt)
draft_prompt_template = PromptTemplate.from_template(draft_prompt)

# {format_instructions} is pre-filled from fact_check_parser, so callers only
# supply query, snippets and draft_answer when formatting this template.
fact_check_template = PromptTemplate(
    template=fact_check_prompt,
    input_variables=["query", "snippets", "draft_answer"],
    partial_variables={"format_instructions": fact_check_parser.get_format_instructions()}
)

final_answer_template = PromptTemplate.from_template(final_answer_prompt)

### Prompt Chains

In [187]:
# Template | LLM pipelines, one per pipeline stage.
# NOTE(review): the node functions visible in this notebook format the templates
# and call `_call` on the LLMs directly instead of invoking these chains —
# confirm the chains are still needed.
fetch_chain = fetch_prompt_template | research_llm
evaluate_chain = evaluate_prompt_template | research_llm
draft_chain = draft_prompt_template | answer_drafting_llm
fact_check_chain = fact_check_template | fact_checking_llm
final_answer_chain = final_answer_template | answer_drafting_llm

# Main Architecture

## Crawl Web using Tavily

In [188]:
def _snippet_text(item: Dict) -> str:
    """Extract the readable text of one search-result dict, always as a string."""
    text = next(
        (item[key] for key in ("content", "text", "snippet") if item.get(key)),
        None,
    )
    if not text:
        # No usable field: fall back to the whole record.
        text = str(item)
    if not isinstance(text, str):
        # Downstream code slices and concatenates snippet text, so it must be a str.
        return str(text)

    if text.startswith("{") and "content" in text:
        # Some results arrive as a stringified dict; unwrap "content" when it parses.
        try:
            parsed = json.loads(text.replace("'", "\""))
        except ValueError:
            return text
        if isinstance(parsed, dict):
            inner = parsed.get("content")
            if isinstance(inner, str) and inner:
                return inner
    return text


def fetch_snippets(state: DraftState) -> DraftState:
    """Run a web search for the user query and merge the results into the state.

    Args:
        state: Graph state; ``user_query`` is required. ``retry_count``,
            ``verdict``, ``all_snippets`` and ``sources`` are read when present.

    Returns:
        A copy of ``state`` with:
          * ``snippets``     – snippets found by this call (empty on failure);
          * ``all_snippets`` – earlier snippets plus the new ones;
          * ``sources``      – de-duplicated ``{"title", "url"}`` dicts; a
            snippet's ``source_id`` is the 1-based position of its source in
            this list (0 when the result carried no source information);
          * ``verdict``      – ``"yes"`` when at least one snippet was found;
          * ``retry_count``  – number of retries performed so far.
    """
    rc = state.get("retry_count", 0)
    # decide_next_step routes back to this node only while the verdict is "no",
    # so arriving with that verdict means this call is a retry. Counting it here
    # is what lets the `retry_count < MAX_RETRIES` check eventually stop the loop;
    # nothing else between fetch and decide advances the counter.
    if str(state.get("verdict", "")).lower() == "no":
        rc += 1

    all_snippets = state.get("all_snippets", [])
    # Copy so the incoming state is not mutated; position + 1 is the source number.
    all_sources = list(state.get("sources", []))
    query = state["user_query"]

    print(f"[FETCH] Searching for query: '{query}' (attempt {rc+1}/{MAX_RETRIES+1})")

    try:
        raw = research_tool.run(query)

        # The tool may hand back a JSON string, a dict, a list or something else.
        if isinstance(raw, str):
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                data = [{"text": raw}]
        else:
            data = raw

        if isinstance(data, dict):
            snippet_list = [data]
        elif isinstance(data, list):
            snippet_list = data
        else:
            snippet_list = [{"text": str(data)}]

        snippets = []
        for item in snippet_list:
            if isinstance(item, dict):
                text = _snippet_text(item)
                source = {
                    "title": item.get("title", "Unknown Source"),
                    "url": item.get("url", "#"),
                }
                if source not in all_sources:
                    all_sources.append(source)
                # Resolve the id after de-duplication so repeated sources share one
                # number and ids stay valid across several fetches.
                source_id = all_sources.index(source) + 1
            else:
                text = str(item)
                source_id = 0

            snippets.append({
                "id": len(all_snippets) + len(snippets) + 1,
                "text": text,
                "source_id": source_id,
                "query": query
            })

        print(f"[FETCH] Found {len(snippets)} snippets")
        if snippets:
            first_text = snippets[0]["text"]
            preview = first_text[:100] + "..." if len(first_text) > 100 else first_text
            print(f"[FETCH] Sample: \"{preview}\"")

        return {
            **state,
            "snippets": snippets,
            "all_snippets": all_snippets + snippets,
            "sources": all_sources,
            "verdict": "yes" if snippets else "no",
            "retry_count": rc
        }

    except Exception as e:
        # Best effort: report the failure and let the graph decide whether to retry.
        print(f"[FETCH] Error occurred during search: {e}")
        return {
            **state,
            "snippets": [],
            "all_snippets": all_snippets,
            "sources": state.get("sources", []),
            "verdict": "no",
            "retry_count": rc
        }

## Evaluate the fetch results

In [189]:
def evaluate_snippets(state: DraftState) -> DraftState:
    """Ask the research LLM whether the latest snippets help answer the query.

    Args:
        state: Graph state; reads ``snippets``, ``user_query`` and the
            ``verdict`` left by the fetch step.

    Returns:
        A copy of ``state`` whose ``verdict`` is ``"yes"`` or ``"no"``. With no
        snippets the verdict is ``"no"`` without calling the LLM. If the LLM
        call fails, the verdict already in the state is kept.
    """
    # Local import: the notebook's import cell does not provide `re`.
    import re

    snippets = state.get("snippets") or []
    print(f"[EVALUATE] Analyzing {len(snippets)} snippets for relevance")

    if not snippets:
        return {**state, "verdict": "no"}

    formatted_snippets = []
    for snippet in snippets:
        text = str(snippet["text"])
        snippet_text = text[:500] + "..." if len(text) > 500 else text
        formatted_snippets.append(f"Snippet {snippet['id']}: {snippet_text}")
    snippet_input = "\n\n".join(formatted_snippets)

    # Fallback when the model cannot be consulted: keep the verdict from the fetch step.
    verdict = state.get("verdict", "no")

    try:
        formatted_prompt = evaluate_prompt_template.format(
            query=state["user_query"],
            snippets=snippet_input
        )
        reply = research_llm._call(formatted_prompt).strip().lower()

        if reply.startswith("error:"):
            # GroqLLM._call reports failures as "Error: ..." text instead of raising;
            # that is not a relevance judgement, so the fallback verdict stands.
            print("[EVALUATE] LLM call failed, keeping previous verdict")
        else:
            # Compare whole words so "yesterday" or "eyes" cannot count as "yes",
            # and trust a leading yes/no over anything mentioned later in the reply.
            words = re.findall(r"[a-z]+", reply)
            if words and words[0] in ("yes", "no"):
                verdict = words[0]
            else:
                verdict = "yes" if "yes" in words else "no"

    except Exception as e:
        print(f"[EVALUATE] Evaluation failed ({e}), keeping previous verdict")

    print(f"[EVALUATE] Verdict: {verdict.upper()}")
    return {**state, "verdict": verdict}


## Decide the next steps

In [190]:
def decide_next_step(state: DraftState) -> Literal["fetch_snippets", "ingest_snippets"]:
    """Choose the next node: search again or continue to ingestion.

    Args:
        state: Graph state; reads ``verdict`` and ``retry_count`` (default 0).

    Returns:
        ``"fetch_snippets"`` while the verdict is "no" and fewer than
        ``MAX_RETRIES`` retries have been recorded, otherwise
        ``"ingest_snippets"``. This function only returns a route name; it does
        not update ``retry_count`` itself.
    """
    rc = state.get("retry_count", 0)
    insufficient = state["verdict"].lower() == "no"

    if insufficient and rc < MAX_RETRIES:
        print("[DECIDE] Insufficient information, retrying search")
        return "fetch_snippets"

    # Exactly one explanation is logged for moving on; previously the
    # "sufficient information" line was printed even after retries ran out.
    if insufficient:
        print("[DECIDE] Max retries reached, proceeding with available data")
    else:
        print("[DECIDE] Sufficient information found, proceeding to draft")
    return "ingest_snippets"

## Ingest the snippets

In [191]:
def ingest_snippets(state: DraftState) -> DraftState:
    """Index every accumulated snippet in a FAISS vector store.

    Each snippet becomes a Document whose metadata carries its id, source id
    and originating query. The returned state always has ``retry_count``
    increased by one; ``vector_store`` is added only when the index was built.
    """
    collected = state.get("all_snippets", [])
    print(f"[INGEST] Processing {len(collected)} total snippets for drafting")

    docs = [
        Document(
            metadata={
                "id": entry["id"],
                "source_id": entry.get("source_id", 0),
                "query": entry.get("query", state["user_query"]),
            },
            page_content=entry["text"],
        )
        for entry in collected
    ]

    # Every exit path bumps the counter, so build that result once.
    next_state = {**state, "retry_count": state.get("retry_count", 0) + 1}

    if len(docs) == 0:
        print("[INGEST] No documents to ingest")
        return next_state

    print(f"[INGEST] Creating vector store with {len(docs)} documents")
    try:
        index = FAISS.from_documents(docs, embeddings)
    except Exception:
        # Index creation is best effort; the pipeline continues without it.
        print(f"[INGEST] Error creating vector store")
        return next_state

    next_state["vector_store"] = index
    return next_state

## Draft final answer

In [192]:
def draft_answer(state: DraftState) -> DraftState:
    """Draft a prose answer to the user query from every accumulated snippet.

    Args:
        state: Graph state; reads ``all_snippets`` and ``user_query``.

    Returns:
        A copy of ``state`` with ``draft_answer`` set to the model's reply, or
        to a fixed apology when the model could not be reached.
    """
    print("[DRAFT] Generating comprehensive response")

    formatted_snippets = []
    for snippet in state.get("all_snippets", []):
        text = str(snippet["text"])
        snippet_text = text[:500] + "..." if len(text) > 500 else text
        source_id = snippet.get("source_id", 0)
        # Snippets without source information (id 0) get no citation tag.
        source_text = f"[Source {source_id}]" if source_id > 0 else ""
        formatted_snippets.append(f"Snippet {snippet['id']}: {snippet_text} {source_text}")

    snippet_input = "\n\n".join(formatted_snippets)

    fallback = "Unable to generate answer due to technical difficulties."
    try:
        formatted_prompt = draft_prompt_template.format(
            query=state["user_query"],
            snippets=snippet_input
        )
        # Named `draft` so the local does not shadow this function.
        draft = answer_drafting_llm._call(formatted_prompt)

        if draft.startswith("Error: "):
            # GroqLLM._call reports failures as "Error: ..." text instead of raising;
            # without this check the fallback below could never be reached and the
            # error text itself would be passed on as the draft.
            print(f"[DRAFT] LLM call failed: {draft}")
            draft = fallback

    except Exception as e:
        print(f"[DRAFT] Drafting failed: {e}")
        draft = fallback

    print(f"[DRAFT] Draft response generated ({len(draft)} chars)")
    return {**state, "draft_answer": draft}

In [193]:
def fact_check(state: DraftState) -> DraftState:
    """Verify the draft answer against the collected snippets.

    Args:
        state: Graph state; reads ``all_snippets``, ``draft_answer`` and
            ``user_query``.

    Returns:
        A copy of ``state`` with ``fact_check_result`` set to a dict. The dict
        is the model's parsed JSON reply when it is a JSON object; otherwise it
        is a default with an empty ``checked_claims`` list and an
        ``overall_accuracy`` of 0.7 (reply could not be parsed) or 0.5 (nothing
        to check, or the check itself failed).
    """
    # Local import: the notebook's import cell does not provide `re`.
    import re

    print("[FACT-CHECK] Verifying claims in the draft answer")

    all_snippets = state.get("all_snippets", [])

    if not all_snippets or not state.get("draft_answer"):
        print("[FACT-CHECK] No snippets or draft to check, skipping verification")
        # Same shape as the other defaults so consumers can rely on both keys.
        return {**state, "fact_check_result": {"checked_claims": [], "overall_accuracy": 0.5}}

    formatted_snippets = []
    for snippet in all_snippets:
        text = str(snippet["text"])
        snippet_text = text[:300] + "..." if len(text) > 300 else text
        formatted_snippets.append(f"Snippet {snippet['id']}: {snippet_text}")

    snippet_input = "\n\n".join(formatted_snippets)

    try:
        formatted_prompt = fact_check_template.format(
            query=state["user_query"],
            snippets=snippet_input,
            draft_answer=state["draft_answer"]
        )

        fact_check_response = fact_checking_llm._call(formatted_prompt)

        if fact_check_response.startswith("Error: "):
            # GroqLLM._call reports failures as text; treat that as a failed check
            # rather than as an unparseable model reply.
            raise RuntimeError(fact_check_response)

        # Models wrap JSON in different ways, so try in order: a fenced block
        # (with or without the "json" tag), the raw reply, the outermost {...} span.
        candidates = []
        fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", fact_check_response, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))
        candidates.append(fact_check_response)
        first = fact_check_response.find("{")
        last = fact_check_response.rfind("}")
        if 0 <= first < last:
            candidates.append(fact_check_response[first:last + 1])

        fact_check_result = None
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Only a JSON object is usable; callers read it with .get().
            if isinstance(parsed, dict):
                fact_check_result = parsed
                break

        if fact_check_result is None:
            print("[FACT-CHECK] Failed to parse result, using default")
            fact_check_result = {
                "checked_claims": [],
                "overall_accuracy": 0.7
            }

        print(f"[FACT-CHECK] Verification complete with accuracy: {fact_check_result.get('overall_accuracy', 0.7)}")

    except Exception as e:
        print(f"[FACT-CHECK] Error during verification: {e}")
        fact_check_result = {
            "checked_claims": [],
            "overall_accuracy": 0.5
        }

    return {**state, "fact_check_result": fact_check_result}

In [194]:
import os
import json
import time
import requests
from typing import Any, Dict, List, Optional, Mapping, Tuple, Union
from langchain_core.tools import Tool
from langchain.llms.base import BaseLLM
from langchain.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain_core.language_models import LLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from langgraph.graph import StateGraph, END
from typing import TypedDict, Literal, Annotated
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain_community.vectorstores import FAISS
from langchain_core.vectorstores import VectorStore
from langchain_core.embeddings import Embeddings
from langchain_core.documents import Document
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from google.colab import userdata

# NOTE(review): from its import lines onward, this cell repeats the configuration
# and definitions of the cells above; when the notebook runs top to bottom the
# definitions below replace the earlier ones.

# Setup: callback manager built around a single stdout streaming handler.
callbacks = [StreamingStdOutCallbackHandler()]
callback_manager = CallbackManager(callbacks)

# Constants: MAX_RETRIES is the retry budget read by decide_next_step and
# fetch_snippets. NOTE(review): the other two are not referenced by any visible
# code — confirm their intended use.
MAX_RETRIES = 3
MAX_SNIPPETS_PER_SOURCE = 5
RELEVANCE_THRESHOLD = 0.7

# ENVs: credentials come from Colab's userdata store; the Tavily key is also
# exported to the process environment.
TAVILY_API_KEY = userdata.get('TAVILY_API_KEY')
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

# Groq key and chat-completions endpoint; both are read by GroqLLM._call.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

# Embeddings
class SimpleEmbeddings(Embeddings):
    """Deterministic placeholder embeddings (they carry no semantic meaning).

    Each text is mapped to a fixed-length vector of floats in [0, 1) derived
    from the SHA-256 digest of its UTF-8 bytes, so the same text always yields
    the same vector — within a session and across kernel restarts. The built-in
    ``hash()`` is not used because it is salted per process for strings, which
    made earlier vectors irreproducible, and because filling every dimension
    with one number collapses all texts onto a single line.

    These vectors only make the FAISS index constructible; use a real
    embedding model when similarity search has to be meaningful.
    """

    # Vector length; unchanged from the value the notebook has always used.
    dimension: int = 1536

    def _embed(self, text: str) -> List[float]:
        """Return the reproducible pseudo-random vector for ``text``."""
        # Local imports: the notebook's import lines do not provide these modules.
        import hashlib
        import random

        digest = hashlib.sha256(text.encode("utf-8", errors="replace")).digest()
        # Seeding with the digest makes the sequence a pure function of the text.
        rng = random.Random(int.from_bytes(digest, "big"))
        return [rng.random() for _ in range(self.dimension)]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed every document in ``texts``, preserving order."""
        return [self._embed(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query string with the same scheme used for documents."""
        return self._embed(text)

embeddings = SimpleEmbeddings()

# Tavily setup: the search tool is wrapped in a generic LangChain Tool;
# fetch_snippets calls research_tool.run(query) to perform the web search.
# NOTE(review): confirm that TavilySearchResults honours `k`; its result-count
# option may be named differently.
tavily_search = TavilySearchResults(k=5)
research_tool = Tool(
    name="tavily_search",
    description="Use for factual web search via Tavily",
    func=tavily_search.run,
)

# Groq Setup
class GroqLLM(LLM):
    """Minimal LangChain LLM wrapper around Groq's chat-completions endpoint.

    Each call posts the prompt as a single user message to ``GROQ_API_URL``
    using ``GROQ_API_KEY`` as the bearer token.
    """

    # Model identifier sent in the request body.
    model_name: str = "llama-3.1-8b-instant"
    # Default sampling temperature; can be overridden per call through kwargs.
    temperature: float = 0.7
    # Default completion-token limit; can be overridden per call through kwargs.
    max_tokens: int = 1024
    # Seconds to wait for the HTTP call; without a timeout requests can block
    # indefinitely on a stalled connection.
    request_timeout: float = 60.0

    @property
    def _llm_type(self) -> str:
        """Short identifier for this LLM implementation."""
        return "groq"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to Groq and return the reply text.

        Args:
            prompt: Text sent as the only (user) message.
            stop: Optional stop sequences, forwarded when non-empty.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: ``temperature`` and ``max_tokens`` override the
                instance defaults for this call.

        Returns:
            The content of the first choice. On any failure (network error,
            timeout, non-2xx status, unexpected payload) the string
            ``"Error: <details>"`` is returned instead of raising, so callers
            have to check for that prefix.
        """
        headers = {
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json"
        }

        data = {
            "model": self.model_name,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens)
        }

        if stop:
            data["stop"] = stop

        try:
            response = requests.post(
                GROQ_API_URL,
                headers=headers,
                json=data,
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]
        except Exception as e:
            # Best-effort contract kept on purpose: failures are reported as text.
            return f"Error: {str(e)}"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Parameters that identify this LLM configuration.

        Declared as a property to match the LangChain base class, which reads
        it as an attribute; a plain method here breaks the standard
        invoke/generate path.
        """
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }

# Creating LLM instances: one GroqLLM per pipeline role, all on the same model
# and differing only in temperature and token limit.

# Judges snippet relevance in evaluate_snippets.
research_llm = GroqLLM(
    model_name="llama-3.1-8b-instant",
    temperature=0.5,
    max_tokens=512
)

# Writes the draft answer in draft_answer; final_answer_chain uses it as well.
answer_drafting_llm = GroqLLM(
    model_name="llama-3.1-8b-instant",
    temperature=0.7,
    max_tokens=1024
)

# Produces the JSON claim report that fact_check parses.
fact_checking_llm = GroqLLM(
    model_name="llama-3.1-8b-instant",
    temperature=0.2,
    max_tokens=512
)

# Fact check models.
# (Comments are used instead of class docstrings so that the JSON schema fed
# into the prompt through get_format_instructions() stays unchanged.)

# Verdict for a single claim taken from the draft answer.
# NOTE(review): `evidence` is Optional but has no default; under Pydantic v2 that
# still makes the field required — confirm whether default=None is intended.
class Citation(BaseModel):
    claim: str = Field(description="A claim made in the draft answer")
    supported: bool = Field(description="Whether the claim is supported by the evidence")
    evidence: Optional[str] = Field(description="The snippet that supports or contradicts the claim")
    confidence: float = Field(description="Confidence score from 0-1")

# Full fact-check report: one Citation per checked claim plus an overall score.
class FactCheckResult(BaseModel):
    checked_claims: List[Citation] = Field(description="Results of fact checking the draft answer")
    overall_accuracy: float = Field(description="Overall accuracy score from 0-1")

# Supplies the {format_instructions} text for fact_check_prompt. The fact_check
# node parses the reply with json.loads rather than with this parser.
fact_check_parser = PydanticOutputParser(pydantic_object=FactCheckResult)

# State definitions.

# Required keys of the graph state shared by the node functions.
class MyGraphState(TypedDict):
    user_query: str               # question being researched
    snippets: List[Dict]          # results of the most recent fetch_snippets call
    all_snippets: List[Dict]      # snippets accumulated over every fetch
    verdict: str                  # "yes" / "no", set by fetch_snippets and evaluate_snippets
    vector_store: Optional[Any]   # FAISS index written by ingest_snippets
    sources: List[Dict]           # {"title", "url"} dicts collected by fetch_snippets

# Optional keys (total=False) that are filled in as the pipeline progresses.
class DraftState(MyGraphState, total=False):
    retry_count: int              # read by decide_next_step against MAX_RETRIES
    answer: str                   # NOTE(review): not written by any node visible here
    draft_answer: str             # written by draft_answer
    fact_check_result: Dict       # written by fact_check
    final_answer: str             # presumably set by compile_final_answer — confirm

# Prompts: raw template strings; {name} markers are filled in at format time.

# Asks the model to call 'tavily_search'. Placeholder: {query}.
# NOTE(review): the fetch_snippets node calls research_tool.run directly, so this
# prompt is only reachable through fetch_chain — confirm it is still needed.
fetch_prompt = """
You are a Research Assistant.
Call the function 'tavily_search' with this question: {query}
"""

# Yes/no relevance check over the latest snippets. Placeholders: {query}, {snippets}.
evaluate_prompt = """
You are a Quality Evaluator.
Given these snippets for the question: {query}

Snippets:
{snippets}

Your task is to determine if ANY of these snippets contain relevant information that could help answer the question.
Respond with ONLY 'yes' if ANY snippet contains relevant information.
Respond with ONLY 'no' if NONE of the snippets contain relevant information.
"""

# Drafts a prose answer restricted to the snippets. Placeholders: {query}, {snippets}.
draft_prompt = """
You are an Answer Drafting Agent.

USER QUERY: {query}

INFORMATION SNIPPETS:
{snippets}

INSTRUCTIONS:
1. Use ONLY the information in the snippets above to answer the query
2. Write a comprehensive answer in paragraph form and not in point wise form.
3. Include the most important facts and recent breakthroughs mentioned in the snippets
4. Keep your answer factual and concise
5. Do NOT include any JSON formatting in your answer
"""

# Claim-by-claim verification of the draft. Placeholders: {query}, {snippets},
# {draft_answer}, {format_instructions} (the last one is pre-filled from
# fact_check_parser when the template is built).
fact_check_prompt = """
You are a Fact Checking Agent.

USER QUERY: {query}

ORIGINAL SNIPPETS:
{snippets}

DRAFT ANSWER:
{draft_answer}

INSTRUCTIONS:
1. Analyze the draft answer and identify the main claims or statements
2. For each claim, verify if it is supported by the snippets provided
3. Format your response according to the following structure:
{format_instructions}

Focus on accuracy and provide specific evidence from the snippets that supports or contradicts each claim.
"""

# Merges draft, fact-check output and sources into the cited final answer.
# Placeholders: {query}, {draft_answer}, {fact_check_results}, {sources}.
final_answer_prompt = """
You are a Final Answer Compiler.

USER QUERY: {query}

DRAFT ANSWER:
{draft_answer}

FACT CHECK RESULTS:
{fact_check_results}

SOURCES:
{sources}

INSTRUCTIONS:
1. Review the draft answer and the fact check results
2. Create a final comprehensive answer that:
   a. Is accurate and factual, removing or modifying any unsupported claims
   b. Maintains a coherent and well-structured flow
   c. Includes citations to the original sources where appropriate using [Source X] notation
3. End the answer with a "Sources:" section that lists the sources used
4. Make the answer helpful, comprehensive, and accurate
"""

# Prompt Templates: PromptTemplate objects built from the raw prompt strings.
fetch_prompt_template = PromptTemplate.from_template(fetch_prompt)
evaluate_prompt_template = PromptTemplate.from_template(evaluate_prompt)
draft_prompt_template = PromptTemplate.from_template(draft_prompt)

# {format_instructions} is pre-filled from fact_check_parser, so callers only
# supply query, snippets and draft_answer when formatting this template.
fact_check_template = PromptTemplate(
    template=fact_check_prompt,
    input_variables=["query", "snippets", "draft_answer"],
    partial_variables={"format_instructions": fact_check_parser.get_format_instructions()}
)

final_answer_template = PromptTemplate.from_template(final_answer_prompt)

# Prompt Chains: template | LLM pipelines, one per pipeline stage.
# NOTE(review): the node functions visible in this notebook format the templates
# and call `_call` on the LLMs directly instead of invoking these chains —
# confirm the chains are still needed.
fetch_chain = fetch_prompt_template | research_llm
evaluate_chain = evaluate_prompt_template | research_llm
draft_chain = draft_prompt_template | answer_drafting_llm
fact_check_chain = fact_check_template | fact_checking_llm
final_answer_chain = final_answer_template | answer_drafting_llm

# Agent Functions
def _snippet_text(item: Dict) -> str:
    """Extract the readable text of one search-result dict, always as a string."""
    text = next(
        (item[key] for key in ("content", "text", "snippet") if item.get(key)),
        None,
    )
    if not text:
        # No usable field: fall back to the whole record.
        text = str(item)
    if not isinstance(text, str):
        # Downstream code slices and concatenates snippet text, so it must be a str.
        return str(text)

    if text.startswith("{") and "content" in text:
        # Some results arrive as a stringified dict; unwrap "content" when it parses.
        try:
            parsed = json.loads(text.replace("'", "\""))
        except ValueError:
            return text
        if isinstance(parsed, dict):
            inner = parsed.get("content")
            if isinstance(inner, str) and inner:
                return inner
    return text


def fetch_snippets(state: DraftState) -> DraftState:
    """Run a web search for the user query and merge the results into the state.

    Args:
        state: Graph state; ``user_query`` is required. ``retry_count``,
            ``verdict``, ``all_snippets`` and ``sources`` are read when present.

    Returns:
        A copy of ``state`` with:
          * ``snippets``     – snippets found by this call (empty on failure);
          * ``all_snippets`` – earlier snippets plus the new ones;
          * ``sources``      – de-duplicated ``{"title", "url"}`` dicts; a
            snippet's ``source_id`` is the 1-based position of its source in
            this list (0 when the result carried no source information);
          * ``verdict``      – ``"yes"`` when at least one snippet was found;
          * ``retry_count``  – number of retries performed so far.
    """
    rc = state.get("retry_count", 0)
    # decide_next_step routes back to this node only while the verdict is "no",
    # so arriving with that verdict means this call is a retry. Counting it here
    # is what lets the `retry_count < MAX_RETRIES` check eventually stop the loop;
    # nothing else between fetch and decide advances the counter.
    if str(state.get("verdict", "")).lower() == "no":
        rc += 1

    all_snippets = state.get("all_snippets", [])
    # Copy so the incoming state is not mutated; position + 1 is the source number.
    all_sources = list(state.get("sources", []))
    query = state["user_query"]

    print(f"[FETCH] Searching for query: '{query}' (attempt {rc+1}/{MAX_RETRIES+1})")

    try:
        raw = research_tool.run(query)

        # The tool may hand back a JSON string, a dict, a list or something else.
        if isinstance(raw, str):
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                data = [{"text": raw}]
        else:
            data = raw

        if isinstance(data, dict):
            snippet_list = [data]
        elif isinstance(data, list):
            snippet_list = data
        else:
            snippet_list = [{"text": str(data)}]

        snippets = []
        for item in snippet_list:
            if isinstance(item, dict):
                text = _snippet_text(item)
                source = {
                    "title": item.get("title", "Unknown Source"),
                    "url": item.get("url", "#"),
                }
                if source not in all_sources:
                    all_sources.append(source)
                # Resolve the id after de-duplication so repeated sources share one
                # number and ids stay valid across several fetches.
                source_id = all_sources.index(source) + 1
            else:
                text = str(item)
                source_id = 0

            snippets.append({
                "id": len(all_snippets) + len(snippets) + 1,
                "text": text,
                "source_id": source_id,
                "query": query
            })

        print(f"[FETCH] Found {len(snippets)} snippets")
        if snippets:
            first_text = snippets[0]["text"]
            preview = first_text[:100] + "..." if len(first_text) > 100 else first_text
            print(f"[FETCH] Sample: \"{preview}\"")

        return {
            **state,
            "snippets": snippets,
            "all_snippets": all_snippets + snippets,
            "sources": all_sources,
            "verdict": "yes" if snippets else "no",
            "retry_count": rc
        }

    except Exception as e:
        # Best effort: report the failure and let the graph decide whether to retry.
        print(f"[FETCH] Error occurred during search: {e}")
        return {
            **state,
            "snippets": [],
            "all_snippets": all_snippets,
            "sources": state.get("sources", []),
            "verdict": "no",
            "retry_count": rc
        }

def evaluate_snippets(state: DraftState) -> DraftState:
    """Ask the research LLM whether the latest snippets help answer the query.

    Args:
        state: Graph state; reads ``snippets``, ``user_query`` and the
            ``verdict`` left by the fetch step.

    Returns:
        A copy of ``state`` whose ``verdict`` is ``"yes"`` or ``"no"``. With no
        snippets the verdict is ``"no"`` without calling the LLM. If the LLM
        call fails, the verdict already in the state is kept.
    """
    # Local import: the notebook's import lines do not provide `re`.
    import re

    snippets = state.get("snippets") or []
    print(f"[EVALUATE] Analyzing {len(snippets)} snippets for relevance")

    if not snippets:
        return {**state, "verdict": "no"}

    formatted_snippets = []
    for snippet in snippets:
        text = str(snippet["text"])
        snippet_text = text[:500] + "..." if len(text) > 500 else text
        formatted_snippets.append(f"Snippet {snippet['id']}: {snippet_text}")
    snippet_input = "\n\n".join(formatted_snippets)

    # Fallback when the model cannot be consulted: keep the verdict from the fetch step.
    verdict = state.get("verdict", "no")

    try:
        formatted_prompt = evaluate_prompt_template.format(
            query=state["user_query"],
            snippets=snippet_input
        )
        reply = research_llm._call(formatted_prompt).strip().lower()

        if reply.startswith("error:"):
            # GroqLLM._call reports failures as "Error: ..." text instead of raising;
            # that is not a relevance judgement, so the fallback verdict stands.
            print("[EVALUATE] LLM call failed, keeping previous verdict")
        else:
            # Compare whole words so "yesterday" or "eyes" cannot count as "yes",
            # and trust a leading yes/no over anything mentioned later in the reply.
            words = re.findall(r"[a-z]+", reply)
            if words and words[0] in ("yes", "no"):
                verdict = words[0]
            else:
                verdict = "yes" if "yes" in words else "no"

    except Exception as e:
        print(f"[EVALUATE] Evaluation failed ({e}), keeping previous verdict")

    print(f"[EVALUATE] Verdict: {verdict.upper()}")
    return {**state, "verdict": verdict}

def decide_next_step(state: DraftState) -> Literal["fetch_snippets", "ingest_snippets"]:
    """Choose the next node: search again or continue to ingestion.

    Args:
        state: Graph state; reads ``verdict`` and ``retry_count`` (default 0).

    Returns:
        ``"fetch_snippets"`` while the verdict is "no" and fewer than
        ``MAX_RETRIES`` retries have been recorded, otherwise
        ``"ingest_snippets"``. This function only returns a route name; it does
        not update ``retry_count`` itself.
    """
    rc = state.get("retry_count", 0)
    insufficient = state["verdict"].lower() == "no"

    if insufficient and rc < MAX_RETRIES:
        print("[DECIDE] Insufficient information, retrying search")
        return "fetch_snippets"

    # Exactly one explanation is logged for moving on; previously the
    # "sufficient information" line was printed even after retries ran out.
    if insufficient:
        print("[DECIDE] Max retries reached, proceeding with available data")
    else:
        print("[DECIDE] Sufficient information found, proceeding to draft")
    return "ingest_snippets"

def ingest_snippets(state: DraftState) -> DraftState:
    """Index every accumulated snippet in a FAISS vector store.

    Each snippet becomes a Document whose metadata carries its id, source id
    and originating query. The returned state always has ``retry_count``
    increased by one; ``vector_store`` is added only when the index was built.
    """
    collected = state.get("all_snippets", [])
    print(f"[INGEST] Processing {len(collected)} total snippets for drafting")

    docs = [
        Document(
            metadata={
                "id": entry["id"],
                "source_id": entry.get("source_id", 0),
                "query": entry.get("query", state["user_query"]),
            },
            page_content=entry["text"],
        )
        for entry in collected
    ]

    # Every exit path bumps the counter, so build that result once.
    next_state = {**state, "retry_count": state.get("retry_count", 0) + 1}

    if len(docs) == 0:
        print("[INGEST] No documents to ingest")
        return next_state

    print(f"[INGEST] Creating vector store with {len(docs)} documents")
    try:
        index = FAISS.from_documents(docs, embeddings)
    except Exception:
        # Index creation is best effort; the pipeline continues without it.
        print(f"[INGEST] Error creating vector store")
        return next_state

    next_state["vector_store"] = index
    return next_state

def draft_answer(state: DraftState) -> DraftState:
    """Draft a prose answer to the user query from every accumulated snippet.

    Args:
        state: Graph state; reads ``all_snippets`` and ``user_query``.

    Returns:
        A copy of ``state`` with ``draft_answer`` set to the model's reply, or
        to a fixed apology when the model could not be reached.
    """
    print("[DRAFT] Generating comprehensive response")

    formatted_snippets = []
    for snippet in state.get("all_snippets", []):
        text = str(snippet["text"])
        snippet_text = text[:500] + "..." if len(text) > 500 else text
        source_id = snippet.get("source_id", 0)
        # Snippets without source information (id 0) get no citation tag.
        source_text = f"[Source {source_id}]" if source_id > 0 else ""
        formatted_snippets.append(f"Snippet {snippet['id']}: {snippet_text} {source_text}")

    snippet_input = "\n\n".join(formatted_snippets)

    fallback = "Unable to generate answer due to technical difficulties."
    try:
        formatted_prompt = draft_prompt_template.format(
            query=state["user_query"],
            snippets=snippet_input
        )
        # Named `draft` so the local does not shadow this function.
        draft = answer_drafting_llm._call(formatted_prompt)

        if draft.startswith("Error: "):
            # GroqLLM._call reports failures as "Error: ..." text instead of raising;
            # without this check the fallback below could never be reached and the
            # error text itself would be passed on as the draft.
            print(f"[DRAFT] LLM call failed: {draft}")
            draft = fallback

    except Exception as e:
        print(f"[DRAFT] Drafting failed: {e}")
        draft = fallback

    print(f"[DRAFT] Draft response generated ({len(draft)} chars)")
    return {**state, "draft_answer": draft}

def _parse_fact_check_response(response: str) -> Optional[Dict[str, Any]]:
    """Extract the JSON object from a fact-checking LLM response.

    Tries, in order, the contents of the first Markdown code fence (with or
    without a ``json`` tag) and then the raw response. Each candidate is parsed
    as a whole JSON document first; if that fails, the first ``{...}`` object
    embedded in surrounding prose is decoded instead.

    Returns:
        The parsed ``dict``, or ``None`` when no JSON object could be recovered.
    """
    import re  # kept local: `re` is not part of the notebook's import cell

    candidates = []
    fenced = re.search(r"```(?:json)?\s*(.*?)```", response, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1))
    candidates.append(response)

    decoder = json.JSONDecoder()
    for candidate in candidates:
        candidate = candidate.strip()
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            # Not a pure JSON document: decode the first object found in the text
            # and ignore whatever prose follows it.
            start = candidate.find("{")
            if start == -1:
                continue
            try:
                parsed, _ = decoder.raw_decode(candidate, start)
            except json.JSONDecodeError:
                continue
        # Valid JSON that is not an object (list, number, ...) is unusable here.
        if isinstance(parsed, dict):
            return parsed
    return None


def fact_check(state: DraftState) -> DraftState:
    """Ask the fact-checking LLM to verify the draft answer against the snippets.

    Snippets are truncated to 300 characters each and sent together with the
    query and the draft through ``fact_check_template`` to ``fact_checking_llm``
    (both defined elsewhere in this notebook). The reply is expected to contain
    a JSON object.

    Args:
        state: Graph state; reads ``user_query``, ``all_snippets`` and
            ``draft_answer``.

    Returns:
        A copy of ``state`` with ``fact_check_result`` set to:
        * ``{"overall_accuracy": 0.5}`` when there are no snippets or no draft;
        * the parsed JSON object on success;
        * a default with ``overall_accuracy`` 0.7 when the reply has no usable
          JSON object;
        * a default with ``overall_accuracy`` 0.5 when the call itself fails.
    """
    print("[FACT-CHECK] Verifying claims in the draft answer")

    all_snippets = state.get("all_snippets", [])

    if not all_snippets or not state.get("draft_answer"):
        print("[FACT-CHECK] No snippets or draft to check, skipping verification")
        return {**state, "fact_check_result": {"overall_accuracy": 0.5}}

    formatted_snippets = []
    for snippet in all_snippets:
        text = snippet["text"]
        if len(text) > 300:
            text = text[:300] + "..."
        formatted_snippets.append(f"Snippet {snippet['id']}: {text}")

    snippet_input = "\n\n".join(formatted_snippets)

    try:
        formatted_prompt = fact_check_template.format(
            query=state["user_query"],
            snippets=snippet_input,
            draft_answer=state["draft_answer"]
        )

        fact_check_response = fact_checking_llm._call(formatted_prompt)

        fact_check_result = _parse_fact_check_response(fact_check_response)
        if fact_check_result is None:
            print("[FACT-CHECK] Failed to parse result, using default")
            fact_check_result = {
                "checked_claims": [],
                "overall_accuracy": 0.7
            }

        print(f"[FACT-CHECK] Verification complete with accuracy: {fact_check_result.get('overall_accuracy', 0.7)}")

    except Exception as e:
        # Best-effort node: report the cause, then fall back to a neutral result.
        print(f"[FACT-CHECK] Error during verification: {type(e).__name__}: {e}")
        fact_check_result = {
            "checked_claims": [],
            "overall_accuracy": 0.5
        }

    return {**state, "fact_check_result": fact_check_result}

def compile_final_answer(state: DraftState) -> DraftState:
    """Produce the final answer from the draft, the fact-check summary and sources.

    Builds a numbered source list from ``state["sources"]``, reduces the
    fact-check result to its overall accuracy, and sends everything through
    ``final_answer_template`` to ``answer_drafting_llm`` (both defined elsewhere
    in this notebook).

    Args:
        state: Graph state; reads ``user_query``, ``draft_answer``,
            ``fact_check_result`` and ``sources``.

    Returns:
        A copy of ``state`` with ``answer`` set to the LLM output, or to the
        draft answer when prompt formatting or the LLM call fails.
    """
    print("[COMPILE] Creating final answer with citations")

    draft_answer = state.get("draft_answer") or "No draft answer was generated."
    sources = state.get("sources") or []

    fact_check_result = state.get("fact_check_result")
    if not isinstance(fact_check_result, dict):
        fact_check_result = {}

    # The accuracy originates from LLM-produced JSON, so it may arrive as a
    # string or null. Coerce it here; otherwise the percentage formatting below
    # would raise and discard the whole finalization step.
    accuracy = fact_check_result.get("overall_accuracy", 0.5)
    if isinstance(accuracy, bool) or not isinstance(accuracy, (int, float)):
        try:
            accuracy = float(accuracy)
        except (TypeError, ValueError):
            accuracy = 0.5

    formatted_sources = [
        f"Source {idx}: {source.get('title', 'Unknown Source')} - {source.get('url', '#')}"
        for idx, source in enumerate(sources, start=1)
    ]
    sources_text = "\n".join(formatted_sources)

    try:
        simplified_fact_check = {
            "overall_accuracy": accuracy,
            "summary": f"Overall accuracy is {accuracy * 100:.0f}%"
        }

        formatted_prompt = final_answer_template.format(
            query=state["user_query"],
            draft_answer=draft_answer,
            fact_check_results=json.dumps(simplified_fact_check, indent=2),
            sources=sources_text
        )

        final_answer = answer_drafting_llm._call(formatted_prompt)

    except Exception as e:
        # Fall back to the unpolished draft, but say why finalization failed.
        print(f"[COMPILE] Error during finalization: {type(e).__name__}: {e}")
        final_answer = draft_answer

    print(f"[COMPILE] Final answer created ({len(final_answer)} chars)")
    return {**state, "answer": final_answer}

## Creating the graph

In [195]:
# Research pipeline:
#   fetch_snippets -> evaluate_snippets -> (retry fetch | ingest_snippets)
#   ingest_snippets -> create_draft -> fact_check -> compile_final_answer -> END
graph = StateGraph(DraftState)

# Add nodes
# NOTE: `decide_next_step` is intentionally not registered as a node. It is used
# below only as the routing function of the conditional edge; a node of that
# name would have no inbound or outbound edges and would be unreachable.
graph.add_node("fetch_snippets", fetch_snippets)
graph.add_node("evaluate_snippets", evaluate_snippets)
graph.add_node("ingest_snippets", ingest_snippets)
graph.add_node("create_draft", draft_answer)
graph.add_node("fact_check", fact_check)
graph.add_node("compile_final_answer", compile_final_answer)

# Connect nodes
graph.set_entry_point("fetch_snippets")
graph.add_edge("fetch_snippets", "evaluate_snippets")
# After evaluation the router picks one of the mapped keys: search again or
# continue to ingestion.
graph.add_conditional_edges(
    "evaluate_snippets",
    decide_next_step,
    {
        "fetch_snippets": "fetch_snippets",
        "ingest_snippets": "ingest_snippets"
    }
)
graph.add_edge("ingest_snippets", "create_draft")
graph.add_edge("create_draft", "fact_check")
graph.add_edge("fact_check", "compile_final_answer")
graph.add_edge("compile_final_answer", END)

# Compile the graph
app = graph.compile()

## Final answer

In [196]:
def run_research(query):
    """Run the compiled research graph for ``query`` and print the answer.

    Args:
        query: The user's research question.

    Returns:
        The final graph state returned by ``app.invoke`` (the answer is under
        the ``"answer"`` key when one was produced), so callers can inspect the
        result instead of only seeing it printed.
    """
    initial_state = {"user_query": query, "retry_count": 0}
    print(f"\nRESEARCH QUERY: {query}")
    print("=" * 60)

    final_state = app.invoke(initial_state)

    print("=" * 60)
    print(final_state.get("answer", "No answer was generated."))
    print("=" * 60)

    return final_state

In [197]:
if __name__ == "__main__":
    # Demo run of the full research pipeline on a sample question.
    result = run_research(
        "What are the leading innovations in Quantum Computing. "
        "Tell some applications for quantum computing"
    )


RESEARCH QUERY: What are the leading innovations in Quantum Computing. Tell some applications for quantum computing
[FETCH] Searching for query: 'What are the leading innovations in Quantum Computing. Tell some applications for quantum computing' (attempt 1/4)
[FETCH] Found 5 snippets
[FETCH] Sample: "Quantum computing is already shaking up the world of cybersecurity. With techniques like Quantum Key..."
[EVALUATE] Analyzing 5 snippets for relevance
[EVALUATE] Verdict: YES
[DECIDE] Sufficient information found, proceeding to draft
[INGEST] Processing 5 total snippets for drafting
[INGEST] Creating vector store with 5 documents
[DRAFT] Generating comprehensive response
[DRAFT] Draft response generated (2285 chars)
[FACT-CHECK] Verifying claims in the draft answer
[FACT-CHECK] Failed to parse result, using default
[FACT-CHECK] Verification complete with accuracy: 0.7
[COMPILE] Creating final answer with citations
[COMPILE] Final answer created (3128 chars)
Final Answer:

FINAL ANSWER:

