In [14]:
# %% [1. Install Dependencies]
!pip install -q langchain langgraph chromadb langchain-community tiktoken langchain-groq wikipedia arxiv python-dotenv

# %% [2. Environment Setup]
import os
from dotenv import load_dotenv

load_dotenv()
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY", "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")

# %% [3. Document Loading & Indexing]
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Load documents
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

try:
    docs = []
    for url in urls:
        try:
            loader = WebBaseLoader(url)
            docs.extend(loader.load())
        except Exception as e:
            print(f"Failed to load {url}: {str(e)}")
    
    # Split documents
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=512,
        chunk_overlap=128,
        separators=["\n\n", "\n", " ", ""]
    )
    doc_splits = text_splitter.split_documents(docs)
    
    # Initialize ChromaDB
    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True}
    )
    
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        embedding=embeddings,
        persist_directory="./chroma_db",
        collection_metadata={"hnsw:space": "cosine"}
    )
    print(f"Successfully indexed {len(doc_splits)} document chunks")
except Exception as e:
    print(f"Indexing failed: {str(e)}")
    raise

# %% [4. Initialize Retriever]
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# %% [5. Tool Setup]
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper

# Wikipedia tool
wiki = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(
        top_k_results=2,
        doc_content_chars_max=2000,
        load_all_available_meta=True
    )
)

# ArXiv tool
arxiv = ArxivQueryRun(
    api_wrapper=ArxivAPIWrapper(
        top_k_results=2,
        doc_content_chars_max=2000
    )
)

# %% [6. Router Setup]
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

# Router model
class RouteQuery(BaseModel):
    datasource: Literal["vectorstore", "wikipedia", "arxiv"] = Field(
        description="Source to answer the question"
    )

# LLM setup
llm = ChatGroq(
    model_name="deepseek-r1-distill-llama-70b",
    temperature=0.3,
    max_tokens=1024
)

# Router chain
# %% [Complete Fixed Implementation]
from typing import TypedDict, List, Optional
from langchain_core.documents import Document
from langgraph.graph import StateGraph, END

# Updated state definition
class AgentState(TypedDict):
    question: str
    documents: List[Document]
    generation: str
    errors: List[str]
    routing_decision: Optional[str]

# Initialize graph
workflow = StateGraph(AgentState)

# Define nodes with proper state returns
def retrieve(state: AgentState):
    try:
        docs = retriever.invoke(state["question"])
        return {
            "documents": docs,
            "errors": [],
            "routing_decision": state.get("routing_decision")
        }
    except Exception as e:
        return {
            "documents": [],
            "errors": [f"Retrieval error: {str(e)}"],
            "routing_decision": state.get("routing_decision")
        }

def wiki_search(state: AgentState):
    try:
        content = wiki.invoke({"query": state["question"]})
        return {
            "documents": [Document(page_content=content)],
            "errors": [],
            "routing_decision": state.get("routing_decision")
        }
    except Exception as e:
        return {
            "documents": [],
            "errors": [f"Wikipedia error: {str(e)}"],
            "routing_decision": state.get("routing_decision")
        }

def arxiv_search(state: AgentState):
    try:
        content = arxiv.invoke({"query": state["question"]})
        return {
            "documents": [Document(page_content=content)],
            "errors": [],
            "routing_decision": state.get("routing_decision")
        }
    except Exception as e:
        return {
            "documents": [],
            "errors": [f"ArXiv error: {str(e)}"],
            "routing_decision": state.get("routing_decision")
        }

def generate_response(state: AgentState):
    try:
        context = "\n".join(doc.page_content for doc in state["documents"])
        prompt = f"Question: {state['question']}\nContext: {context}"
        response = llm.invoke(prompt)
        return {
            "generation": response.content,
            "errors": state["errors"],
            "routing_decision": state.get("routing_decision")
        }
    except Exception as e:
        return {
            "generation": "",
            "errors": state["errors"] + [f"Generation error: {str(e)}"],
            "routing_decision": state.get("routing_decision")
        }

def route_question(state: AgentState):
    try:
        route = router_chain.invoke({"question": state["question"]})
        return {
            "question": state["question"],
            "documents": [],
            "generation": "",
            "errors": [],
            "routing_decision": route.datasource
        }
    except Exception as e:
        return {
            "question": state["question"],
            "documents": [],
            "generation": "",
            "errors": [f"Routing error: {str(e)}"],
            "routing_decision": "vectorstore"
        }

# Add nodes
workflow.add_node("router", route_question)
workflow.add_node("retrieve", retrieve)
workflow.add_node("wiki_search", wiki_search)
workflow.add_node("arxiv_search", arxiv_search)
workflow.add_node("generate", generate_response)

# Set entry point
workflow.set_entry_point("router")

# Add edges
workflow.add_conditional_edges(
    "router",
    lambda state: state.get("routing_decision", "vectorstore"),
    {
        "vectorstore": "retrieve",
        "wikipedia": "wiki_search",
        "arxiv": "arxiv_search"
    }
)

workflow.add_edge("retrieve", "generate")
workflow.add_edge("wiki_search", "generate")
workflow.add_edge("arxiv_search", "generate")
workflow.add_edge("generate", END)

# Compile
app = workflow.compile()

# %% [Fixed Test Function]
def run_query(question):
    print(f"\n🔍 Query: {question}")
    try:
        # Initialize with empty state
        result = app.invoke({
            "question": question,
            "documents": [],
            "generation": "",
            "errors": [],
            "routing_decision": None
        })
        
        print("\n📝 Answer:", result.get("generation", "No answer generated"))
        
        if result.get("documents"):
            print("\n🔗 Sources:")
            for i, doc in enumerate(result["documents"], 1):
                source = doc.metadata.get("source", "unknown")
                print(f"{i}. {source}: {doc.page_content[:100]}...")
        
        if result.get("errors"):
            print("\n⚠️ Errors:")
            for error in result["errors"]:
                print(f"- {error}")
                
    except Exception as e:
        print(f"\n❌ System error: {str(e)}")

# Test cases
test_queries = [
    "What is chain-of-thought prompting?",
    "Explain LLM adversarial attacks",
    "Who is Yoshua Bengio?",
    "Latest papers on transformer architectures",
    "Invalid query test 123#$%"
]

for query in test_queries:
    run_query(query)
    print("\n" + "="*80 + "\n")

Successfully indexed 92 document chunks

🔍 Query: What is chain-of-thought prompting?

📝 Answer: <think>
Okay, so I'm trying to understand what chain-of-thought prompting is. From the context provided, it seems like it's a technique used with large language models (LLMs) to help them handle complex tasks. Let me break this down.

First, the context mentions that complex tasks usually involve many steps. An autonomous agent needs to plan ahead, which makes sense because without planning, it might get stuck or make mistakes. So, task decomposition is important here. The term "chain of thought" (CoT) is introduced as a standard prompting technique. It's by Wei et al. in 2022, so it's a relatively recent development.

The idea is to instruct the model to "think step by step." This makes the model use more test-time computation to break down hard tasks into smaller, simpler steps. That way, the model can manage each part individually, making the overall task less daunting. It also mentions 