# Phase 1

In [8]:
# 1. Install all required libraries
!pip install -q dspy-ai langgraph langchain langchain_openai qdrant-client tavily-python datasets
!pip install -U langchain-community langchain-google-genai

# 2. Set up API keys from Colab secrets
import os
from google.colab import userdata

# Ensure you have set these secrets in your Colab environment
os.environ["GOOGLE_API_KEY"] = userdata.get('LLM_API')
os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')

print("Phase 1 Complete: Libraries installed and GOOGLE_API_KEY and TAVILY_API_KEY configured.")

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.8-py3-none-any.whl.metadata (7.0 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Downloading langchain_google_genai-2.1.8-py3-none-any.whl (47 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.8/47.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: filetype, google-ai-generativelanguage, langchain-google-genai
  Attempting uninstall: google-ai-generativelangu

Phase 1 Complete: Libraries installed and GOOGLE_API_KEY and TAVILY_API_KEY configured.


# Phase 2

In [9]:
import qdrant_client
from datasets import load_dataset
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Qdrant

# 1. Load the first 100 training examples of GSM8K
dataset = load_dataset("gsm8k", "main", split="train[:100]")

# Walk the rows once: keep the bare questions, and for the knowledge base pair
# every question with its worked answer so retrieval has richer context.
questions = []
documents_for_kb = []
for item in dataset:
    questions.append(item['question'])
    documents_for_kb.append(f"Question: {item['question']}\nAnswer: {item['answer']}")

# 2. Embedding model used both to index the documents and to embed queries
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# 3. Index the documents in an in-memory Qdrant collection
# (nothing to provision or tear down, which suits a notebook session)
qdrant_instance = Qdrant.from_texts(
    documents_for_kb,
    embeddings,
    collection_name="math_kb",
    location=":memory:",
)

# 4. Retriever over the collection, returning the 3 closest documents per query
retriever = qdrant_instance.as_retriever(search_kwargs={"k": 3})

# 5. Smoke-test the retrieval with a sample query
sample_query = "How many pencils does Mark have left?"
retrieved_docs = retriever.invoke(sample_query)
top_document = retrieved_docs[0]

print("--- Phase 2 Complete: Knowledge Base Created with Gemini Embeddings ---")
print(f"\nSample Query: '{sample_query}'")
print("\nTop Retrieved Document:")
print(top_document.page_content)

--- Phase 2 Complete: Knowledge Base Created with Gemini Embeddings ---

Sample Query: 'How many pencils does Mark have left?'

Top Retrieved Document:
Question: Arnel had ten boxes of pencils with the same number of pencils in each box.  He kept ten pencils and shared the remaining pencils equally with his five friends. If his friends got eight pencils each, how many pencils are in each box?
Answer: Arnel shared 5 x 8 = <<5*8=40>>40 pencils with his friends.
So, he had 10 + 40 = <<10+40=50>>50 pencils in all.
Therefore, each box had 50/10 = <<50/10=5>>5 pencils inside.
#### 5


# Phase 3


In [14]:
import operator
from typing import TypedDict, Annotated, List
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI # Changed import
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.graph import StateGraph, END

# --- 1. Define Agent State and Tools ---

class AgentState(TypedDict):
    """State shared by the nodes and routing edge of the workflow graph.

    StateGraph is built from this schema, so every key that one node writes
    and a later node or edge reads has to be declared here. That includes
    ``grade``, which ``grade_documents`` returns and ``decide_next_step``
    reads to choose the next node.
    """
    # The user's question, supplied when the graph is invoked.
    question: str
    # Text of the documents gathered by the most recent retrieval/search node.
    documents: List[str]
    # The final answer text produced by a terminal node.
    generation: str
    # Where `documents` came from: "KB" or "Web".
    source: str
    # Relevance verdict from the grading node: "yes" or "no".
    grade: str

# Initialize our tools
# The Tavily tool's result-count setting is `max_results`; `k` is not one of
# its fields, so passing `k=3` did not limit the search to three results.
web_search_tool = TavilySearchResults(max_results=3)
# Gemini chat model shared by the grading and generation nodes below.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# --- 2. Define Graph Nodes ---

def retrieve_from_kb(state):
    """Graph node: look up the knowledge base for the question in ``state``.

    Queries the notebook-level ``retriever`` with ``state["question"]`` and
    returns a partial state update holding the text of every hit under
    "documents" and the marker "KB" under "source".
    """
    print("---NODE: RETRIEVE FROM KB---")
    hits = retriever.invoke(state["question"])
    kb_texts = []
    for hit in hits:
        kb_texts.append(hit.page_content)
    return {"documents": kb_texts, "source": "KB"}

def web_search(state):
    """Graph node: search the web with Tavily, simulating MCP output.

    Sends ``state["question"]`` to the notebook-level ``web_search_tool`` and
    returns a partial state update with one formatted string per usable hit
    under "documents" and the marker "Web" under "source".

    If the tool hands back anything other than a list (the LangChain Tavily
    tool is expected to report a failed request as a string - confirm against
    its API docs), or a hit lacks "url"/"content", that data is skipped rather
    than raising. An empty "documents" list is then graded "no" downstream.
    """
    print("---NODE: WEB SEARCH---")
    question = state["question"]
    search_results = web_search_tool.invoke({"query": question})

    if not isinstance(search_results, list):
        # Iterating a string would index characters with 'url' and crash.
        print(f"Warning: Web search returned no usable results: {search_results!r}")
        search_results = []

    # Simulate MCP by formatting the output clearly
    mcp_formatted_docs = [
        f"Source: {res['url']}\nContent: {res['content']}"
        for res in search_results
        if isinstance(res, dict) and "url" in res and "content" in res
    ]
    return {"documents": mcp_formatted_docs, "source": "Web"}

def grade_documents(state):
    """Graph node: grade whether the top retrieved document fits the question.

    Reads "question", "documents" and "source" from ``state``. Only the first
    document is graded, for simplicity. Returns a partial state update with
    "grade" set to "yes" or "no" and "source" passed through unchanged.

    The verdict is taken from the model's plain-text reply rather than from
    structured/function-call output. The first word of the reply is used after
    lower-casing and stripping surrounding punctuation or markdown, so replies
    such as "Yes." or "**no**" are understood. Anything else is treated as
    "no" with a warning. With no documents the grade is "no" and the model is
    not called.
    """
    print("---NODE: GRADE DOCUMENTS---")
    question = state["question"]
    documents = state["documents"]
    source = state["source"]  # passed through so routing knows which retrieval was graded

    if not documents:
        grade = "no"
    else:
        prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a grader assessing relevance of a retrieved document to a user question. Respond with 'yes' if relevant, 'no' otherwise."),
            ("human", "Retrieved document:\n\n{document}\n\nUser question: {question}"),
        ])
        chain = prompt | llm
        result = chain.invoke({"question": question, "document": documents[0]})

        raw_reply = result.content.strip().lower()
        # Use the first word only, without decoration such as quotes,
        # trailing periods or markdown emphasis.
        words = raw_reply.split()
        grade = words[0].strip(".,!?:;'\"`*") if words else ""
        if grade not in ["yes", "no"]:
            print(f"Warning: Unexpected grading response: {raw_reply}. Defaulting to 'no'.")
            grade = "no"

    print(f"Grade: {grade}")
    return {"grade": grade, "source": source}

def generate_solution(state):
    """Graph node: write the step-by-step answer for the question.

    Reads "question", "documents" and "source" from ``state``, joins the
    documents with blank lines as the prompt context, and sends the filled
    prompt to the notebook-level ``llm``. Returns a partial state update with
    the reply text under "generation".
    """
    print("---NODE: GENERATE SOLUTION---")
    user_question = state["question"]
    context_docs = state["documents"]
    origin = state["source"]

    solution_prompt = ChatPromptTemplate.from_template(
        """You are a helpful math professor. Your goal is to provide a clear, step-by-step solution to the user's question.

        Use the following context from your knowledge source ({source}) to answer the question. If the context is empty or not useful, use your own knowledge but state that you are doing so.

        Context:
        {context}

        Question:
        {question}

        Provide your final answer as a step-by-step solution."""
    )
    prompt_inputs = {
        "context": "\n\n".join(context_docs),
        "question": user_question,
        "source": origin,
    }
    reply = (solution_prompt | llm).invoke(prompt_inputs)
    return {"generation": reply.content}

def handle_no_solution(state):
    """Graph node: fallback answer for when no source produced a usable document.

    Ignores the contents of ``state`` and returns a fixed apology under
    "generation".
    """
    print("---NODE: HANDLE NO SOLUTION---")
    apology = "I'm sorry, but I couldn't find a reliable answer in my knowledge base or through a web search. Please try rephrasing your question."
    return dict(generation=apology)

# --- 3. Define Graph Edges (Routing Logic) ---

def decide_next_step(state):
    """Conditional edge: choose the node that follows grading.

    Returns "generate" when ``state["grade"]`` is "yes". Otherwise returns
    "web_search" if the graded documents came from the knowledge base
    (``state["source"] == "KB"``), and "handle_fail" for any other source.
    """
    print("---EDGE: DECIDING NEXT STEP---")
    if state["grade"] == "yes":
        return "generate"

    # Not relevant: fall back once to the web, then give up.
    if state["source"] == "KB":
        print("KB retrieval failed. Routing to Web Search.")
        return "web_search"

    print("Web Search also failed. Routing to end.")
    return "handle_fail"

# --- 4. Construct the Graph ---

# Build the workflow over the shared AgentState schema.
workflow = StateGraph(AgentState)

# Register every node under the name the edges below refer to.
for node_name, node_fn in (
    ("retrieve_kb", retrieve_from_kb),
    ("web_search", web_search),
    ("grade_docs", grade_documents),
    ("generate", generate_solution),
    ("handle_fail", handle_no_solution),
):
    workflow.add_node(node_name, node_fn)

# Every run starts with a knowledge-base lookup, which is then graded.
workflow.set_entry_point("retrieve_kb")
workflow.add_edge("retrieve_kb", "grade_docs")

# After grading, decide_next_step returns a label; each label maps to the
# node of the same name.
grade_routes = {
    "web_search": "web_search",
    "generate": "generate",
    "handle_fail": "handle_fail",
}
workflow.add_conditional_edges("grade_docs", decide_next_step, grade_routes)

# Web results go back through the grader; both answer nodes finish the run.
workflow.add_edge("web_search", "grade_docs")
for terminal_node in ("generate", "handle_fail"):
    workflow.add_edge(terminal_node, END)

# Compile the graph into a runnable app
app = workflow.compile()
print("\n--- Phase 3 Complete: Agentic Workflow Compiled ---")

# --- 5. Test the Agent ---

# Cap on graph steps per run. This is a limit (lower than LangGraph's default),
# so a routing mistake fails fast instead of cycling between grading and search.
RUN_CONFIG = {"recursion_limit": 10}


def run_and_trace(graph_inputs):
    """Stream one run of ``app``, print each node's update, return the answer.

    Args:
        graph_inputs: Initial state for the graph, e.g. ``{"question": "..."}``.

    Returns:
        The last "generation" value emitted by any node during the run, or
        ``None`` if no node produced one.
    """
    final_generation = None
    for output in app.stream(graph_inputs, RUN_CONFIG):
        for key, value in output.items():
            print(f"Node '{key}':\n{value}\n")
            if isinstance(value, dict) and "generation" in value:
                final_generation = value["generation"]
    return final_generation


# Test Case 1: A question that should be in the Knowledge Base
kb_question = "Natalia sold 48 liters of milk in the morning. In the afternoon, she sold 27 liters less than in the morning. How many liters of milk did she sell in total?"
print("\n\n--- TESTING KB PATH ---")
inputs = {"question": kb_question}
run_and_trace(inputs)

# Test Case 2: A question that requires a web search
web_question = "What is the formula for the volume of a torus?"
print("\n\n--- TESTING WEB SEARCH PATH ---")
inputs = {"question": web_question}

# Store the answer for the next phase. It is taken from the traced run itself
# rather than from a second invocation, so the stored text is exactly the
# answer shown above and the graph is not executed twice.
initial_answer_for_feedback = run_and_trace(inputs)
if initial_answer_for_feedback is None:
    raise RuntimeError("The agent run finished without producing a 'generation'.")


--- Phase 3 Complete: Agentic Workflow Compiled ---


--- TESTING KB PATH ---
---NODE: RETRIEVE FROM KB---
Node 'retrieve_kb':
{'documents': ['Question: Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\nAnswer: Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72', 'Question: Jennifer purchased 40 cans of milk at the store before meeting her classmate Mark, who was also buying milk. Jennifer bought 6 additional cans for every 5 cans Mark bought. If Mark purchased 50 cans, how many cans of milk did Jennifer bring home from the store?\nAnswer: If Mark bought 50 cans of milk, the number of times Jennifer added 6 cans for every 5 that Mark bought is 50/5 = <<50/5=10>>10 times.\nThe total number of additional cans she bought is 10*6 = <<10*6=60>>60 cans.\nIf she initially had 40 cans, she went home with 40+60 

# Phase 4


In [25]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Gemini chat model for the refinement step (relies on the API key configured earlier).
# NOTE: this rebinds the notebook-level name `llm`; code that looks up `llm`
# when it is called will use this instance from here on.
llm = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-2.0-flash")

# Prompt text for the feedback-refinement step: the model sees the question,
# the first answer and the student's feedback, and is asked for a revision.
refine_template_text = """
You are a math professor reviewing and improving a previously generated answer based on student feedback.

Original Question:
{question}

Initial Answer:
{initial_answer}

Student Feedback:
{feedback}

Now revise the solution to improve clarity and correctness. Provide a step-by-step refined answer.
"""
refine_prompt = ChatPromptTemplate.from_template(refine_template_text)

# Prompt piped into the model
refine_chain = refine_prompt | llm


In [26]:
# Inputs for the feedback round, carried over from the Phase 3 web-search run
original_question = web_question
initial_answer = initial_answer_for_feedback
human_feedback = "This is good, but can you also explain what 'R' and 'r' represent in the formula?"

# Show what is being sent for refinement
for report_line in (
    "--- Phase 4 Complete: Simulating Feedback Loop ---",
    f"\nOriginal Question: {original_question}",
    f"\nInitial Answer:\n{initial_answer}",
    f"\nSimulated Human Feedback: '{human_feedback}'",
):
    print(report_line)

# Ask the model for a revised answer
refine_inputs = {
    "question": original_question,
    "initial_answer": initial_answer,
    "feedback": human_feedback,
}
response = refine_chain.invoke(refine_inputs)

# The revised text is the content of the model's reply
refined_answer = response.content

print(f"\n--- REFINED ANSWER ---\n{refined_answer}")


--- Phase 4 Complete: Simulating Feedback Loop ---

Original Question: What is the formula for the volume of a torus?

Initial Answer:
The formula for the volume of a torus is derived using calculus, but the final result is straightforward to apply.  Here's a step-by-step explanation:

**1. Understanding the Variables:**

* **r:** This represents the radius of the circular cross-section of the torus (essentially, the radius of the "tube" of the donut).
* **R:** This represents the distance from the center of the torus to the center of the circular cross-section (essentially, the radius of the circle formed by the center of the tube).

**2. The Formula:**

The volume (V) of a torus is given by the formula:

V = 2π²r²R

**3. Applying the Formula:**

To calculate the volume, simply substitute the values of 'r' and 'R' into the formula and perform the calculation.  Remember to use the value of π (pi) as approximately 3.14159.

**Example:**

Let's say we have a torus with r = 5 cm and R = 1

In [27]:
# Store the refined answer in the knowledge base, in the same
# "Question: ...\nAnswer: ..." layout used for the documents indexed in Phase 2.
refined_document = f"Question: {original_question}\nAnswer: {refined_answer}"

# The call's return value is left as the cell's last expression so it is displayed.
qdrant_instance.add_texts(texts=[refined_document])


['96ea90a7dce9437f86ddffb4a0411234']