# Week 2 Validation Notebook
Tests all components of the Multi-Agent LangGraph System.
Date: 11:11 AM +07, November 01, 2025

In [None]:
# --- Setup Project Path ---
import sys
from pathlib import Path

def find_project_root():
    path = Path.cwd()
    while path != path.parent:
        if (path / "src").exists():
            return path
        path = path.parent
    return Path.cwd()

PROJECT_ROOT = find_project_root()
sys.path.insert(0, str(PROJECT_ROOT))

print(f"PROJECT_ROOT: {PROJECT_ROOT}")
print(f"src/ exists: {(PROJECT_ROOT / 'src').exists()}")

PROJECT_ROOT: d:\Learn\AIE Project\arxiv-insight-engine
src/ exists: True


In [None]:
# --- Imports ---
import os
from src.agents.graph import app, AgentState
from src.agents.tools.hybrid_retriever import EnsembleRetriever
from src.agents.tools.summarizer import Summarizer
from src.agents.tools.image_captioner import ImageCaptioner
from src.agents.nodes.retriever import retrieve
from src.agents.nodes.summarizer import summarize
from src.agents.nodes.visual_analyzer import analyze_figures
from src.agents.nodes.synthesizer import synthesize
from src.agents.nodes.fact_checker import fact_check
from src.stores.feedback_store import store_feedback
from langchain_core.messages import HumanMessage
import uuid

print("All imports successful!")



Device set to use cpu
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cpu


All imports successful!


## 1. Test Tools

In [None]:
# --- 1.1 Hybrid Retriever ---
hybrid_retriever = EnsembleRetriever.from_qdrant()

# Pass the user query
query = "what is machine learning?"
results = hybrid_retriever.retrieve(query, k=10)

# Process results
if results:
    for doc in results:
        print(f"Content: {doc.page_content[:100]}... Type: {doc.metadata.get('type', 'unknown')}")
else:
    print("No results returned. Check Qdrant data or retriever setup.")













In [7]:
results

[Document(metadata={'_id': '6fae486d-e968-41d6-acb6-c0aef723580d', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': '98874f8b-0af2-4a55-9fb0-683eb98bf363', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': 'f100e488-e798-43e8-bb38-8e1747b5cc6e', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': '0cb13cf3-9b53-4b4c-a1f9-df3eede22c71', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': '4ff3b431-c22b-46bc-87b4-a2bb2023edb6', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': 'bf78cb7d-0ecd-4092-a524-9163a36ac7df', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': '81583444-0f8f-4579-8bec-593f4a3f3ce1', '_collection_name': 'arxiv_multimodal'}, page_content=''),
 Document(metadata={'_id': 'a3ad5297-1ae2-4a6e-889b-544d5b878bb8', '_collection_name': 'arxiv_multimodal'}, page_conte

In [4]:
# --- 1.2 Summarizer ---
summarizer = Summarizer()
sample_text = "This is a long text about LLMs and their scaling laws..." * 10
summaries = summarizer.summarize_texts([sample_text])
print(f"Summary: {summaries[0][:100]}...")

Summary: This is a long text about LLMs and their scaling laws. This is aLong text about LLMs and their scali...


In [5]:
# --- 1.3 Image Captioner ---
captioner = ImageCaptioner()
# Mock base64 (use a real one from your data if available)
mock_b64 = "data:image/png;base64,iVBORw0KG..."  # Truncated for brevity
captions = captioner.caption_images([mock_b64])
print(f"Caption: {captions[0]}")

Caption: Error: Incorrect image source. Must be a valid URL starting with `http://` or `https://`, a valid path to an image file, or a base64 encoded string. Got iVBORw0KG.... Failed with Invalid base64-encoded string: number of data characters (9) cannot be 1 more than a multiple of 4


In [6]:
# --- 1.4 Feedback Store ---
store_feedback("test query", "Corrected: use HyDE method")
print("Feedback stored successfully!")

TypeError: object of type 'PointStruct' has no len()

## 2. Test Nodes

In [None]:
# --- 2.1 Retriever Node ---
initial_state = {"query": "scaling laws in LLMs", "messages": [HumanMessage(content="Start")]}
retrieved_state = retrieve(initial_state)
print(f"Retrieved {len(retrieved_state['retrieved_chunks'])} chunks")

In [None]:
# --- 2.2 Summarizer Node ---
state_with_chunks = {**initial_state, "retrieved_chunks": docs}
summarized_state = summarize(state_with_chunks)
print(f"Summaries: {summarized_state['summaries'][:1]}")

In [None]:
# --- 2.3 Visual Analyzer Node ---
visual_state = analyze_figures(state_with_chunks)
print(f"Figure Insights: {visual_state['figure_insights'][:1]}")

In [None]:
# --- 2.4 Synthesizer Node ---
synth_state = synthesize({
    "query": "scaling laws",
    "summaries": summarized_state['summaries'],
    "figure_insights": visual_state['figure_insights']
})
print(f"Synthesis: {synth_state['synthesis'][:200]}...")

In [None]:
# --- 2.5 Fact Checker Node ---
fact_state = fact_check({
    "synthesis": synth_state['synthesis'],
    "retrieved_chunks": docs
})
print(f"Verified: {fact_state['verified']}")

## 3. Test Full Graph

In [None]:
# --- 3.1 Run Full Pipeline ---
query = "What are the latest advancements in LLM scaling laws?"
config = {"configurable": {"thread_id": str(uuid.uuid4())}}
result = app.invoke({"query": query, "messages": []}, config)

print("Final State:")
for key, value in result.items():
    if key == "messages":
        print(f"{key}: {[m.content for m in value]}")
    else:
        print(f"{key}: {value[:100]}..." if isinstance(value, str) else f"{key}: {value}")

## 4. LangSmith Tracing (Optional)

In [None]:
# --- 4.1 Enable LangSmith (set env vars first) ---
import os
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your-langsmith-key"

# Rerun with tracing
result_with_trace = app.invoke({"query": query, "messages": []}, config)
print("Check traces at https://smith.langchain.com")

## 5. Visualize Graph

In [None]:
from IPython.display import Image
Image(app.get_graph().draw_png())