# Exploration Notebook

This notebook is for exploring and experimenting with the document reasoning agent.

## Features:
- Document loading and chunking
- Embedding visualization
- Retrieval testing
- Agent interaction


In [None]:
# Import necessary modules
import sys
from pathlib import Path

# Make the project's `src` package importable: Path().absolute() is the current
# working directory, and its parent is placed at the front of sys.path.
# NOTE(review): this assumes the notebook is run from a directory directly under
# the project root (the one that contains `src/`) — confirm.
sys.path.insert(0, str(Path().absolute().parent))

from src.retrieval.chunker import chunk_text, semantic_chunk_text, chunk_pdf
from src.retrieval.retriever import Retriever
from src.retrieval.embedder import Embedder
from src.agent.agent import Agent
from src.agent.planner import plan
from src.agent.worker import Worker
from src.llm.local_model_client import LocalModelClient


## 1. Document Chunking Exploration


In [None]:
# Toy corpus for the experiments below: six one-sentence lines, each on a
# different machine-learning topic, wrapped in a leading and trailing newline.
sample_text = """
Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience.
Deep learning uses neural networks with multiple layers to process complex patterns in data.
Natural language processing handles text data and enables machines to understand human language.
Supervised learning uses labeled data to train models that can make predictions.
Unsupervised learning finds patterns in data without labeled examples.
Reinforcement learning trains agents through rewards and penalties in an environment.
"""

# Run the fixed-size chunker over the corpus (size=100) and keep the result,
# since the retrieval cells index these chunks.
fixed_chunks = chunk_text(sample_text, size=100)

# Report how many chunks came back, then list them numbered from 1.
print(f"Fixed-size chunks: {len(fixed_chunks)}")
for i, chunk in enumerate(fixed_chunks, start=1):
    # The length shown is for the whole chunk; only its first 80 characters
    # are printed as a preview.
    print(f"\nChunk {i} ({len(chunk)} chars): {chunk[:80]}...")


In [None]:
# Chunk the same sample text with semantic_chunk_text (max_chunk_size=200) so
# the output can be compared with the fixed-size run.
semantic_chunks = semantic_chunk_text(sample_text, max_chunk_size=200)

# Same report format as the fixed-size cell: total count, then a numbered list
# with each chunk's full length and an 80-character preview.
print(f"Semantic chunks: {len(semantic_chunks)}")
for i, chunk in enumerate(semantic_chunks, start=1):
    print(f"\nChunk {i} ({len(chunk)} chars): {chunk[:80]}...")


## 2. Retrieval System Exploration


In [None]:
# Create a retriever and index the fixed-size chunks produced earlier.
retriever = Retriever()
retriever.index_chunks(fixed_chunks)

# Run one sample question through the retriever, requesting k=3 results.
query = "What is machine learning?"
results = retriever.retrieve(query, k=3)

# Echo the query, then list whatever came back, numbered from 1.
print(f"Query: '{query}'")
print(f"\nRetrieved {len(results)} chunks:")
for i, chunk in enumerate(results, start=1):
    print(f"\n{i}. {chunk}")


In [None]:
# Repeat the retrieval for the current `query`, this time asking the retriever
# for (chunk, score) pairs so the similarity of each hit can be inspected.
scored_results = retriever.retrieve_with_scores(query, k=3)
print(f"Query: '{query}'")
# Plain string: this header has no placeholders, so it needs no f-prefix.
print("\nRetrieved chunks with similarity scores:")
for i, (chunk, score) in enumerate(scored_results, 1):
    # Score is shown to three decimals; the chunk is cut to a 100-character preview.
    print(f"\n{i}. Score: {score:.3f}")
    print(f"   {chunk[:100]}...")


## 3. Agent Interaction


In [None]:
# Wire the agent together: the worker gets the retriever and the model client,
# and the agent gets the planner function, the worker, and the same model client.
model = LocalModelClient()
worker = Worker(retriever, model)
agent = Agent(plan, worker, model)

# Run a single question through the agent.
query = "What is deep learning?"
print(f"Query: '{query}'")
try:
    # Only the agent call is guarded, so a NotImplementedError raised while
    # formatting or printing the answer is not mistaken for a missing model.
    answer = agent.run(query)
except NotImplementedError:
    # Treated as the expected outcome when no local model has been loaded.
    print("\n(Note: Local model not loaded - this is expected without model_name)")
    print("The agent structure is working correctly!")
else:
    print(f"\nAnswer: {answer}")
