# Embedded Percolate Demo (Simplified)

This notebook demonstrates using Percolate in embedded mode with DuckDB for storage and vector search capabilities.

> **NOTE:** This notebook uses the latest batched embedding utilities for improved efficiency.

In [ ]:
# Embedded Percolate Demo (Simplified)
#
# This cell is a code cell, so the introduction is written as Python comments;
# raw Markdown here would raise a SyntaxError when the notebook is run top to bottom.
#
# This notebook demonstrates using Percolate in embedded mode with DuckDB for storage and vector search capabilities.
#
# NOTE: This notebook uses the latest batched embedding utilities for improved efficiency.
#
# IMPORTANT: For the embedding and semantic search sections to work, you need to have an
# OPENAI_API_KEY environment variable set with a valid API key.

In [None]:
# Initialize service with a model: build a DuckDBService around the Agent model class.
# NOTE(review): DuckDBService and Agent are not imported in any cell visible here —
# confirm an import/setup cell runs before this one on a fresh kernel.
agent_repo = DuckDBService(Agent)

# Register the entity with the service.
# Presumably this creates the backing storage for Agent (queried later as p8."Agent") — TODO confirm.
# As the last expression in the cell, the return value of register() is shown as the cell output.
agent_repo.register()

In [None]:
# One tuple per agent: (name, category, description, spec, functions).
agent_fields = [
    (
        "SearchAgent",
        "Research",
        "Agent that performs web searches and summarizes results",
        {"capabilities": ["search", "summarize"]},
        {"search_web": "Searches the web"},
    ),
    (
        "WriterAgent",
        "Content",
        "Agent that writes content based on prompts",
        {"capabilities": ["write"]},
        {"write": "Writes content"},
    ),
    (
        "DataAnalysisAgent",
        "Analytics",
        "Agent that analyzes data and generates insights",
        {"capabilities": ["analyze", "visualize"]},
        {"analyze_dataset": "Performs statistical analysis on data"},
    ),
]

# Each id is derived from the agent's own name via make_uuid, so a given name
# is expected to map to the same id on every run.
agents = [
    Agent(
        id=make_uuid(name),
        name=name,
        category=category,
        description=description,
        spec=spec,
        functions=functions,
    )
    for name, category, description, spec, functions in agent_fields
]

# Insert all three agents in a single call; its return value is the cell output.
agent_repo.update_records(agents)

In [None]:
# Upsert check: submit WriterAgent again under the same make_uuid-derived id,
# this time with a different category, description, spec and functions.
writer_agent_fields = {
    "id": make_uuid("WriterAgent"),
    "name": "WriterAgent",
    "category": "Content Creation",
    "description": "Agent that writes and edits content based on prompts",
    "spec": {"capabilities": ["write", "edit"]},
    "functions": {"write": "Writes better content"},
}
updated_agent = Agent(**writer_agent_fields)

# A single record (not a list) is passed here; the return value is the cell output.
agent_repo.update_records(updated_agent)

In [None]:
# Read back name, category and description for every row of p8."Agent", sorted by name,
# so the effect of the previous insert/upsert cells can be inspected.
agent_summary_sql = "SELECT name, category, description FROM p8.\"Agent\" ORDER BY name"
agent_repo.execute(agent_summary_sql)

## Adding Embeddings for Semantic Search

Next, we'll create embeddings for our agents to enable semantic search. We use the new batch embedding functionality for better efficiency.

## Using Batch Embedding for Improved Efficiency

The code below uses the batch embedding implementation, which:

1. Processes multiple embeddings at once in a single API call
2. Reuses the same efficient update mechanism as regular records
3. Handles errors gracefully

This implementation is significantly more efficient and cost-effective than processing embeddings one at a time.

# Collect the id and description of every stored agent; these are the records to embed.
records = [
    {"id": agent_row["id"], "description": agent_row["description"]}
    for agent_row in agent_repo.select()
]

# Embed the "description" field of all records through the built-in batch call.
# Everything, including the result reporting, stays inside the try so that any failure
# ends up in the explanatory message below instead of stopping the notebook.
try:
    embedding_result = agent_repo.add_embeddings(
        records=records,
        embedding_field="description",
        batch_size=10,  # Process in batches for efficiency
    )

    print(f"Embedding success: {embedding_result['success']}")
    print(f"Embeddings added: {embedding_result['embeddings_added']}")
    # 'errors' is read with .get(), so it is only reported when present and non-empty.
    if embedding_result.get('errors'):
        print(f"Errors: {embedding_result['errors']}")
except Exception as e:
    print(f"Error during embedding: {str(e)}")
    print("Note: This may fail if you don't have an OPENAI_API_KEY environment variable set.")

In [None]:
# Semantic search for agents related to "data analysis".
#
# The search runs once and is guarded: the embedding step before it is allowed to fail
# (for example when OPENAI_API_KEY is not set), and in that case this cell should report
# the problem rather than abort a top-to-bottom run of the notebook.
# The query lives in one variable so the call and the printed summary cannot drift apart.
query = "data analysis and visualization"

try:
    search_results = agent_repo.semantic_search(query, limit=3)

    print(f"Found {len(search_results)} results for '{query}'")
    for rank, result in enumerate(search_results, start=1):
        # Fall back to 0 when a result carries no 'similarity' key.
        similarity = result.get('similarity', 0)
        print(f"{rank}. {result['name']} - Similarity: {similarity:.4f}")
        print(f"   Description: {result['description']}")
        print()
except Exception as e:
    print(f"Semantic search error: {e}")
    print("Note: This may fail if embedding generation failed in the previous step.")

In [None]:
# Drop the entity (optional)
# Left commented out so that running the whole notebook never executes it;
# uncomment the lines below to call agent_repo.drop_entity() and print its outcome.
# NOTE(review): presumably this removes the stored Agent data — confirm before enabling.
# result = agent_repo.drop_entity()
# print(f"Dropped entity: {result['success']}")
# if not result['success'] and result.get('errors'):
#     print(f"Errors: {result['errors']}")