### 03.03. Setting up Indexes

In [5]:
#Install prerequisite packages
!pip3 install python-dotenv==1.0.0

!pip3 install llama-index==0.10.59
!pip3 install llama-index-llms-openai==0.1.27
!pip3 install llama-index-embeddings-openai==0.1.11
!pip3 install llama-index-llms-azure-openai==0.1.10
!pip3 install llama-index-embeddings-azure-openai==0.1.11



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --

In [6]:
#Setup Azure Open AI connection
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

from llama_index.core import Settings
import os
import nest_asyncio

nest_asyncio.apply()

#API info. Replace with your own keys and end points
api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
llm_deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_NAME")
embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
api_version = "2024-05-01-preview"

#Setup the LLM
Settings.llm=AzureOpenAI(
    model="gpt-35-turbo",
    deployment_name=llm_deployment,
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

#Setup the embedding model RAG
Settings.embed_model= AzureOpenAIEmbedding(
    deployment_name=embedding_deployment, 
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)


### A word on endpoint and deployment setup
The course from whence this derives omits any information on setup of endpoints and deployments. This must be done w/in Azure OpenAI services. Below is a summary of instructions on how to provision your endpoints to setup the script.

- Pre-requisites
	•	A Microsoft Azure account
	•	Approval for Azure OpenAI Service access (apply [here](https://aka.ms/oai/access) if needed)

#### Step 1: Create an Azure OpenAI Resource
	1.	Go to the Azure Portal: https://portal.azure.com
	2.	Click “Create a resource”
	3.	Search for “Azure OpenAI” and select it
	4.	Click “Create”
	5.	Fill in the form:
		•	Subscription: your billing subscription
		•	Resource group: create or select one
		•	Region: pick a region that supports OpenAI (e.g., East US, South Central US)
		•	Name: e.g., my-openai-resource
	6.	Click “Review + Create”, then “Create”

#### Step 2: Deploy a Model (LLM and Embedding)

- A. Deploy an LLM (e.g., GPT-35-Turbo)
	1.	Go to your Azure OpenAI resource
	2.	In the sidebar, click “Deployments”
	3.	Click “Create”
	4.	Fill in:
		•	Model: gpt-35-turbo
		•	Version: choose latest
		•	Deployment name: e.g., agentai-gpt35
	5.	Click “Create”

- B. Deploy an Embedding Model (e.g., text-embedding-ada-002)
	1.	Repeat steps above for a second deployment
	2.	Choose:
		•	Model: text-embedding-ada-002
		•	Deployment name: e.g., agentai-embedding

#### Step 3: Retrieve Keys and Endpoint
	1.	In your Azure OpenAI resource, go to “Keys and Endpoint”
	2.	Copy:
		•	Key 1
		•	Endpoint URL

#### Step 4: Store in Your Environment

In your shell profile (~/.zshrc or .env for project-based work):
```bash
export AZURE_OPENAI_API_KEY="your-api-key-here"
export AZURE_OPENAI_ENDPOINT="https://your-resource-name.openai.azure.com/"
export AZURE_OPENAI_LLM_DEPLOYMENT_NAME="agentai-gpt35"
export AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME="agentai-embedding"
```
Then reload:
```bash
source ~/.zshrc  # or use `dotenv` loader if in a .env
```

Now, a validation script to verify if the end points are correct.

In [7]:
from llama_index.llms.azure_openai import AzureOpenAI as LlamaAzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
import traceback

# Test the LLM and embedding deployments - This will verify if your deployments are working correctly
try:
    print(f"\nTesting LLM Deployment: {llm_deployment}")
    llm = LlamaAzureOpenAI(
        deployment_name=llm_deployment,
        api_key=api_key,
        azure_endpoint=azure_endpoint,
        api_version=api_version,
    )
    result = llm.complete("Say the word 'yabadabado!'.")
    print("LLM response:", result.text.strip())
except Exception:
    print("LLM deployment failed.")
    traceback.print_exc()

try:
    print(f"\nTesting Embedding Deployment: {embedding_deployment}")
    embed_model = AzureOpenAIEmbedding(
        deployment_name=embedding_deployment,
        api_key=api_key,
        azure_endpoint=azure_endpoint,
        api_version=api_version,
    )
    emb = embed_model.get_text_embedding("Test embedding content always")
    print("Embedding vector created. Length:", len(emb))
except Exception:
    print("Embedding deployment failed.")
    traceback.print_exc()


Testing LLM Deployment: agentai-gpt35
LLM response: Yabadabado!

Testing Embedding Deployment: agentai-embedding
Embedding vector created. Length: 1536


In [8]:
#Create indexes for vector search
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import  VectorStoreIndex

splitter=SentenceSplitter(chunk_size=1024)

#-------------------------------------------------------------------
#Setup Aeroflow document index
#-------------------------------------------------------------------
aeroflow_documents=SimpleDirectoryReader(
    input_files=["AeroFlow_Specification_Document.pdf"])\
            .load_data()

#Read documents into nodes
aeroflow_nodes=splitter.get_nodes_from_documents(aeroflow_documents)
#Create a vector Store
aeroflow_index=VectorStoreIndex(aeroflow_nodes)
#Create a query engine
aeroflow_query_engine = aeroflow_index.as_query_engine()

#-------------------------------------------------------------------
#Setup EchoSprint document index
#-------------------------------------------------------------------
ecosprint_documents=SimpleDirectoryReader(
    input_files=["EcoSprint_Specification_Document.pdf"])\
            .load_data()
#Read documents into nodes
ecosprint_nodes=splitter.get_nodes_from_documents(ecosprint_documents)
#Create a vector Store
ecosprint_index=VectorStoreIndex(ecosprint_nodes)
#Create a query engine
ecosprint_query_engine = ecosprint_index.as_query_engine()


### 03.04. Setup the Agentic Router

**NOTE**: The description provided when instantiating the QueryEngineTool is the primary resourced leveraged by the router during the tool selection step. Erroneous or incomplete descriptions are likely to reduce robustness of tool selection.

In [9]:
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

#Create a query engine Tool for Aeroflow questions
aeroflow_tool = QueryEngineTool.from_defaults(
    query_engine=aeroflow_query_engine,
    name="Aeroflow specifications",
    description=(
        "Contains information about Aeroflow : Design, features, technology, maintenance, warranty"
    ),
)

#Create a query engine Tool for EcoSprint questions
ecosprint_tool = QueryEngineTool.from_defaults(
    query_engine=ecosprint_query_engine,
    name="EcoSprint specifications",
    description=(
        "Contains information about EcoSprint : Design, features, technology, maintenance, warranty"
    ),
)

# Create an erroneous query engine tool with mislabeled content
fake_pudding_tool = QueryEngineTool.from_defaults(
    query_engine=ecosprint_query_engine,
    name="Pudding specifications",
    description=(
        "Contains information about pudding : mmm, delicious, sweet, chocolate, vanilla, strawberry"
    ),
)

#Create a Router Agent. Provide the Tools to the Agent

router_agent=RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        aeroflow_tool,
        ecosprint_tool,
        fake_pudding_tool,
    ],
    verbose=True
)

### 03.05. Route with Agentic AI

In [10]:
#Ask a question about AeroFlow
response = router_agent.query("What colors are available for AeroFlow?")
print("\nResponse: ",str(response))

[1;3;38;5;200mSelecting query engine 0: AeroFlow is a product or technology related to design, features, and technology, which may include information about color options..
[0m
Response:  Coastal Blue, Sunset Orange, and Pearl White.


In [11]:
response = router_agent.query("What colors are available for EcoSprint?")
print("\nResponse: ",str(response))

[1;3;38;5;200mSelecting query engine 1: Contains information about EcoSprint which includes details about design, features, and technology, which may mention the available colors.
[0m
Response:  Midnight Black, Ocean Blue, and Pearl White.


In [12]:
response = router_agent.query("What best color for strawberry pudding?")
print("\nResponse: ",str(response))
# Maximum LoLz!

[1;3;38;5;200mSelecting query engine 2: Choice 3 contains information about pudding, including strawberry flavor..
[0m
Response:  Ocean Blue


In [13]:
# provide a question that requires engaging multiple tools for an answer
question = "Which of the EcoSprint and AeroFlow are likeliest to have minimal maintenance concerns? How do the warranties compare?"
response = router_agent.query(question)
print("\nResponse: ",str(response))
# Response indicates failure in logic or resource parsing, as the AeroFlow warranty is info is provided

[1;3;38;5;200mSelecting query engine 1: Contains information about EcoSprint which is likeliest to have minimal maintenance concerns based on the provided information. The warranty information for EcoSprint can be compared with Aeroflow to determine how they differ..
[0m
Response:  The EcoSprint is likely to have minimal maintenance concerns with recommended checks every 10,000 miles and easy scheduling of service appointments through the mobile app. It comes with a 5-year/60,000-mile warranty for the vehicle and an 8-year/100,000-mile warranty for the battery.


## **TESTING BELOW**

In [14]:
import re
from typing import List, Optional

def chunk_by_sentences(text: str, max_chars: int = 1000, overlap_sentences: int = 1) -> List[str]:
    """
    Split text into chunks by sentences with optional overlap.
    
    Args:
        text: Input text to chunk
        max_chars: Maximum characters per chunk
        overlap_sentences: Number of sentences to overlap between chunks
    
    Returns:
        List of text chunks
    """
    if not text or not text.strip():
        return []
    
    # Split by sentence endings, keeping the punctuation
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    
    # Remove empty sentences
    sentences = [s.strip() for s in sentences if s.strip()]
    
    if not sentences:
        return []
    
    chunks = []
    current_chunk = ""
    sentence_buffer = []  # For overlap management
    
    i = 0
    while i < len(sentences):
        sentence = sentences[i]
        
        # Check if adding this sentence would exceed max_chars
        potential_chunk = current_chunk + (" " if current_chunk else "") + sentence
        
        if len(potential_chunk) <= max_chars:
            # Add sentence to current chunk
            current_chunk = potential_chunk
            sentence_buffer.append(sentence)
            i += 1
        else:
            # Current chunk is full, save it and start new one
            if current_chunk:
                chunks.append(current_chunk.strip())
                
                # Start new chunk with overlap sentences
                overlap_start = max(0, len(sentence_buffer) - overlap_sentences)
                overlap_text = " ".join(sentence_buffer[overlap_start:])
                
                current_chunk = overlap_text
                sentence_buffer = sentence_buffer[overlap_start:]
            else:
                # Single sentence is too long, add it anyway
                chunks.append(sentence)
                sentence_buffer = []
                i += 1
    
    # Add final chunk if it has content
    if current_chunk and current_chunk.strip():
        chunks.append(current_chunk.strip())
    
    return chunks


def advanced_chunk_by_sentences(text: str, max_chars: int = 1000, 
                               overlap_sentences: int = 1,
                               min_chunk_chars: int = 100) -> List[str]:
    """
    Advanced chunking with minimum chunk size enforcement.
    
    Args:
        text: Input text to chunk
        max_chars: Maximum characters per chunk
        overlap_sentences: Number of sentences to overlap between chunks
        min_chunk_chars: Minimum characters per chunk (merge small chunks)
    
    Returns:
        List of text chunks
    """
    initial_chunks = chunk_by_sentences(text, max_chars, overlap_sentences)
    
    # Merge chunks that are too small
    merged_chunks = []
    i = 0
    
    while i < len(initial_chunks):
        current_chunk = initial_chunks[i]
        
        # If chunk is too small and there's a next chunk, try to merge
        while (len(current_chunk) < min_chunk_chars and 
               i + 1 < len(initial_chunks) and 
               len(current_chunk + " " + initial_chunks[i + 1]) <= max_chars):
            i += 1
            current_chunk += " " + initial_chunks[i]
        
        merged_chunks.append(current_chunk)
        i += 1
    
    return merged_chunks


# Test Suite
def test_chunking_functions():
    """Comprehensive test suite for chunking functions."""
    
    print("=== CHUNKING FUNCTION TESTS ===\n")
    
    # Test 1: Basic functionality
    print("Test 1: Basic Sentence Chunking")
    sample_text = """
    This is the first sentence. This is the second sentence with more content to make it longer. 
    This is the third sentence. This is the fourth sentence that contains even more information 
    to test the chunking mechanism. This is the fifth sentence. This is the sixth sentence.
    """
    
    chunks = chunk_by_sentences(sample_text, max_chars=100, overlap_sentences=1)
    print(f"Original text length: {len(sample_text)} characters")
    print(f"Number of chunks: {len(chunks)}")
    
    for i, chunk in enumerate(chunks):
        print(f"Chunk {i+1} ({len(chunk)} chars): {chunk[:80]}...")
    print()
    
    # Test 2: Edge cases
    print("Test 2: Edge Cases")
    
    # Empty text
    empty_chunks = chunk_by_sentences("", max_chars=100)
    print(f"Empty text chunks: {len(empty_chunks)}")
    
    # Single sentence
    single_chunks = chunk_by_sentences("This is a single sentence.", max_chars=100)
    print(f"Single sentence chunks: {len(single_chunks)}")
    print(f"Single sentence content: {single_chunks[0] if single_chunks else 'None'}")
    
    # Very long single sentence
    long_sentence = "This is an extremely long sentence that exceeds the maximum character limit by quite a lot and should be handled gracefully by the chunking function."
    long_chunks = chunk_by_sentences(long_sentence, max_chars=50)
    print(f"Long sentence chunks: {len(long_chunks)}")
    print(f"Long sentence chunk: {long_chunks[0][:50]}..." if long_chunks else "None")
    print()
    
    # Test 3: Overlap validation
    print("Test 3: Overlap Validation")
    overlap_text = "First sentence here. Second sentence content. Third sentence data. Fourth sentence information. Fifth sentence details."
    
    no_overlap = chunk_by_sentences(overlap_text, max_chars=60, overlap_sentences=0)
    with_overlap = chunk_by_sentences(overlap_text, max_chars=60, overlap_sentences=1)
    
    print("No overlap:")
    for i, chunk in enumerate(no_overlap):
        print(f"  Chunk {i+1}: {chunk}")
    
    print("\nWith 1 sentence overlap:")
    for i, chunk in enumerate(with_overlap):
        print(f"  Chunk {i+1}: {chunk}")
    print()
    
    # Test 4: Advanced chunking with minimum size
    print("Test 4: Advanced Chunking (Min Size Enforcement)")
    
    varied_text = """
    Short. This is a medium length sentence with some content. 
    Another short one. This is another medium sentence.
    Brief. This is the final longer sentence with more comprehensive information.
    """
    
    basic_chunks = chunk_by_sentences(varied_text, max_chars=80)
    advanced_chunks = advanced_chunk_by_sentences(varied_text, max_chars=80, min_chunk_chars=30)
    
    print("Basic chunking:")
    for i, chunk in enumerate(basic_chunks):
        print(f"  Chunk {i+1} ({len(chunk)} chars): {chunk}")
    
    print("\nAdvanced chunking (min 30 chars):")
    for i, chunk in enumerate(advanced_chunks):
        print(f"  Chunk {i+1} ({len(chunk)} chars): {chunk}")
    print()
    
    # Test 5: Real-world example with technical content
    print("Test 5: Technical Document Example")
    
    technical_text = """
    Machine learning models require careful preprocessing of input data. 
    The text-embedding-ada-002 model produces fixed-length embeddings of 1536 dimensions. 
    Chunking strategies are essential for handling long documents effectively. 
    Retrieval-augmented generation systems benefit from optimal chunk sizes. 
    Vector databases store these embeddings for similarity search operations.
    Semantic search relies on cosine similarity calculations between vectors.
    """
    
    tech_chunks = advanced_chunk_by_sentences(technical_text, max_chars=120, 
                                            overlap_sentences=1, min_chunk_chars=50)
    
    print(f"Technical text chunks ({len(tech_chunks)} total):")
    for i, chunk in enumerate(tech_chunks):
        print(f"  Chunk {i+1} ({len(chunk)} chars): {chunk}")
    
    return chunks, tech_chunks


def validate_chunks_for_embedding(chunks: List[str], max_embedding_tokens: int = 8192) -> dict:
    """
    Validate chunks are suitable for embedding models.
    
    Args:
        chunks: List of text chunks
        max_embedding_tokens: Maximum tokens for embedding model (ada-002 = 8192)
    
    Returns:
        Dictionary with validation results
    """
    results = {
        'total_chunks': len(chunks),
        'avg_length': sum(len(chunk) for chunk in chunks) / len(chunks) if chunks else 0,
        'max_length': max(len(chunk) for chunk in chunks) if chunks else 0,
        'min_length': min(len(chunk) for chunk in chunks) if chunks else 0,
        'chunks_over_limit': [],
        'empty_chunks': []
    }
    
    # Rough token estimation (1 token ≈ 4 characters for English)
    char_to_token_ratio = 4
    max_chars = max_embedding_tokens * char_to_token_ratio
    
    for i, chunk in enumerate(chunks):
        if len(chunk) > max_chars:
            results['chunks_over_limit'].append((i, len(chunk)))
        if not chunk.strip():
            results['empty_chunks'].append(i)
    
    return results


In [15]:
chunks, tech_chunks = test_chunking_functions()

print("\n=== EMBEDDING VALIDATION ===")
validation = validate_chunks_for_embedding(tech_chunks)

print(f"Total chunks: {validation['total_chunks']}")
print(f"Average length: {validation['avg_length']:.1f} characters")
print(f"Length range: {validation['min_length']} - {validation['max_length']} characters")
print(f"Chunks over embedding limit: {len(validation['chunks_over_limit'])}")
print(f"Empty chunks: {len(validation['empty_chunks'])}")

if validation['chunks_over_limit']:
    print("WARNING: Some chunks may be too long for embedding models!")
else:
    print("✓ All chunks are within embedding model limits")

=== CHUNKING FUNCTION TESTS ===

Test 1: Basic Sentence Chunking


KeyboardInterrupt: 