# Notebook 2: Simple RAG

In this notebook you will explore how to perform simple RAG (also called naive RAG) using semantic search and a chat model.

## Learning Objectives
- Learn how to perform a semantic search using Azure AI Search
- Explore hybrid search with Azure AI Search
- Learn how to combine search results to perform RAG with an LLM

### Install Required Packages

In [None]:
%pip install -U agent-framework --pre -q
%pip install -openai -q
%pip install python-dotenv -q
%pip install azure-search-documents -q
%pip install azure-identity -q

### Set Up the Module Imports

In [None]:
import json
import os

from agent_framework.azure import AzureOpenAIChatClient
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from dotenv import load_dotenv
from openai import AzureOpenAI

### Get the needed environment variables

In [None]:
# Load settings from a local .env file. With override=True, values from the
# file take precedence over variables already present in the environment.
load_dotenv(override=True)

# Azure OpenAI settings (consumed by the embeddings client).
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
embedding_deployment = os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")

# Azure AI Search settings (consumed by the search client).
search_endpoint = os.environ.get("AZURE_SEARCH_ENDPOINT")
search_key = os.environ.get("AZURE_SEARCH_API_KEY")
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")

Define a method to get the embeddings for the queries we are going to be asking

In [None]:
EMBEDDING_DIMENSIONS = 1536  # use 3072 for the 3-large embedding model


def get_embeddings(text: str) -> list[float]:
    """Return the embedding vector for ``text`` from the Azure OpenAI deployment.

    Empty, ``None`` or whitespace-only input short-circuits to a zero vector of
    length ``EMBEDDING_DIMENSIONS`` without contacting the service. Input longer
    than 30000 characters is cut down to its first 30000 characters before it is
    sent. If the embeddings request fails, the error is printed and a zero
    vector is returned instead of raising.

    Args:
        text: The text to embed.

    Returns:
        The embedding as a list of floats, or an all-zero list of length
        ``EMBEDDING_DIMENSIONS`` for blank input or on a request error.
    """
    # Blank input: nothing to embed, so hand back a fresh zero vector.
    if not (text and text.strip()):
        return [0.0] * EMBEDDING_DIMENSIONS

    # Rough character budget standing in for the model's ~8000-token limit;
    # slicing is a no-op when the text is already short enough.
    char_limit = 30000
    text = text[:char_limit]

    openai_client = AzureOpenAI(
        api_version=api_version,
        api_key=api_key,
        azure_endpoint=azure_endpoint,
    )

    try:
        reply = openai_client.embeddings.create(
            model=embedding_deployment,
            input=text,
        )
        first_item = reply.data[0]
        return first_item.embedding
    except Exception as e:
        # Best-effort fallback: report the failure and keep the notebook running.
        print(f"Error generating embedding: {e}")
        return [0.0] * EMBEDDING_DIMENSIONS

Define a method for performing the searches

In [None]:
# Shared client for the configured index, authenticated with the search API key.
search_client = SearchClient(
    credential=AzureKeyCredential(search_key),
    index_name=index_name,
    endpoint=search_endpoint,
)

def do_search(query: str, use_hybrid: bool = True, top_k: int = 3) -> str:
    """Search the index for ``query`` and return the hits as a JSON string.

    The query is embedded with ``get_embeddings`` and sent as a vector query
    over the ``BodyEmbeddings`` and ``AnswerEmbeddings`` fields. When
    ``use_hybrid`` is true the query text is sent alongside the vector query;
    otherwise no search text is sent and only the vector query is used.

    Progress and the final context are printed to the console as a side effect.

    Args:
        query: The user's question.
        use_hybrid: Whether to include the query text in the search request.
        top_k: Value passed as ``top`` to limit the number of results.

    Returns:
        A JSON array (2-space indent, non-ASCII characters kept as-is) holding
        one object per search result, in the order the service returned them.
    """
    print(
        "=" * 80,
        f"Query: {query}",
        f"Hybrid search: {use_hybrid}",
        "-" * 80,
        sep="\n",
    )

    vector_query = VectorizedQuery(
        vector=get_embeddings(query),
        fields="BodyEmbeddings,AnswerEmbeddings",
    )

    # The only difference between the two modes is whether the text is sent.
    hits = search_client.search(
        search_text=query if use_hybrid else None,
        vector_queries=[vector_query],
        top=top_k,
    )

    separator = "-" * 40
    records = []
    for position, hit in enumerate(hits, start=1):
        print(separator)
        # One JSON-serializable record per hit; document_number is 1-based.
        records.append(
            {
                "document_number": position,
                "id": hit.get("Id"),
                "body": hit.get("Body"),
                "answer": hit.get("Answer"),
                "type": hit.get("Type"),
                "department": hit.get("Queue"),
                "priority": hit.get("Priority"),
                "business_type": hit.get("Business_Type"),
                "search_score": hit.get("@search.score", None),
            }
        )

    # Serialize once; the same string is echoed and handed back to the caller.
    rag_context = json.dumps(records, indent=2, ensure_ascii=False)
    print("RAG Context:", rag_context, separator, sep="\n")

    return rag_context

Define the prompts for the system instructions and for the user query

In [None]:
# System instructions for the agent: answer only from the supplied context,
# cite sources, and say "I don't know." when the context has no answer.
SYSTEM_PROMPT = """
You are an AI assistant that helps users learn information from the IT support ticket knowledge base.
Answer the question using only the provided context.
Use bullets if the answer has multiple points.
If the answer is longer than 3 sentences, provide a summary.
Answer ONLY with the facts listed in the list of sources provided in the context with the user query. 
Cite your source when you answer the question with the format [source-id].
If the answer is not contained within the context, respond with "I don't know."
"""

# Per-request template: filled with the user's question and the search results
# via RAG_PROMPT.format(user_query=..., context=...).
RAG_PROMPT = """
User Question: {user_query}
Context:
{context}
"""

Queries to try:
- "What problems are there with Surface devices?"
- "What sort of AWS problems have been reported?"
- "Are there any issues logged for Dell XPS laptops?"
- "Do we have more issues with MacBook Air computers or Dell XPS laptops?"
- "What issues do we have with dell xps laptops?"
- "What issues are for Dell XPS laptops and the user tried Win + Ctrl + Shift + B?"
- "How many tickets were logged and Incidents for Human Resources and low priority?"
- "Which Dell XPS issue does not mention Windows?"
- "What department had consultants with Login Issues?"


In [None]:
# Choose the question to run through the pipeline; uncomment one at a time.

# Expected answer: Human Resources
# user_query = "What department had consultants with Login Issues?"

# Expected answer: 3 -- this one is not expected to come out right
user_query = "How many tickets were logged and Incidents for Human Resources and low priority?"

# Retrieve context with hybrid search (query text + vector):
context = do_search(query=user_query, top_k=3, use_hybrid=True)

# Retrieve context with vector search only:
# context = do_search(query=user_query, top_k=3, use_hybrid=False)

In [None]:
# Build the chat client first, then create the agent from it. The client
# authenticates with DefaultAzureCredential rather than an API key.
chat_client = AzureOpenAIChatClient(credential=DefaultAzureCredential())
agent = chat_client.create_agent(
    name="rag-agent",
    instructions=SYSTEM_PROMPT,
)

Now call the LLM with the original user question and the search results

In [None]:
rag_context = RAG_PROMPT.format(
    user_query=user_query,
    context=context)

result = await agent.run(rag_context)
print(result.text)

Play around with the code above and try changing the following:
- top_k value - default is 3, but try more and less to see if there is a difference
- Hybrid - see if a hybrid search makes any difference
- user_query - try some of the other questions listed (or your own)

For example, this query won't get a correct answer:
```python
user_query = "How many tickets were logged and Incidents for Human Resources and low priority?"
```

See if you can figure out why or how to fix it!