# Notebook 2: Simple RAG

In this notebook you will explore how to perform simple RAG (also called naive RAG) using semantic search and a chat model.

## Learning Objectives
- Learn how to perform a semantic search using Azure AI Search
- Explore hybrid search with Azure AI Search
- Learn how to combine search results to perform RAG with an LLM

In [None]:
#r "nuget:Microsoft.Agents.AI.OpenAI, *-*"
#r "nuget:Microsoft.Agents.AI.AzureAI, 1.0.0-preview.260108.1"
#r "nuget:Azure.AI.OpenAI, *-*"
#r "nuget:Azure.Core, *-*"
#r "nuget:Azure.Identity, *-*"
#r "nuget:Azure.Search.Documents, *-*"
#r "nuget:System.Linq.AsyncEnumerable, *-*"
#r "nuget:Microsoft.Extensions.Configuration, 10.0.1"
#r "nuget:Microsoft.Extensions.Configuration.Json, 10.0.1"
#r "nuget:Microsoft.Extensions.configuration.Binder, 10.0.1"
#r "nuget:Microsoft.Extensions.Configuration.EnvironmentVariables, 10.0.1"

### Set up the imports

In [None]:
using Microsoft.Extensions.Configuration;
using System.IO;
using Azure;
using Azure.AI.OpenAI;
using Azure.Identity;
using OpenAI.Embeddings;
using System.Text.Json;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Azure;
using Azure.AI.OpenAI;
using Azure.Search.Documents;
using Azure.Search.Documents.Models;
using Microsoft.Agents.AI;

### Get the needed environment variables

In [None]:
// Name of the local settings file; the code below searches for it starting from the current directory and moving up through parent directories.
public const string DefaultConfigFileName = "appsettings.Local.json";

/// <summary>
/// Searches the current working directory and then each of its ancestors for a file
/// with the given name.
/// </summary>
/// <param name="fileName">Name of the file to look for in every directory visited.</param>
/// <returns>
/// The full path of the first directory that contains the file, or <c>null</c> when the
/// file system root is reached without finding it.
/// </returns>
public static string? FindConfigDirectory(string fileName)
{
    for (var current = new DirectoryInfo(Directory.GetCurrentDirectory());
         current is not null;
         current = current.Parent)
    {
        var candidate = Path.Combine(current.FullName, fileName);
        if (File.Exists(candidate))
        {
            return current.FullName;
        }
    }

    // Walked past the root without a match.
    return null;
}

// Locate the directory holding the local settings file; fail fast with a clear message when it is missing.
var basePath = FindConfigDirectory(DefaultConfigFileName)
            ?? throw new InvalidOperationException(
                $"Could not find {DefaultConfigFileName} in current directory or any parent directory.");

// Load configuration from the local settings file, then layer the process environment variables on top.
// - The file name comes from DefaultConfigFileName so the lookup above and the load below cannot drift apart.
// - optional: false because the file was just located; a load failure should not be silently ignored.
// - reloadOnChange: false because the values are copied into environment variables once below,
//   so a file watcher would only consume resources without ever affecting the notebook.
var configuration = new ConfigurationBuilder()
    .SetBasePath(basePath)
    .AddJsonFile(DefaultConfigFileName, optional: false, reloadOnChange: false)
    .AddEnvironmentVariables()
    .Build();

// Publish every non-empty setting as a process environment variable so the later cells
// can read them with Environment.GetEnvironmentVariable.
foreach (var setting in configuration.AsEnumerable())
{
    if (!string.IsNullOrEmpty(setting.Value))
    {
        Environment.SetEnvironmentVariable(setting.Key, setting.Value);
    }
}

Define a method to get the embeddings for the queries we are going to be asking

In [None]:

// Size of the zero vector returned as a fallback; it should match the vector length of the embedding deployment.
private const int EmbeddingDimensions = 1536; // 3072 for 3-large

/// <summary>
/// Generates an embedding vector for a single piece of text using the Azure OpenAI embedding
/// deployment named by the AZURE_OPENAI_EMBEDDING_DEPLOYMENT environment variable.
/// </summary>
/// <param name="text">Text to embed. Text longer than 30000 characters is truncated first.</param>
/// <returns>
/// The embedding returned by the service. A zero vector of <c>EmbeddingDimensions</c> floats is
/// returned instead when <paramref name="text"/> is null/empty/whitespace or when the embedding
/// call fails (the failure message is written to the console).
/// </returns>
/// <exception cref="InvalidOperationException">AZURE_OPENAI_ENDPOINT is not set.</exception>
private async Task<ReadOnlyMemory<float>> GetEmbeddingsAsync(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        // Nothing to embed: return a zero vector without calling the service.
        return new float[EmbeddingDimensions];
    }

    // Truncate text if too long (max ~8000 tokens for ada-002)
    const int maxChars = 30000; // Approximate character limit
    if (text.Length > maxChars)
    {
        // Do not cut between the two halves of a surrogate pair; a lone high surrogate
        // is invalid UTF-16 and would be sent to the service as a corrupted character.
        var cut = char.IsHighSurrogate(text[maxChars - 1]) ? maxChars - 1 : maxChars;
        text = text[..cut];
    }

    // Fail with an explicit message instead of the opaque ArgumentNullException from new Uri(null).
    var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT");
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        throw new InvalidOperationException(
            "AZURE_OPENAI_ENDPOINT is not set. Add it to appsettings.Local.json or to the environment.");
    }

    var client = new AzureOpenAIClient(new Uri(endpoint), new DefaultAzureCredential());

    try
    {
        var embeddingClient = client.GetEmbeddingClient(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"));

        // Request a single embedding for the (possibly truncated) text
        var response = await embeddingClient.GenerateEmbeddingAsync(text);

        // Extract the float vector from the response
        return response.Value.ToFloats();
    }
    catch (Exception ex)
    {
        // Deliberate best effort: report the problem and fall back to a zero vector.
        Console.WriteLine($"Error generating embedding: {ex.Message}");
        return new float[EmbeddingDimensions];
    }
}

Define a method for performing the searches

In [None]:

// Azure AI Search client used by DoSearchAsync. Each required setting is checked explicitly so a
// missing value produces a message naming the variable instead of an opaque null-argument error.
private readonly SearchClient _searchClient = new(
    new Uri(Environment.GetEnvironmentVariable("AZURE_SEARCH_ENDPOINT")
        ?? throw new InvalidOperationException("AZURE_SEARCH_ENDPOINT is not set.")),
    Environment.GetEnvironmentVariable("AZURE_SEARCH_INDEX_NAME")
        ?? throw new InvalidOperationException("AZURE_SEARCH_INDEX_NAME is not set."),
    new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_SEARCH_API_KEY")
        ?? throw new InvalidOperationException("AZURE_SEARCH_API_KEY is not set."))
);

/// <summary>
/// Embeds the query, runs a vector search over the "BodyEmbeddings" and "AnswerEmbeddings" fields,
/// and returns the matching documents as an indented JSON array for use as RAG context.
/// </summary>
/// <param name="query">The user's question.</param>
/// <param name="useHybrid">
/// When true the query text is also sent as the search text (hybrid search);
/// when false only the vector query is sent.
/// </param>
/// <param name="topK">Number of nearest neighbours requested and maximum number of results returned.</param>
/// <returns>The JSON string that is also printed to the console.</returns>
public async Task<string> DoSearchAsync(string query, bool useHybrid = true, int topK = 3)
{
    Console.WriteLine(new string('=', 80));
    Console.WriteLine($"Query: {query}");
    Console.WriteLine($"Hybrid search: {useHybrid}");
    Console.WriteLine(new string('-', 80));

    var qVector = await GetEmbeddingsAsync(query);

    var vq = new VectorizedQuery(qVector)
    {
        KNearestNeighborsCount = topK,
        Fields = { "BodyEmbeddings", "AnswerEmbeddings" }
    };

    var searchOptions = new SearchOptions
    {
        Size = topK,
        VectorSearch = new VectorSearchOptions
        {
            Queries = { vq }
        }
    };

    // A null search text means "vector only"; passing the query text as well makes the search hybrid.
    var searchText = useHybrid ? query : null;
    SearchResults<SearchDocument> results = await _searchClient.SearchAsync<SearchDocument>(searchText, searchOptions);

    // Format results for LLM consumption
    var formattedResults = new List<Dictionary<string, object?>>();

    await foreach (var result in results.GetResultsAsync())
    {
        var doc = result.Document;

        Console.WriteLine(new string('-', 40));

        // One entry per hit; document_number is the 1-based position in the result list.
        formattedResults.Add(new Dictionary<string, object?>
        {
            ["document_number"] = formattedResults.Count + 1,
            ["id"] = doc["Id"],
            ["body"] = doc["Body"],
            ["answer"] = doc["Answer"],
            ["type"] = doc["Type"],
            ["department"] = doc["Queue"],
            ["priority"] = doc["Priority"],
            ["business_type"] = doc["Business_Type"],
            ["search_score"] = result.Score
        });
    }

    // Return as formatted JSON string for RAG; relaxed escaping keeps non-ASCII text and quotes readable.
    var jsonOptions = new JsonSerializerOptions
    {
        WriteIndented = true,
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping
    };
    var ragContext = JsonSerializer.Serialize(formattedResults, jsonOptions);

    Console.WriteLine("RAG Context:");
    Console.WriteLine(ragContext);
    Console.WriteLine(new string('-', 40));

    return ragContext;
}


Define the prompts: one for the system instructions and one that combines the user query with the retrieved context

In [None]:
// System instructions for the agent: answer only from the supplied context and cite sources.
string SYSTEM_PROMPT = """
You are an AI assistant that helps users learn information from the IT support ticket knowledge base.
Answer the question using only the provided context.
Use bullets if the answer has multiple points.
If the answer is longer than 3 sentences, provide a summary.
Answer ONLY with the facts listed in the list of sources provided in the context with the user query. 
Cite your source when you answer the question with the format [source-id].
If the answer is not contained within the context, respond with "I don't know."
""";

// Template for the user message; {user_query} and {context} are placeholders filled in with string.Replace.
string RAG_PROMPT = """
User Question: {user_query}
Context:
{context}
""";

Queries to try:
- "What problems are there with Surface devices?"
- "What sort of AWS problems have been reported?"
- "Are there any issues logged for Dell XPS laptops?"
- "Do we have more issues with MacBook Air computers or Dell XPS laptops?"
- "What issues do we have with dell xps laptops?"
- "What issues are for Dell XPS laptops and the user tried Win + Ctrl + Shift + B?"
- "How many tickets were logged and Incidents for Human Resources and low priority?"
- "Which Dell XPS issue does not mention Windows?"
- "What department had consultants with Login Issues?"

In [None]:
// Alternative question (expected answer: Human Resources):
//var user_query = "What department had consultants with Login Issues?";

// Expected answer is 3 - simple RAG won't get this one right
var user_query = "How many tickets were logged and Incidents for Human Resources and low priority?";

// Retrieve the top 3 documents with hybrid search turned on:
var context = await DoSearchAsync(user_query, useHybrid: true, topK: 3);

// Same retrieval with hybrid search turned off:
//var context = await DoSearchAsync(user_query, useHybrid: false, topK: 3);

In [None]:
// Create the Azure OpenAI client. A missing endpoint is reported by name rather than as the
// opaque ArgumentNullException that new Uri(null) would raise.
var client = new AzureOpenAIClient(
    new Uri(Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT")
        ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.")),
    new DefaultAzureCredential());

// Get a chat client for the configured chat deployment and wrap it in an agent that
// uses SYSTEM_PROMPT as its instructions and "rag-agent" as its name.
var chatClient = client.GetChatClient(
    Environment.GetEnvironmentVariable("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
        ?? throw new InvalidOperationException("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME is not set."));
var agent = chatClient.CreateAIAgent(SYSTEM_PROMPT, "rag-agent");

Now call the LLM with the original user question and the search results

In [None]:
// Fill the RAG template: the question placeholder is substituted first, then the context placeholder.
string message = RAG_PROMPT.Replace("{user_query}", user_query).Replace("{context}", context);

// Send the combined prompt to the agent and print the text of its reply.
var result = await agent.RunAsync(message);

Console.WriteLine(result.Text);

Play around with the code above and try changing the following:
- `topK` value - the default is 3, but try larger and smaller values to see if there is a difference
- Hybrid - see if a hybrid search makes any difference
- `user_query` - try some of the other questions listed (or your own)

For example, this query won't get a correct answer:
```
user_query = "How many tickets were logged and Incidents for Human Resources and low priority?"
```

See if you can figure out why or how to fix it!