# RAG with Azure AI Search

In [None]:
# Import required libraries
import os
import json
from dotenv import load_dotenv

from langchain_openai import AzureOpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Load settings from a local .env file (if one exists) into the process
# environment so the os.getenv lookups in the cells below can read them
load_dotenv()

In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import Vector
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    SearchIndex,
    SemanticConfiguration,
    PrioritizedFields,
    SemanticField,
    SearchField,
    SemanticSettings,
    VectorSearch,
    VectorSearchAlgorithmConfiguration,
)

In [None]:
from openai import AzureOpenAI
# --- Azure AI Search connection settings (read from the environment) ---
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")
key = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

# --- Azure OpenAI deployment names; each is None when its variable is unset ---
OPENAI_GPT35_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT35_DEPLOYMENT_NAME")
OPENAI_GPT4_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT4_DEPLOYMENT_NAME")
OPENAI_GPT4V_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT4V_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_DALLE_DEPLOYMENT_NAME = os.environ.get("OPENAI_DALLE_DEPLOYMENT_NAME")

# --- Azure OpenAI endpoint and API key ---
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Azure OpenAI client, pinned to a fixed API version
client = AzureOpenAI(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    api_key=OPENAI_API_KEY,
    api_version="2023-05-15",
)

# Credential wrapping the search admin key; reused by every SearchClient below
credential = AzureKeyCredential(key)

In [None]:
# Build the LangChain embeddings client for the Azure OpenAI Ada deployment.
# The deployment name is passed as both the deployment and the model name.
embeddingmodel = AzureOpenAIEmbeddings(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    chunk_size=1,
)

In [None]:
# Generate embeddings using the OpenAI Ada model
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_embeddings(text):
    """Return the embedding of ``text`` produced by ``embeddingmodel``.

    Used for the title and content fields of documents as well as for
    query strings. The ``retry`` decorator re-invokes the call with a
    randomised exponential wait (1 to 20 seconds) and stops after 6 attempts.
    """
    return embeddingmodel.embed_query(text)

## Perform a vector similarity search

In [None]:
# Pure vector search: only the query embedding is sent, no keyword text
query = "semantic kernel?"

search_client = SearchClient(
    service_endpoint,
    index_name="sk-cogsrch-vector-index-2",
    credential=credential,
)

# Embed the query, then request the top 3 matches against "contentVector"
query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content"],
)

# One title / score / content triple per hit
for result in results:
    print(
        f"Title: {result['title']}",
        f"Score: {result['@search.score']}",
        f"Content: {result['content']}",
        sep="\n",
    )

In [None]:
# Pure vector search with a longer, multi-term query
query = "semantic kernel planner and kernel"

search_client = SearchClient(
    service_endpoint,
    index_name="sk-cogsrch-vector-index-2",
    credential=credential,
)

# Embed the query, then request the top 3 matches against "contentVector"
query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content"],
)

# One title / score / content triple per hit
for result in results:
    print(
        f"Title: {result['title']}",
        f"Score: {result['@search.score']}",
        f"Content: {result['content']}",
        sep="\n",
    )

In [None]:
# Pure vector search, multilingual: the query is written in Spanish
query = "Planificador semántico del kernel y kernel"

search_client = SearchClient(
    service_endpoint,
    index_name="sk-cogsrch-vector-index-2",
    credential=credential,
)

# Embed the query, then request the top 3 matches against "contentVector"
query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content"],
)

# One title / score / content triple per hit
for result in results:
    print(
        f"Title: {result['title']}",
        f"Score: {result['@search.score']}",
        f"Content: {result['content']}",
        sep="\n",
    )

## Perform Cross-Field Vector Search with a filter

In [None]:
# Pure vector search across two vector fields, restricted by a filter
query = "programming languages supported by semantic kernel"

search_client = SearchClient(
    service_endpoint,
    index_name="sk-cogsrch-vector-index-2",
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    # Cross-field: the query vector is matched against both vector fields
    vector_fields="titleVector, contentVector",
    # Only documents whose title equals 'Semantic Kernel' are considered
    filter="title eq 'Semantic Kernel'",
    select=["title", "content"],
)

# One title / score / content triple per hit
for result in results:
    print(
        f"Title: {result['title']}",
        f"Score: {result['@search.score']}",
        f"Content: {result['content']}",
        sep="\n",
    )

## Perform a Hybrid Search

In [None]:
# Hybrid search: the same query is sent both as keyword text and as a vector
query = "semantic kernel planner and kernel"

search_client = SearchClient(
    service_endpoint,
    index_name="sk-cogsrch-vector-index-2",
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=query,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    filter="title eq 'Semantic Kernel'",
    select=["title", "content"],
    top=3,
)

# Show which object type the SDK hands back for the result set
print(type(results))

# One title / score / content triple per hit, followed by a blank line
for result in results:
    print(
        f"Title: {result['title']}",
        f"Score: {result['@search.score']}",
        f"Content: {result['content']}\n",
        sep="\n",
    )

## Perform a Semantic Hybrid Search

In [None]:
# Semantic hybrid search: keyword text plus vector, with semantic ranking,
# extractive captions and extractive answers requested from the service
query = "semantic kernel planner and kernel"

search_client = SearchClient(service_endpoint, index_name="sk-cogsrch-vector-index-2", credential=credential)

results = search_client.search(
    search_text=query,
    vector=generate_embeddings(query),
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content"],
    query_type="semantic",
    query_language="en-us",
    semantic_configuration_name="sk-semantic-config",
    query_caption="extractive",
    query_answer="extractive",
    top=3,
)

# get_answers() may return None when the service produced no semantic answer;
# fall back to an empty list so the loop is simply skipped instead of raising
# TypeError on iteration.
semantic_answers = results.get_answers() or []
for answer in semantic_answers:
    # Prefer the highlighted form of the answer when the service supplies one
    if answer.highlights:
        print(f"Semantic Answer: {answer.highlights}")
    else:
        print(f"Semantic Answer: {answer.text}")
    print(f"Semantic Answer Score: {answer.score}\n")

for result in results:
    print(f"Title: {result['title']}")
    print(f"Content: {result['content']}")

    # Only the first caption of each hit is shown; hits without captions
    # print nothing further.
    captions = result["@search.captions"]
    if captions:
        caption = captions[0]
        if caption.highlights:
            print(f"Caption: {caption.highlights}\n")
        else:
            print(f"Caption: {caption.text}\n")