In [1]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery, VectorizableTextQuery
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
import requests
import os
from openai import AzureOpenAI

In [2]:
load_dotenv()

True

In [3]:
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
key = os.getenv("AZURE_SEARCH_API_KEY")

In [4]:
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME")
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME")


## **REAL STATE**

In [6]:
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

In [18]:
results = search_client.search(search_text="*", include_total_count=True, top=5, filter="baths eq 1", select="region, city, baths,description_es,price",order_by="price desc")

In [None]:
results.get_count()

1833

## **RAG TIME**

In [11]:
if AZURE_OPENAI_API_KEY:
    openai_client = AzureOpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_version="2023-05-15"
    )
else:
    pass

In [12]:
if key:
    search_client = SearchClient(
        endpoint=service_endpoint,
        index_name=index_name,
        credential=AzureKeyCredential(key)
    )
else:
    azure_credential = DefaultAzureCredential()
    search_client = SearchClient(
        endpoint=service_endpoint,
        index_name=index_name,
        credential=azure_credential
    )

In [13]:
def get_embedding(text):
    response = openai_client.embeddings.create(
        model= AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
        input=text
    )

    return response.data[0].embedding

## **Prepare a question**

In [26]:
user_question = "What is included in my Northwind Health Plus plan that is not standard?"
user_question_embedding = get_embedding(user_question)
print(user_question_embedding)

[0.022349359467625618, 0.007078568451106548, -0.029733039438724518, 0.01794051006436348, 0.0021777276415377855, -0.032067134976387024, -0.03453853353857994, -0.001158467261120677, 0.006025936454534531, 0.03429444506764412, 0.02553776651620865, 0.011311979964375496, 0.004897026810795069, -0.012860416434705257, 0.0064683472737669945, 0.02567506581544876, -0.05858125537633896, -0.010129675269126892, 0.04155607894062996, -0.004534707870334387, 0.027536241337656975, 0.06565982848405838, 4.639232065528631e-05, 0.008054922334849834, 0.014889402315020561, 0.00806255079805851, -0.01511060819029808, -0.0046720076352357864, -0.016155611723661423, -0.013104504905641079, 0.0039130449295043945, 0.0016380631132051349, 0.020381394773721695, 0.0646834746003151, -0.0003525459032971412, -0.029153328388929367, 0.031731512397527695, -0.030236471444368362, 0.05284517630934715, 0.0534248873591423, -0.01436308678239584, 0.01884058676660061, 0.021388260647654533, 0.03450801968574524, 0.06382916122674942, -0.01

In [None]:
for i in results:
    print(i)

{'chunk': "year deductible is the same for \n\nall members of the plan and is reset each year on the plan's renewal date.  \n\nFor In-Network Services: The calendar year deductible for in-network services is $1,500 for \n\nindividuals and $3,000 for families. This means that you must pay the full cost of all covered \n\nservices until you have paid a total of $1,500 for an individual and $3,000 for a family. Once \n\nthis amount is reached, the plan will begin to pay its share of the cost of eligible services.  \n\nFor Out-of-Network Services: The plan does not have a calendar year deductible for out-of-\n\nnetwork services. However, out-of-network services are subject to higher cost sharing than \n\nin-network services, so be sure to check with your provider to find out the cost sharing that \n\napplies.  \n\nExceptions: Certain services are exempt from the calendar year deductible. These services \n\ninclude preventive care services and emergency services.  \n\nTips:  \n\n• Be sure t

## **RETRIEVE MATCHING DOCUMENTS**

#### **VECTOR SEARCH**

In [34]:
search_results = search_client.search(
    None,
    top=3,
    vector_queries=[
        VectorizableTextQuery(
            text=user_question,
            k_nearest_neighbors=3,
            fields="text_vector"
        )   
    ]
)

In [35]:
for result in search_results:
    print("Chunk ID:", result['chunk_id'])
    print("Title:", result['title'])
    print("Text:", result['chunk'])

Chunk ID: 728aa36edda2_aHR0cHM6Ly9ic3RvcmFnZTAzMTEyMDI1LmJsb2IuY29yZS53aW5kb3dzLm5ldC9kYXRhL3BkZi9CZW5lZml0X09wdGlvbnMucGRm0_pages_1
Title: Benefit_Options.pdf
Text: a variety of in-network providers, including primary care 
physicians, specialists, hospitals, and pharmacies. This plan does not offer coverage for emergency 
services, mental health and substance abuse coverage, or out-of-network services.

Comparison of Plans 
Both plans offer coverage for routine physicals, well-child visits, immunizations, and other preventive 
care services. The plans also cover preventive care services such as mammograms, colonoscopies, and 
other cancer screenings. 

Northwind Health Plus offers more comprehensive coverage than Northwind Standard. This plan offers 
coverage for emergency services, both in-network and out-of-network, as well as mental health and 
substance abuse coverage. Northwind Standard does not offer coverage for emergency services, mental 
health and substance abuse coverage, 

In [41]:
search_results = search_client.search(
    None,
    top=1,
    vector_queries=[
        VectorizedQuery(
            vector=user_question_embedding,
            k_nearest_neighbors=1,
            fields="text_vector"
        )   
    ]
)

In [42]:
for result in search_results:
    print("Chunk ID:", result['chunk_id'])
    print("Title:", result['title'])

Chunk ID: 728aa36edda2_aHR0cHM6Ly9ic3RvcmFnZTAzMTEyMDI1LmJsb2IuY29yZS53aW5kb3dzLm5ldC9kYXRhL3BkZi9CZW5lZml0X09wdGlvbnMucGRm0_pages_1
Title: Benefit_Options.pdf


## **RAG**

In [53]:
user_question = input("Please enter your question: ")

# Getting the top 3 matching documents from azure ai search by embedding process
search_results = search_client.search(
    None,
    top=3,
    vector_queries=[
        VectorizableTextQuery(
            text=user_question,
            k_nearest_neighbors=3,
            fields="text_vector"
        )   
    ]
)

# Defining the context
context = ""
for result in search_results:
    context += result['chunk'] + "\n\n"

SYSTEM_PROMPT = f"""You are an AI assistant that helps people find information. 
Be brief in your answers. Answer ONLY with the facts listed in the context below.

Context:
{context}
"""
USER_PROMPT = user_question

response = openai_client.chat.completions.create(
    model=os.getenv("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME"),
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT},
    ],    
)

answer = response.choices[0].message.content
print(answer)

The Standard plan costs $6,350 for an individual and $12,700 for a family. The price for Northwind Health Plus is not provided, so the most expensive plan cannot be determined from the given information.


In [46]:
example = "What is included in my Northwind Health Plus plan that is not standard?"