In [1]:
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.models import VectorizedQuery
from azure.search.documents import SearchClient, IndexDocumentsBatch

In [None]:
# Azure Embedding Model
AOAI_ENDPOINT = "https://vigne-mbw9rlh6-eastus2.cognitiveservices.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15"
AOAI_KEY =".."
AOAI_API_VERSION = "2023-05-15"        # Use a recent, available API version
AOAI_EMBEDDING_MODEL_DEPLOYMENT = "text-embedding-ada-002" # Your deployment name for the embedding model


# Azure AI Search
SEARCH_ENDPOINT ="https://careintel-search.search.windows.net"
SEARCH_KEY =".."
INDEX_NAME = "careintel-enriched-index" # A name for your new search index

In [3]:
def generate_embedding(text, model=AOAI_EMBEDDING_MODEL_DEPLOYMENT):
    """Generates a vector embedding for a given piece of text."""
    try:
        # OpenAI API does not accept empty strings, so we send a space instead
        return client.embeddings.create(input=[text if text.strip() else " "], model=model).data[0].embedding
    except Exception as e:
        print(f"Error generating embedding for text: '{text[:50]}...'. Error: {e}")
        return None

In [4]:
client = AzureOpenAI(
  api_key=AOAI_KEY,
  api_version=AOAI_API_VERSION,
  azure_endpoint=AOAI_ENDPOINT
)

In [39]:

user_query = "Are patients coming back to the ICU within two days of discharge?"
user_role = "managers" 

# 1. Generate an embedding for the user's query
query_vector = generate_embedding(user_query)

# 2. Perform the vector search with a role filter
if query_vector:
    vector_query = VectorizedQuery(vector=query_vector, k_nearest_neighbors=3, fields="content_vector")
    
    search_client = SearchClient(endpoint=SEARCH_ENDPOINT, index_name=INDEX_NAME, credential=AzureKeyCredential(SEARCH_KEY))

    # *** FINAL CODE CHANGE ***
    # Using a standard text search (`search_text`) on the `accessible_to_roles`
    # field. This is a more direct and reliable method than using a complex filter.
    results = search_client.search(
        search_text=user_role,
        search_fields=["accessible_to_roles"],
        vector_queries=[vector_query],
        select=["table_name", "column_name", "content", "accessible_to_roles"],
        top=3,  # Adjust the number of results as needed
    )

    # 3. Print the results
    print(f"Query: '{user_query}'")
    print(f"Role: '{user_role}'\n")
    print("Relevant results from our knowledge base for this role:\n")
    
#     # Check if the iterator has any items before looping
#     results_list = list(results)
#     if not results_list:
#         print("No results found for the given query and role.")
#     else:
#         for result in results_list:
#             print(f"  Score: {result['@search.score']:.4f}")
#             print(f"  Table: {result['table_name']}")
#             print(f"  Column: {result['column_name']}")
#             print(f"  Content: {result['content']}")
#             print(f"  Accessible To: {result['accessible_to_roles']}\n")
#             print("-" * 20)
# else:
#     print("Could not generate a query vector.")

Query: 'Are patients coming back to the ICU within two days of discharge?'
Role: 'managers'

Relevant results from our knowledge base for this role:



In [40]:
# Check if the iterator has any items before looping
results_list = list(results)
if not results_list:
    print("No results found for the given query and role.")
else:
    print(f"Found {len(results_list)} results:")
    cleaned_results = [
        {
            "KPI/Metric": item["column_name"],
            "content": item["content"],
            "table_name": item["table_name"]
        }
        for item in results_list
    ]

Found 3 results:


In [41]:
cleaned_results

[{'KPI/Metric': 'icu_readmission_rate_48h',
  'content': 'Metric: icu_readmission_rate_48h. Description: Percentage of patients readmitted to ICU within 48 hours. Calculation: (ICU Readmissions within 48h / Total Discharges) * 100. Belongs to dataset:  icu_metrics.',
  'table_name': 'icu_metrics'},
 {'KPI/Metric': 'readmission_rate_30d',
  'content': 'Metric: readmission_rate_30d. Description: Percentage of patients readmitted within 30 days. Calculation: (30-Day Readmissions / Discharges) * 100. Belongs to dataset: inpatient_ward_metrics.',
  'table_name': 'inpatient_ward_metrics'},
 {'KPI/Metric': 'avg_length_of_stay_days',
  'content': 'Metric: avg_length_of_stay_days. Description: Average length of stay in ICU in days. Calculation: Total ICU Days / Number of Discharges. Belongs to dataset:  icu_metrics.',
  'table_name': 'icu_metrics'}]

In [14]:
# Check if the iterator has any items before looping
results_list = list(results)
if not results_list:
    print("No results found for the given query and role.")
else:
    for result in results_list:
        print(f"  Score: {result['@search.score']:.4f}")
        print(f"  Table: {result['table_name']}")
        print(f"  Column: {result['column_name']}")
        print(f"  Content: {result['content']}")
        print(f"  Accessible To: {result['accessible_to_roles']}\n")
        print("-" * 20)

No results found for the given query and role.


In [13]:
results_list

[{'accessible_to_roles': 'managers',
  'column_name': 'icu_readmission_rate_48h',
  'content': 'Metric: icu_readmission_rate_48h. Description: Percentage of patients readmitted to ICU within 48 hours. Calculation: (ICU Readmissions within 48h / Total Discharges) * 100. Belongs to dataset:  icu_metrics.',
  'table_name': 'icu_metrics',
  '@search.score': 0.02500000223517418,
  '@search.reranker_score': None,
  '@search.highlights': None,
  '@search.captions': None},
 {'accessible_to_roles': 'managers',
  'column_name': 'readmission_rate_30d',
  'content': 'Metric: readmission_rate_30d. Description: Percentage of patients readmitted within 30 days. Calculation: (30-Day Readmissions / Discharges) * 100. Belongs to dataset: inpatient_ward_metrics.',
  'table_name': 'inpatient_ward_metrics',
  '@search.score': 0.02432994917035103,
  '@search.reranker_score': None,
  '@search.highlights': None,
  '@search.captions': None},
 {'accessible_to_roles': 'administrative_staff,managers',
  'column_n