# When to disregard results
A semantic search will almost always return some results.  The question is whether those results are worth including.  If you simply augment the prompt with every result that comes back, you spend tokens that could have been avoided.

For example, suppose you are using highlight_fields="vendorName" and a search is performed for a vendor name. If @search.highlights indicates no direct text matches, and the scores are very low (@search.score < 0.05 and @search.reranker_score < 3.0), then we can disregard the result, as it is unlikely to be of use to the LLM.

Take a look at filter_search_results for an example, and notice how the results of the search for 'X' are disregarded even though the search itself returns results.



In [None]:
from operator import index
import os
from pathlib import Path
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from openai import AzureOpenAI  # If you're using Azure OpenAI for embeddings
from dotenv import load_dotenv
from azure.search.documents.models import VectorizedQuery


# Get root directory path
# The .env file is looked up one level above the current working directory.
root_dir = Path().absolute().parent
env_path = root_dir / '.env'

# Load .env from root
load_dotenv(dotenv_path=env_path)
print(f"Loaded .env from {env_path}")

# Access variables (each one is None when the variable is not defined)
api_key = os.getenv('AZURE_OPENAI_API_KEY')
debug = os.getenv('DEBUG')
more_research = os.getenv('MORE_RESEARCH')
index_name = os.getenv('AZURE_SEARCH_INDEX')
search_endpoint = os.getenv('AZURE_SEARCH_ENDPOINT')
search_key = os.getenv('AZURE_SEARCH_KEY')

# Show only the first and last four characters of the key. A missing key is
# reported instead of raising TypeError from slicing None.
masked_api_key = api_key[:4] + '*' * 28 + api_key[-4:] if api_key else '<not set>'
print(f"API Key: {masked_api_key}")
print(f"Index Name: {index_name}")

aoai_client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    # NOTE(review): the key printed above is read from AZURE_OPENAI_API_KEY,
    # while the client reads AZURE_OPENAI_KEY. Fall back to the former so
    # either name works; confirm which one the .env file is meant to define.
    api_key=os.getenv("AZURE_OPENAI_KEY") or api_key,
    api_version="2023-05-15",
)

def generate_embeddings(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text``.

    Args:
        text: The string to embed; it is sent as a one-item input list.
        model: Forwarded as ``model`` to the embeddings call. The original
            inline note describes this value as the deployment name.

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data`` list.
    """
    response = aoai_client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding

def filter_search_results(
    results,
    *,
    highlighted_min_score=0.01,
    highlighted_min_reranker_score=1.0,
    plain_min_score=0.05,
    plain_min_reranker_score=3.0,
):
    """Keep only the search results that score well enough to be worth using.

    A result with text highlights (a direct match on the highlighted field)
    is held to the lower ``highlighted_*`` thresholds; a result without
    highlights must clear the higher ``plain_*`` thresholds to compensate.
    In both cases the base score AND the reranker score must meet their
    minimum.

    Args:
        results: Iterable of mapping-like results exposing ``.get`` for the
            keys ``'@search.score'``, ``'@search.reranker_score'`` and
            ``'@search.highlights'``.
        highlighted_min_score: Minimum ``@search.score`` when highlights exist.
        highlighted_min_reranker_score: Minimum ``@search.reranker_score``
            when highlights exist.
        plain_min_score: Minimum ``@search.score`` without highlights.
        plain_min_reranker_score: Minimum ``@search.reranker_score`` without
            highlights.

    Returns:
        A list of the results that passed, in their original order, or
        ``None`` when nothing passed.
    """
    kept = []
    for result in results:
        # ``or 0`` also covers a key that is present with a None value
        # (presumably what a non-semantic query yields for the reranker
        # score -- TODO confirm); comparing None with a float would raise
        # TypeError.
        base_score = result.get('@search.score') or 0
        reranker_score = result.get('@search.reranker_score') or 0

        if result.get('@search.highlights'):
            min_score = highlighted_min_score
            min_reranker_score = highlighted_min_reranker_score
        else:
            min_score = plain_min_score
            min_reranker_score = plain_min_reranker_score

        if base_score >= min_score and reranker_score >= min_reranker_score:
            kept.append(result)

    # Callers rely on a falsy None (not an empty list) meaning "nothing usable".
    return kept or None

# Test vector search
# One client is created here and reused by every query that follows.
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(search_key),
)

query = "Fabrikam"
query_vector = generate_embeddings(query)

top = 3
vector_query = VectorizedQuery(vector=query_vector, k_nearest_neighbors=top, fields="vendorNameVector", kind="vector")
print(f"\nFirst 1: using: {query}")
# Same hybrid query without highlights or semantic ranking:
# results = search_client.search(
#     search_text=query,
#     vector_queries=[vector_query],
#     select=["id", "contractId", "vendorName"]
# )

results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["id", "contractId", "vendorName"],
    highlight_fields="vendorName",
    query_type="semantic",  # Enable semantic search
    semantic_configuration_name="default",  # You'll need to set this up
)

filtered_results = filter_search_results(results)  # removed the items that are not good enough

# filter_search_results returns None when nothing passes the thresholds;
# iterating None directly would raise TypeError.
if filtered_results:
    for result in filtered_results:
        print(result)
else:
    print("No results found")

# Option 1: Pure Text Search with Weights
# Only search_text is sent here: no vector query and no semantic ranking,
# and the results are printed without passing through filter_search_results.
query = "What can you tell me about the vendor agreement between Fabrikam Services and Contoso Elite?"
print(f"\nPure Text Search with Weights, using: {query} ")

results = search_client.search(
    search_text=query,
    highlight_fields="vendorName",
    select=["id", "contractId", "vendorName"],
)
for result in results:
    print(result)
    
# Option 2: Hybrid Search with Proper Query
query = "X"
print(f"\nHybrid Search with Proper Query, using: {query} ")

# Embed the query text and ask for the 3 nearest neighbours on the
# vendorNameVector field.
query_vector = generate_embeddings(query)
vector_query = VectorizedQuery(
    vector=query_vector,
    k_nearest_neighbors=3,
    fields="vendorNameVector",
    kind="vector",
)

# Text + vector query with semantic ranking; the semantic configuration
# named "default" has to exist on the index.
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    query_type="semantic",
    semantic_configuration_name="default",
    highlight_fields="vendorName",
    select=["id", "contractId", "vendorName"],
)

# Drop the items that are not good enough.
filtered_results = filter_search_results(results)

# The filter hands back None when every result was discarded.
if not filtered_results:
    print("No results found")
else:
    for result in filtered_results:
        print(result)


