Delete documents from Azure AI Search

In [3]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Set up Azure AI Search credentials
SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
SEARCH_API_KEY = os.getenv("AZURE_SEARCH_API_KEY")
search_index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

# Fail fast with a readable message instead of an opaque SDK error when the
# .env file is missing or incomplete.
_missing_settings = [
    name
    for name, value in (
        ("AZURE_SEARCH_ENDPOINT", SEARCH_ENDPOINT),
        ("AZURE_SEARCH_API_KEY", SEARCH_API_KEY),
        ("AZURE_SEARCH_INDEX_NAME", search_index_name),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_settings)
    )

# Azure AI Search limits how many actions a single indexing batch may carry
# (1000 per the service-limits documentation), so deletes are sent in chunks.
DELETE_BATCH_SIZE = 1000

# Initialize the SearchClient
search_client = SearchClient(
    endpoint=SEARCH_ENDPOINT,
    index_name=search_index_name,
    credential=AzureKeyCredential(SEARCH_API_KEY),
)

# Fetch all document IDs. The list is fully materialised before any delete is
# issued so the deletes cannot interfere with the paged search iterator.
# NOTE(review): this assumes the index key field is named "id" - confirm
# against the index schema.
document_ids = [doc["id"] for doc in search_client.search(search_text="*", select=["id"])]

# Delete all documents
if document_ids:
    deleted_count = 0
    failed_keys = []
    for start in range(0, len(document_ids), DELETE_BATCH_SIZE):
        batch_ids = document_ids[start:start + DELETE_BATCH_SIZE]
        batch_results = search_client.delete_documents(
            documents=[{"id": doc_id} for doc_id in batch_ids]
        )
        # Each entry reports the outcome for one key; count only real successes
        # so the summary below does not over-report.
        for outcome in batch_results:
            if outcome.succeeded:
                deleted_count += 1
            else:
                failed_keys.append(outcome.key)
    print(f"Deleted {deleted_count} documents from index '{search_index_name}'.")
    if failed_keys:
        print(f"Failed to delete {len(failed_keys)} documents; first keys: {failed_keys[:10]}")
else:
    print("No documents found in the index.")

No documents found in the index.


In [None]:
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from openai import AzureOpenAI

# Pull settings from the local .env file into the process environment
load_dotenv()

# Azure OpenAI settings (endpoint, key, and the embedding deployment to call)
embedding_deployment_name = os.environ.get("AZURE_OPENAI_EMB_DEPLOYMENT_NAME")
openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Azure AI Search settings (service endpoint, target index, admin/query key)
search_api_key = os.environ.get("AZURE_SEARCH_API_KEY")
search_index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")
search_endpoint = os.environ.get("AZURE_SEARCH_ENDPOINT")

# Client used for embedding requests.
# NOTE(review): the API version is pinned to a preview release; confirm it
# accepts the `dimensions` argument that generate_embedding sends.
openai_client = AzureOpenAI(
    azure_endpoint=openai_endpoint,
    api_version="2023-12-01-preview",
    api_key=openai_api_key,
)

# Client used for querying the search index, authenticated with the API key
search_client = SearchClient(
    credential=AzureKeyCredential(search_api_key),
    index_name=search_index_name,
    endpoint=search_endpoint,
)

# Function to generate embeddings using Azure OpenAI
def generate_embedding(text, dimensions=3072):
    """Return the embedding vector for ``text`` from the Azure OpenAI deployment.

    Args:
        text: Value sent to the embeddings endpoint as ``input``.
        dimensions: Requested embedding size. Defaults to 3072, the value that
            was previously hard-coded. Pass ``None`` to leave the argument out
            of the request, e.g. for embedding models that reject it.

    Returns:
        The ``embedding`` attribute of the first item in the response ``data``.
    """
    request_kwargs = {"input": text, "model": embedding_deployment_name}
    # Only send `dimensions` when a size was requested; the OpenAI docs state
    # the parameter is supported only by text-embedding-3 and later models.
    if dimensions is not None:
        request_kwargs["dimensions"] = dimensions

    response = openai_client.embeddings.create(**request_kwargs)
    return response.data[0].embedding

# Function to perform pure vector search
def vector_search(query, top=5, k_nearest_neighbors=50):
    """Run a vector-only search for ``query`` and print the matching chunks.

    Args:
        query: Text to embed and search for.
        top: Number of results requested from the search service. Defaults to
            5, the value that was previously hard-coded.
        k_nearest_neighbors: Number of nearest neighbours requested in the
            vector query. Defaults to 50, the value that was previously
            hard-coded.

    Returns:
        None. The document name, page number and content of each result are
        printed to stdout.
    """
    # Step 1: Generate embeddings for the query
    embedding = generate_embedding(query)

    # Step 2: Create a vectorized query against the "embedding" index field
    vector_query = VectorizedQuery(
        vector=embedding,
        k_nearest_neighbors=k_nearest_neighbors,
        fields="embedding",
    )

    # Step 3: Perform vector search (search_text=None disables full-text search)
    results = search_client.search(
        search_text=None,
        vector_queries=[vector_query],
        select=["document_name", "content", "page_number"],
        top=top,
    )

    # Step 4: Process and display results
    print(f"Vector search results for query: '{query}'\n")
    for result in results:
        print(f"Document: {result['document_name']}")
        print(f"Page: {result['page_number']}")
        print(f"Content: {result['content']}")
        print("-" * 40)

# Example usage
def test_vector_search():
    query = "Vehicle Mounted Elevating and Rotating Work Platforms"  # Replace with your test query
    vector_search(query)

# Run the example search when this cell executes; the results are printed by vector_search
test_vector_search()

Converting image to base64

In [None]:
import base64

def get_image_as_base64(path):
    """Read the file at ``path`` in binary mode and return its Base64 text.

    Returns:
        The Base64 encoding of the file's bytes, decoded to a ``str`` as UTF-8.
    """
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

import os

# Location of the logo image. The default is the original developer-machine
# path; set the LOGO_IMAGE_PATH environment variable to point at the file when
# running the notebook anywhere else.
LOGO_IMAGE_PATH = os.getenv(
    "LOGO_IMAGE_PATH",
    "/Users/aakashwalavalkar/Desktop/construction-safety/images/caiel.png",
)

# Base64 text of the logo image, produced by get_image_as_base64
logo_base64 = get_image_as_base64(LOGO_IMAGE_PATH)