In [None]:
# test_search.py - Corrected Version 2

import requests
import json
import time
import os
import pandas as pd
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import (
    VectorizedQuery,
    QueryType
    # SemanticQueryCaption, # Keep removed if library not upgraded
    # SemanticQueryAnswer   # Keep removed if library not upgraded
)
from dotenv import load_dotenv

# --- Load Environment Variables ---
dotenv_path = "../.env"
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path)
    print(f"Loaded environment variables from: {dotenv_path}")
else:
    print(f"Warning: .env file not found at {dotenv_path}. Attempting to load from current directory...")
    if load_dotenv():
        print("Loaded environment variables from current directory.")
    else:
        print("Warning: No .env file found. Relying on environment variables set externally.")

# --- Configuration ---
# Ollama Config
# --- >>> CRITICAL FIX: Use correct env var name AND correct default endpoint <<< ---
OLLAMA_EMBEDDINGS_ENDPOINT = os.getenv("OLLAMA_ENDPOINTT", "http://localhost:11434/api/embeddings") # Use OLLAMA_ENDPOINT, default to /api/embeddings
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "nomic-embed-text")
try:
    # Ensure dimension is integer
    VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "768"))
except ValueError:
    print("Error: VECTOR_DIMENSION environment variable is not a valid integer. Using default 768.")
    VECTOR_DIMENSION = 768

# Azure AI Search Config (Ensure these match your setup)
SEARCH_SERVICE_ENDPOINT = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
SEARCH_API_KEY = os.environ.get("AZURE_SEARCH_API_KEY")
SEARCH_INDEX_NAME = os.environ.get("AZURE_SEARCH_INDEX_NAME")
SEMANTIC_CONFIG_NAME = os.environ.get("AZURE_SEMANTIC_CONFIGURATION_NAME")

# Search Defaults
SEARCH_TOP_K = 5

# --- Validate Essential Config ---
if not SEARCH_SERVICE_ENDPOINT or "<" in SEARCH_SERVICE_ENDPOINT:
    print("Error: Azure Search Service Endpoint not configured correctly (AZURE_SEARCH_SERVICE_ENDPOINT). Exiting."); exit(1)
if not SEARCH_API_KEY or "<" in SEARCH_API_KEY:
    print("Error: Azure Search API Key not configured (AZURE_SEARCH_API_KEY). Exiting."); exit(1)
if not SEARCH_INDEX_NAME:
    print("Error: Azure Search Index Name not configured (AZURE_SEARCH_INDEX_NAME). Exiting."); exit(1)
if not SEMANTIC_CONFIG_NAME:
    print("Warning: AZURE_SEMANTIC_CONFIGURATION_NAME not set. Semantic Hybrid search might fail.")


# --- Ollama API Call Function for Single Embedding ---
def get_ollama_embedding(query_text: str, model: str = OLLAMA_MODEL):
    """Calls the local Ollama API (/api/embeddings) to get embedding for a single text."""
    global VECTOR_DIMENSION

    if pd.isna(query_text): query_text = ""
    query_text = str(query_text).strip()

    if not query_text:
        print("Warning: Cannot generate embedding for empty text.")
        return None

    # This log message will now correctly reflect the target endpoint from config
    print(f"    Generating embedding for query using model '{model}' at {OLLAMA_EMBEDDINGS_ENDPOINT}...")
    try:
        # --- >>> CRITICAL FIX: Use 'prompt' key for /api/embeddings payload <<< ---
        response = requests.post(
            OLLAMA_EMBEDDINGS_ENDPOINT, # Should now correctly point to /api/embeddings
            json={"model": model, "prompt": query_text}, # Use 'prompt' key
            timeout=180
        )
        response.raise_for_status() # Will raise an error if API call failed (e.g., 404 Not Found if endpoint is wrong)
        response_json = response.json()

        # --- >>> CRITICAL FIX: Expect 'embedding' (singular) key from /api/embeddings response <<< ---
        if "embedding" in response_json:
             embedding_vector = response_json["embedding"] # Assign the actual vector list
             # Check type and dimension
             if isinstance(embedding_vector, list) and len(embedding_vector) == VECTOR_DIMENSION:
                 print(f"    Embedding received (dimension: {len(embedding_vector)}).")
                 return embedding_vector # Return the vector itself
             else:
                 # This error check is now accurate for the expected response format
                 print(f"    Error: Invalid embedding format/dimension received from Ollama (Expected: {VECTOR_DIMENSION}, Got length: {len(embedding_vector) if isinstance(embedding_vector, list) else 'N/A'}, Type: {type(embedding_vector)}).")
                 return None
        else:
             # This indicates the /api/embeddings endpoint didn't return the expected key
             print(f"    Error: 'embedding' key missing in Ollama response from {OLLAMA_EMBEDDINGS_ENDPOINT}. Response: {response_json}")
             return None

    except requests.exceptions.Timeout:
        print(f"    Error: Ollama API request timed out.")
    except requests.exceptions.ConnectionError:
        print(f"    Error: Could not connect to Ollama API at {OLLAMA_EMBEDDINGS_ENDPOINT}. Is Ollama running?")
    except requests.exceptions.RequestException as e:
        # This will catch HTTP errors like 404 if the endpoint is still wrong or Ollama API changed
        print(f"    Error calling Ollama API: {e}")
        # Optionally print response body for debugging HTTP errors
        if e.response is not None:
            print(f"    Response status code: {e.response.status_code}")
            try:
                print(f"    Response body: {e.response.json()}")
            except json.JSONDecodeError:
                print(f"    Response body (non-JSON): {e.response.text}")
    except json.JSONDecodeError:
        print(f"    Error: Could not decode JSON response from Ollama API. Response Text: {response.text}")
    except Exception as e:
        print(f"    An unexpected error occurred during embedding generation: {e}")
    return None

# --- Azure Search: Search Function ---
# (No changes here, assuming previous modifications for older library are kept if needed)
def search_movies(query_text: str, search_mode: str = "hybrid", top_k: int = SEARCH_TOP_K):
    """Performs a search on the existing Azure AI Search index."""
    global SEARCH_SERVICE_ENDPOINT, SEARCH_API_KEY, SEARCH_INDEX_NAME, OLLAMA_MODEL, SEMANTIC_CONFIG_NAME

    if not query_text: print("Error: Query text cannot be empty."); return

    print(f"\n--- Performing {search_mode.upper()} Search (Top {top_k}) ---")
    print(f"Query: '{query_text}'")

    query_vector = None
    vector_query = None
    if search_mode in ["vector", "hybrid", "semantic_hybrid"]:
        start_embed_time = time.time()
        query_vector = get_ollama_embedding(query_text, model=OLLAMA_MODEL) # Calls the fixed function
        embed_time = time.time() - start_embed_time
        if query_vector:
            print(f"    Query embedding generated successfully (Took {embed_time:.2f} sec).")
            vector_query = VectorizedQuery(vector=query_vector, k_nearest_neighbors=top_k, fields="embedding")
        else:
            print(f"    Error: Failed to get query embedding after {embed_time:.2f} sec.")
            if search_mode == "vector": print("    Cannot perform PURE VECTOR search without embedding. Aborting search."); return
            elif search_mode == "semantic_hybrid": print("    Warning: Proceeding with SEMANTIC HYBRID using only text + reranking.")
            elif search_mode == "hybrid": print("    Warning: Proceeding with HYBRID using only text keyword search.")

    try:
        credential = AzureKeyCredential(SEARCH_API_KEY)
        search_client = SearchClient(endpoint=SEARCH_SERVICE_ENDPOINT, index_name=SEARCH_INDEX_NAME, credential=credential)
    except Exception as e: print(f"  Error creating SearchClient: {e}"); return

    try:
        search_start_time = time.time()
        results = None
        select_fields = ["movie_id", "title", "overview", "genres", "tagline"]

        if search_mode == "vector":
            if not vector_query: print("   Error: Vector query object not available for pure vector search."); return
            print(f"    Executing pure vector search...")
            results = search_client.search(search_text=None, vector_queries=[vector_query], select=select_fields, top=top_k)
        elif search_mode == "hybrid":
            print(f"    Executing hybrid search (text + vector)...")
            results = search_client.search(search_text=query_text, vector_queries=[vector_query] if vector_query else None, select=select_fields, top=top_k)
        elif search_mode == "semantic_hybrid":
             if not SEMANTIC_CONFIG_NAME: print("   Error: Cannot perform semantic search - AZURE_SEMANTIC_CONFIGURATION_NAME not set."); return
             print(f"    Executing Semantic Hybrid search (config '{SEMANTIC_CONFIG_NAME}')...")
             results = search_client.search(search_text=query_text, vector_queries=[vector_query] if vector_query else None, select=select_fields, query_type=QueryType.SEMANTIC, semantic_configuration_name=SEMANTIC_CONFIG_NAME, top=top_k)
        else: # Keyword
            print(f"    Executing keyword search...")
            results = search_client.search(search_text=query_text, select=select_fields, top=top_k)

        print("\n    --- Search Results ---")
        count = 0
        for result in results:
            count += 1
            print(f"    {count}. Title: {result.get('title', 'N/A')}")
            print(f"       ID: {result.get('movie_id', 'N/A')}")
            reranker_score = result.get('@search.reranker_score')
            if reranker_score is not None: print(f"       Semantic Score: {reranker_score:.4f}")
            else:
                base_score = result.get('@search.score')
                if base_score is not None: print(f"       Score: {base_score:.4f}")
            print(f"       Overview: {result.get('overview', 'N/A')[:200]}...")
            print(f"       Genres: {result.get('genres', 'N/A')}")
            print(f"       Tagline: {result.get('tagline', 'N/A')}")
            print("    " + "-" * 20)

        if count == 0: print("    No results found.")
        search_time = time.time() - search_start_time
        print(f"\n    Search execution time: {search_time:.2f} seconds.")

    except Exception as e:
        print(f"    An error occurred during search execution: {e}")

# --- Main Execution Block ---
if __name__ == "__main__":
    print("="*60)
    print(" Azure AI Search Test Script")
    print(f" Start Time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f" Using Azure Endpoint: {SEARCH_SERVICE_ENDPOINT}")
    print(f" Using Index Name: {SEARCH_INDEX_NAME}")
    print(f" Using Ollama Model: {OLLAMA_MODEL} (Dim: {VECTOR_DIMENSION})")
    print(f" Using Ollama Endpoint: {OLLAMA_EMBEDDINGS_ENDPOINT}") # Verify this prints the correct endpoint
    if SEMANTIC_CONFIG_NAME: print(f" Using Semantic Config: {SEMANTIC_CONFIG_NAME}")
    else: print(f" Semantic Config Name: Not Set (Semantic search may fail)")
    print("="*60)

    """    # --- Test Queries ---
    query1 = "lonely robot cleaning up a polluted earth"
    query2 = "philosophical look at artificial intelligence in space"
    query3 = "movies about charming thieves who pull off a big heist"
    query4 = "what are some critically acclaimed romantic comedies from the 90s?"
    query5 = "dystopian future where society is divided into factions"

    # Run tests for different modes
    search_movies(query1, search_mode="hybrid")
    search_movies(query2, search_mode="vector")
    search_movies(query4, search_mode="semantic_hybrid")
    search_movies(query5, search_mode="keyword")

    print(f"\nScript finished at {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*60) """

IndentationError: unindent does not match any outer indentation level (<string>, line 209)

In [19]:
query1 = "lonely robot cleaning up a polluted earth"
search_movies(query1, search_mode="hybrid")


--- Performing HYBRID Search (Top 5) ---
Query: 'lonely robot cleaning up a polluted earth'
    Generating embedding for query using model 'nomic-embed-text' at http://localhost:11434/api/embed...
    Error: 'embedding' key missing in Ollama response. Response: {'model': 'nomic-embed-text', 'embeddings': [[-0.0024020025, 0.007908286, -0.1552194, -0.017964492, 0.051347446, 0.12480845, -0.02690247, 0.046382405, 0.01971788, -0.04814501, -0.00935061, 0.07010146, 0.042321518, 0.022624234, -0.03421293, -0.0055212346, 0.03581116, -0.013193976, -0.01154996, 0.025591975, 0.009049927, -0.046134103, 0.05927946, 0.023231661, 0.053235304, 0.062610604, 0.037086565, -0.027968952, 0.084816284, -0.029294707, 0.018020263, -0.03687179, -0.04785703, 0.025644228, -0.03354989, -0.05989352, 0.09023104, 0.0114711365, 0.093536474, 0.035722606, -0.02915929, -0.001093338, -0.011973042, -0.04537316, 0.029264158, -0.012714448, 0.027653921, 0.010614077, 0.029178316, -0.034484692, 0.055210494, -0.011808553, -0.0530

In [3]:
query2 = "philosophical look at artificial intelligence in space"
search_movies(query2, search_mode="vector")

NameError: name 'search_movies' is not defined

In [21]:
query3 = "movies about charming thieves who pull off a big heist"
search_movies(query3, search_mode="hybrid", top_k=3) # Example with different top_k


--- Performing HYBRID Search (Top 3) ---
Query: 'movies about charming thieves who pull off a big heist'
    Generating embedding for query using model 'nomic-embed-text' at http://localhost:11434/api/embed...
    Error: 'embedding' key missing in Ollama response. Response: {'model': 'nomic-embed-text', 'embeddings': [[-0.044586957, 0.050469764, -0.199531, 0.047957394, 0.023017982, -0.01144092, -0.0065296753, 0.043037664, -0.027801821, 0.04740882, -0.086032875, 0.03825591, 0.017322093, -0.00082642335, -0.0046662684, -0.036677133, 0.015904296, -0.04995851, 0.0031345456, 0.046301488, -0.030151254, -0.013055773, -0.0038307596, 0.012755394, 0.04901216, 0.013312594, -0.066982955, -0.025691535, -0.04379152, -0.014123656, -0.027399685, -0.056741104, -0.080789946, -0.018713897, -0.048422586, -0.021372242, 0.044824973, 0.028126948, 0.007954528, -0.02293712, 0.003566751, 0.07532406, -0.0033390643, 0.03448871, 0.040438656, -0.0043174177, 0.050586198, -0.02180905, 0.03824446, 0.010796634, 0.03230

In [22]:
query4 = "what are some critically acclaimed romantic comedies from the 90s?"
search_movies(query4, search_mode="semantic_hybrid") # Good candidate for semantic


--- Performing SEMANTIC_HYBRID Search (Top 5) ---
Query: 'what are some critically acclaimed romantic comedies from the 90s?'
    Generating embedding for query using model 'nomic-embed-text' at http://localhost:11434/api/embed...
    Error: 'embedding' key missing in Ollama response. Response: {'model': 'nomic-embed-text', 'embeddings': [[-0.016739052, -0.009358457, -0.17573561, 0.030814854, -0.020962415, 0.054617554, -0.024044095, 0.015806502, 0.011577848, 0.022469293, -0.020710127, 0.032919485, 0.056142375, 0.011054143, 0.0026723822, -0.043806434, -0.021224126, -0.056904186, -0.054811712, 0.02184972, -0.027784968, -0.08771595, -0.006188885, 0.0025892735, 0.046201218, 0.0952358, -0.06426978, -0.003459057, -0.08867196, -0.014093856, 0.0082986485, -0.0068265614, -0.01945462, -0.0001531245, -0.024941912, -0.06201643, 0.038035545, -0.034323916, -0.00073633366, -0.0055564204, 0.03790873, 0.05419761, 0.012010001, 0.03037887, 0.0839457, -0.06679239, -0.032525193, 0.0050835907, 0.01705477, 

In [23]:
query5 = "dystopian future where society is divided into factions"
search_movies(query5, search_mode="keyword")


--- Performing KEYWORD Search (Top 5) ---
Query: 'dystopian future where society is divided into factions'
    Executing keyword search...

    --- Search Results ---
    1. Title: The Time Machine
       ID: 2134
       Score: 24.3491
       Overview: A Victorian Englishman travels to the far future and finds that humanity has divided into two hostile species....
       Genres: Thriller, Adventure, Fantasy, Science Fiction, Romance
       Tagline: You Will Orbit into the Fantastic Future!
    --------------------
    2. Title: Mad Max
       ID: 9659
       Score: 18.0186
       Overview: In a dystopian future Australia, a vicious biker gang murders a cop's family, and makes his fight with them personal....
       Genres: Adventure, Action, Thriller, Science Fiction
       Tagline: The Maximum Force Of The Future
    --------------------
    3. Title: Judge Dredd
       ID: 9482
       Score: 17.3052
       Overview: In a dystopian future, Dredd, the most famous judge (a cop with ins