In [1]:
# Make the parent of the current working directory importable, so the
# `src.*` packages imported further down can be resolved.
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Only prepend when absent, so re-running the cell does not add duplicates.
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

import os
import uuid
from dotenv import load_dotenv
from supabase import create_client, Client # Needed for client creation and auth

# Import necessary services
from src.llm.OpenAIClient import OpenAIClient
from src.services.SupabaseService import SupabaseService # For DB/Storage interaction wrapper

# --- Configuration and Setup ---
# Load environment variables a single time, before any of them are read.
load_dotenv()

# Credentials of the test account used for the Supabase sign-in below.
TEST_EMAIL = os.environ.get("TEST_EMAIL")
TEST_PASSWORD = os.environ.get("TEST_PASSWORD")
if not (TEST_EMAIL and TEST_PASSWORD):
    raise ValueError("TEST_EMAIL and TEST_PASSWORD must be set in your .env file.")

# The question to embed, and how many matching chunks to request.
query_text = "Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021?"
K_SIMILAR_CHUNKS = 2

# Echo the run parameters so the cell output is self-describing.
print(
    f"Test Email: {TEST_EMAIL}",
    f"Query: '{query_text}'",
    f"Retrieving {K_SIMILAR_CHUNKS} chunks.",
    sep="\n",
)

# --- Supabase Authentication ---
# Sign in with the test account's email/password and keep both the client
# and the user's UUID for the vector search further down.
# Raises ValueError when the Supabase settings are missing, and
# ConnectionError (chained to the underlying cause) when sign-in fails.
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_ANON_KEY")
if not supabase_url or not supabase_key:
    raise ValueError("SUPABASE_URL and SUPABASE_ANON_KEY must be set.")

try:
    auth_client: Client = create_client(supabase_url, supabase_key)
    auth_response = auth_client.auth.sign_in_with_password(
        {"email": TEST_EMAIL, "password": TEST_PASSWORD}
    )

    # A response without a user is treated as a failed sign-in.
    if not auth_response or not auth_response.user:
        error_msg = "Supabase authentication failed."
        # Append the error detail only when the response object carries one.
        if hasattr(auth_response, 'error') and auth_response.error:
            error_msg += f" Error: {auth_response.error}"
        raise ConnectionError(error_msg)

    # uuid.UUID() also validates the id; a malformed value raises ValueError,
    # which the handler below reports as an authentication failure.
    authenticated_user_id_uuid = uuid.UUID(auth_response.user.id)
    print(f"Authentication successful. User ID: {authenticated_user_id_uuid}")

except Exception as e:
    print(f"Authentication error: {e}")
    # Stop execution: nothing below can run without an authenticated user.
    # `from e` keeps the root cause attached as __cause__ in the traceback.
    raise ConnectionError("Authentication failed.") from e

# --- Embedding the Query ---
# Turn the query text into an embedding vector for the similarity search.
# Raises RuntimeError (chained to the underlying cause) on any failure.
try:
    openai_client = OpenAIClient()  # Client init prints a confirmation line
    # A one-element batch is sent, so the first returned item is the query's.
    query_embedding_list = openai_client.get_embeddings([query_text])

    if not query_embedding_list:
        raise ValueError("OpenAI returned an empty embedding list.")

    query_embedding = query_embedding_list[0]

except Exception as e:
    print(f"Error generating query embedding: {e}")
    # Stop execution: the search cannot run without an embedding.
    # `from e` keeps the root cause attached as __cause__ in the traceback.
    raise RuntimeError("Query embedding generation failed.") from e

# --- Supabase Vector Search (using authenticated user ID) ---
def format_score(score: object) -> str:
    """Return *score* rendered with four decimals, or "N/A" if it is not a number.

    A missing or null score must not abort the printing of the chunks, so
    anything that is not an int/float (bools excluded) yields the placeholder.
    """
    if isinstance(score, (int, float)) and not isinstance(score, bool):
        return f"{score:.4f}"
    return "N/A"


try:
    # SupabaseService wraps the client that was authenticated above.
    supabase_service = SupabaseService(supabase_client=auth_client)

    print(f"\nSearching Supabase for {K_SIMILAR_CHUNKS} chunks...")

    response = supabase_service.client.rpc(
        'match_chunks',
        {
            'query_embedding': query_embedding,
            'match_count': K_SIMILAR_CHUNKS,
            'user_id': str(authenticated_user_id_uuid)  # The RPC is given the user id as a string
        }
    ).execute()

    retrieved_chunks = response.data
    if retrieved_chunks:
        print(f"Retrieved {len(retrieved_chunks)} chunks.")
        print("\n--- Retrieved Chunks ---")
        for i, chunk in enumerate(retrieved_chunks):
            score = chunk.get('similarity_score')
            doc_id = chunk.get('document_id')
            # `or` also covers a present-but-empty/null heading.
            section_heading = chunk.get('section_heading') or 'Unknown Section'
            doc_type = chunk.get('doc_specific_type')
            doc_year = chunk.get('doc_year')
            company_name = chunk.get('company_name')
            chunk_text = chunk.get('chunk_text', '')

            print(f"\nChunk {i+1}:")
            # NOTE(review): the label says "Cosine Distance" while the field is
            # named 'similarity_score' - confirm which one the RPC returns.
            print(f"  Score (Cosine Distance): {format_score(score)}")
            print(f"  Document ID: {doc_id}")
            print(f"  Section: {section_heading}")
            print(f"  Metadata: Type={doc_type}, Year={doc_year}, Company={company_name}")
            print(f"  Text: {chunk_text}\n")  # Full chunk text, not a snippet

        print("\n--- End Retrieved Chunks ---")

    else:
        print("No chunks retrieved.")
        # Surface an error detail only when the response object carries one.
        if hasattr(response, 'error') and response.error:
            print(f"Supabase query error: {response.error}")

except Exception as e:
    # Deliberately best-effort: a failed search is reported, not re-raised.
    # Re-raise here instead if later cells depend on the search result.
    print(f"An error occurred during Supabase search: {e}")

Test Email: wbryanlai@gmail.com
Query: 'Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021?'
Retrieving 2 chunks.
Authentication successful. User ID: e222921f-cfdc-4a05-8cf2-aea13004bcf2
Initialized OpenAI client with model: text-embedding-3-small
SupabaseService initialized with provided client.

Searching Supabase for 2 chunks...
Retrieved 2 chunks.

--- Retrieved Chunks ---

Chunk 1:
  Score (Cosine Distance): 0.2396
  Document ID: 2aa8b3c7-8a74-4626-aac1-9eeb0d2cab6a
  Section: Note 4 – Goodwill and Intangible Assets
  Metadata: Type=Annual Report, Year=2021, Company=Tesla, Inc.
  Text: # Note 4 – Goodwill and Intangible Assets

Goodwill decreased $7 million within the automotive segment from $207 million as of December 31, 2020 to $200 million as of December 31, 2021. There were no accumulated impairment losses as of December 31, 2021 and 2020.

Information regarding our intangible assets including assets recognized from our acquisitions was as follows (