In [1]:
import sys
import os
# Make the repository root (the parent of the current working directory)
# importable so the `src.*` packages resolve when run from a subfolder.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
already_importable = project_root in sys.path
if not already_importable:
    # Prepend so this checkout takes priority over other entries on the path.
    sys.path[:0] = [project_root]

import os
import uuid
from dotenv import load_dotenv
from supabase import create_client, Client

from src.llm.OpenAIClient import OpenAIClient
from src.storage.SupabaseService import SupabaseService
from src.enums import FinancialDocSpecificType

# Load variables from the .env file before reading any configuration.
load_dotenv()

# Credentials used to sign in to Supabase for this retrieval test.
TEST_EMAIL = os.getenv("TEST_EMAIL")
TEST_PASSWORD = os.getenv("TEST_PASSWORD")
if not (TEST_EMAIL and TEST_PASSWORD):
    raise ValueError("TEST_EMAIL and TEST_PASSWORD must be set in your .env file.")

# Question that will be embedded and matched against stored chunks.
query_text = "Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021?"

# Number of chunks requested from the vector search (sent as `match_count`).
K_SIMILAR_CHUNKS = 5

# Echo the run configuration so the cell output is self-describing.
for banner_line in (
    f"Test Email: {TEST_EMAIL}",
    f"Query: '{query_text}'",
    f"Retrieving {K_SIMILAR_CHUNKS} chunks.",
):
    print(banner_line)

# --- Define Optional Metadata Filters ---
# These values are forwarded unchanged to the `match_chunks` RPC as the
# `p_*` parameters. The examples below set a filter to None to leave it out.
company_name_filter = "tesla"
doc_specific_type_filter = FinancialDocSpecificType.ANNUAL_REPORT.value
doc_year_start_filter = doc_year_end_filter = 2021
doc_quarter_filter = None

# Example: Only filter by company name (uncomment to test this case)
# doc_specific_type_filter = None
# company_name_filter = "Alphabet"
# doc_year_start_filter = None
# doc_year_end_filter = None
# doc_quarter_filter = None

# Example: No metadata filters (Pure vector search + user ID) (uncomment to test this case)
# doc_specific_type_filter = None
# company_name_filter = None
# doc_year_start_filter = None
# doc_year_end_filter = None
# doc_quarter_filter = None


# Report the active filters in one call; each argument lands on its own line.
print(
    "\nApplying Filters:",
    f"  Specific Type: {doc_specific_type_filter}",
    f"  Company Name (contains): {company_name_filter}",
    f"  Year Range: {doc_year_start_filter} - {doc_year_end_filter}",
    f"  Quarter: {doc_quarter_filter}",
    sep="\n",
)

# --- Supabase Authentication ---
# Sign in with the test account so later queries run as that user.
# Produces `auth_client` (the signed-in client) and
# `authenticated_user_id_uuid` (the user's id parsed as a UUID).
# Raises ValueError when the Supabase settings are missing, and
# ConnectionError when sign-in fails for any reason.
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_ANON_KEY")
if not supabase_url or not supabase_key:
    raise ValueError("SUPABASE_URL and SUPABASE_ANON_KEY must be set.")

try:
    auth_client: Client = create_client(supabase_url, supabase_key)
    auth_response = auth_client.auth.sign_in_with_password(
        {"email": TEST_EMAIL, "password": TEST_PASSWORD}
    )

    # A response without a user is treated as a failed sign-in.
    if not auth_response or not auth_response.user:
        error_msg = "Supabase authentication failed."
        if hasattr(auth_response, 'error') and auth_response.error:
            error_msg += f" Error: {auth_response.error}"
        raise ConnectionError(error_msg)

    authenticated_user_id_uuid = uuid.UUID(auth_response.user.id)
    print(f"Authentication successful. User ID: {authenticated_user_id_uuid}")

except Exception as e:
    print(f"Authentication error: {e}")
    # Chain the original exception so the real cause stays in the traceback.
    raise ConnectionError("Authentication failed.") from e

# --- Embedding the Query ---
# Embed the query text with the project's OpenAI client wrapper.
# Produces `query_embedding`, the first item returned by `get_embeddings`.
# Raises RuntimeError when embedding fails or returns nothing.
try:
    openai_client = OpenAIClient()
    # The client takes a list of texts; only one is sent here.
    query_embedding_list = openai_client.get_embeddings([query_text])

    if not query_embedding_list:
        raise ValueError("OpenAI returned an empty embedding list.")

    query_embedding = query_embedding_list[0]

except Exception as e:
    print(f"Error generating query embedding: {e}")
    # Chain the original exception so the real cause stays in the traceback.
    raise RuntimeError("Query embedding generation failed.") from e

# --- Supabase Vector Search (using authenticated user ID and metadata filters) ---
def format_similarity_score(score):
    """Return *score* formatted for display.

    Real numbers are rendered with four decimal places. Anything else
    (a missing key, ``None``, or a placeholder string) is rendered as
    ``'N/A'`` instead of raising on the ``.4f`` format spec.
    """
    # bool is a subclass of int; exclude it so True/False are not shown as 1.0000/0.0000.
    if isinstance(score, (int, float)) and not isinstance(score, bool):
        return f"{score:.4f}"
    return 'N/A'


# Call the `match_chunks` RPC as the signed-in user and print each returned
# chunk. Errors are reported but not re-raised, so the cell always completes.
try:
    supabase_service = SupabaseService(supabase_client=auth_client)

    print(f"\nSearching Supabase for {K_SIMILAR_CHUNKS} chunks with filters...")

    response = supabase_service.client.rpc(
        'match_chunks',
        {
            'query_embedding': query_embedding,
            'match_count': K_SIMILAR_CHUNKS,
            'user_id': str(authenticated_user_id_uuid),

            'p_doc_specific_type': doc_specific_type_filter,
            'p_company_name': company_name_filter,
            'p_doc_year_start': doc_year_start_filter,
            'p_doc_year_end': doc_year_end_filter,
            'p_doc_quarter': doc_quarter_filter
        }
    ).execute()

    retrieved_chunks = response.data
    if retrieved_chunks:
        print(f"Retrieved {len(retrieved_chunks)} chunks.")
        print("\n--- Retrieved Chunks ---")
        for i, chunk in enumerate(retrieved_chunks, start=1):
            score = chunk.get('similarity_score')
            doc_id = chunk.get('document_id')
            section_heading = chunk.get('section_heading') or 'Unknown Section'
            doc_type = chunk.get('doc_specific_type')
            doc_year = chunk.get('doc_year')
            doc_quarter = chunk.get('doc_quarter')
            company_name = chunk.get('company_name')
            chunk_text = chunk.get('chunk_text', '')

            print(f"\nChunk {i}:")
            # A missing or non-numeric score must not abort the listing.
            print(f"  Score (Cosine Distance): {format_similarity_score(score)}")
            print(f"  Document ID: {doc_id}")
            print(f"  Section: {section_heading}")
            print(f"  Metadata: Type={doc_type}, Year={doc_year}, Quarter={doc_quarter}, Company={company_name}")
            print(f"  Text: {chunk_text}\n")

        print("\n--- End Retrieved Chunks ---")

    else:
        print("No chunks retrieved based on the provided query and filters.")
        if hasattr(response, 'error') and response.error:
            print(f"Supabase query error: {response.error}")

except Exception as e:
    print(f"An error occurred during Supabase search: {e}")

Test Email: wbryanlai@gmail.com
Query: 'Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021?'
Retrieving 5 chunks.

Applying Filters:
  Specific Type: Annual Report
  Company Name (contains): tesla
  Year Range: 2021 - 2021
  Quarter: None
Authentication successful. User ID: e222921f-cfdc-4a05-8cf2-aea13004bcf2
Initialized OpenAI client with model: text-embedding-3-small
SupabaseService initialized with provided client.

Searching Supabase for 5 chunks with filters...
Retrieved 4 chunks.

--- Retrieved Chunks ---

Chunk 1:
  Score (Cosine Distance): 0.3063
  Document ID: a468ce48-18ab-4eee-a0ea-2040c3569b65
  Section: FORM 10-K
  Metadata: Type=Annual Report, Year=2021, Quarter=4, Company=Tesla, Inc.
  Text: ## FORM 10-K

(Mark One)
☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE
ACT OF 1934

For the fiscal year ended December 31, 2021
OR

TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES
EXCHANGE ACT OF 1934

For t