In [1]:
# Add project root to sys.path
import sys
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

import os
import uuid
import json
from dotenv import load_dotenv
from supabase import create_client, Client
from google.genai import types

from src.llm.OpenAIClient import OpenAIClient
from src.llm.GeminiClient import GeminiClient
from src.storage.SupabaseService import SupabaseService
from src.enums import FinancialDocSpecificType

def retrieve_financial_chunks(
    query_text: str,
    user_id: str,
    match_count: int = 5,
    doc_specific_type: str = None,
    company_name: str = None,
    doc_year_start: int = None,
    doc_year_end: int = None,
    doc_quarter: int = None
) -> str:
    """
    Embed ``query_text`` and fetch matching chunks through the Supabase
    ``match_chunks`` RPC.

    Reads the module-level ``openai_client`` and ``supabase_service`` objects,
    so both must be assigned before this function is called.

    Args:
        query_text: Text that is embedded and sent as ``query_embedding``.
        user_id: Forwarded to the RPC as ``user_id``.
        match_count: Forwarded to the RPC as ``match_count``.
        doc_specific_type: Forwarded to the RPC as ``p_doc_specific_type``.
        company_name: Forwarded to the RPC as ``p_company_name``.
        doc_year_start: Forwarded to the RPC as ``p_doc_year_start``.
        doc_year_end: Forwarded to the RPC as ``p_doc_year_end``.
        doc_quarter: Forwarded to the RPC as ``p_doc_quarter``.

    Returns:
        A JSON string. On success it encodes the list of chunk dicts (``[]``
        when nothing came back); UUID values are rendered as strings and
        dict/list values as nested JSON strings. On failure it encodes
        ``{"error": "<message>"}``. Any ``Exception`` raised along the way is
        reported this way instead of propagating.
    """

    def _to_json_safe(value):
        # UUIDs become plain strings; nested containers are pre-encoded as JSON text.
        if isinstance(value, uuid.UUID):
            return str(value)
        if isinstance(value, (dict, list)):
            return json.dumps(value)
        return value

    print("\n--- Executing Tool: retrieve_financial_chunks ---")
    print(f"  Query: '{query_text}'")
    print(f"  User ID: {user_id}")
    print(f"  Filters: Type={doc_specific_type}, Company={company_name}, Year={doc_year_start}-{doc_year_end}, Qtr={doc_quarter}")

    try:
        embeddings = openai_client.get_embeddings([query_text])
        if not embeddings:
            print("  Error: Failed to generate query embedding.")
            return json.dumps({"error": "Failed to generate query embedding."})
        query_embedding = embeddings[0]
        print("  Query embedding generated.")

        print(f"  Calling Supabase RPC 'match_chunks' with params: match_count={match_count}, user_id={user_id}, p_doc_specific_type={doc_specific_type}, p_company_name={company_name}, p_doc_year_start={doc_year_start}, p_doc_year_end={doc_year_end}, p_doc_quarter={doc_quarter}")
        rpc_params = {
            'query_embedding': query_embedding,
            'match_count': match_count,
            'user_id': user_id,
            'p_doc_specific_type': doc_specific_type,
            'p_company_name': company_name,
            'p_doc_year_start': doc_year_start,
            'p_doc_year_end': doc_year_end,
            'p_doc_quarter': doc_quarter,
        }
        response = supabase_service.client.rpc('match_chunks', rpc_params).execute()

        rows = response.data
        if rows:
            print(f"  Retrieved {len(rows)} chunks from Supabase.")
            serializable_rows = [
                {key: _to_json_safe(value) for key, value in row.items()}
                for row in rows
            ]
            # Indented output keeps the logged preview readable.
            result_json_string = json.dumps(serializable_rows, indent=2)
            print(f"  Returning JSON result (first 500 chars):\n{result_json_string[:500]}...")
            return result_json_string

        # No rows: distinguish a reported error from a genuinely empty result.
        if hasattr(response, 'error') and response.error:
            error_msg = f"Supabase query error: {response.error}"
            print(f"  Error: {error_msg}")
            return json.dumps({"error": error_msg})

        print("  No chunks retrieved from Supabase.")
        return json.dumps([])

    except Exception as e:
        error_msg = f"An unexpected error occurred during chunk retrieval: {e}"
        print(f"  Error: {error_msg}")
        return json.dumps({"error": error_msg})


# --- Define Function Declaration ---
# Function declaration handed to Gemini. It mirrors the parameters of
# retrieve_financial_chunks except user_id, which is not listed here.
retrieve_chunks_declaration = {
    "name": "retrieve_financial_chunks",
    "description": (
        "Searches and retrieves relevant text chunks from the user's uploaded "
        "financial documents based on their query and optional filters like "
        "company name, document type, year range, or quarter. Always use this "
        "tool to find information before answering questions about the user's "
        "financial documents."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query_text": {
                "type": "string",
                "description": (
                    "The user's original question or a refined search query "
                    "based on their question."
                ),
            },
            "match_count": {
                "type": "integer",
                "description": (
                    "The maximum number of relevant text chunks to return. "
                    "Default is 5."
                ),
            },
            "doc_specific_type": {
                "type": "string",
                # Example values are every enum member except UNKNOWN / None-valued ones.
                "description": (
                    "Filter results to a specific document type. Examples: "
                    + ", ".join(
                        member.value
                        for member in FinancialDocSpecificType
                        if member != FinancialDocSpecificType.UNKNOWN
                        and member.value is not None
                    )
                    + ". Leave empty if no specific type is mentioned."
                ),
            },
            "company_name": {
                "type": "string",
                "description": (
                    "Filter results to a specific company name mentioned in the query. "
                    "Use the most likely name if variations exist "
                    "(e.g., 'Tesla' for 'Tesla, Inc.'). "
                    "Leave empty if no company is mentioned."
                ),
            },
            "doc_year_start": {
                "type": "integer",
                "description": (
                    "The starting fiscal year for filtering (e.g., 2021). "
                    "Extract from the user's query if a year or date range is specified. "
                    "Leave empty if no start year is specified."
                ),
            },
            "doc_year_end": {
                "type": "integer",
                "description": (
                    "The ending fiscal year for filtering (e.g., 2021). "
                    "Use the same year as start year if only one year is mentioned. "
                    "Leave empty if no end year is specified."
                ),
            },
            "doc_quarter": {
                "type": "integer",
                "description": (
                    "Filter results to a specific fiscal quarter (1, 2, 3, or 4). "
                    "Extract if mentioned in the query. "
                    "Use -1 or leave empty if no quarter is mentioned."
                ),
            },
        },
        "required": ["query_text"],
    },
}


# --- Configuration and Globals ---
# Pull settings from a .env file (if any) into the process environment.
load_dotenv()

# Credentials of the test account used to sign in to Supabase further down.
TEST_EMAIL, TEST_PASSWORD = (
    os.getenv(var_name) for var_name in ("TEST_EMAIL", "TEST_PASSWORD")
)
if not (TEST_EMAIL and TEST_PASSWORD):
    raise ValueError("TEST_EMAIL and TEST_PASSWORD must be set.")

# Running list of conversation turns sent to Gemini on each call.
conversation_history = list()

# --- User Query ---
user_query = (
    "Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021? "
    "Create a report of tesla for 2021 in markdown i can copy."
)
print("\n--- User Query ---", user_query, sep="\n")

# --- Initialize Clients and Authenticate ---
try:
    print("\n--- Initializing clients and authenticating ---")
    # Project wrappers around the embedding and chat model SDKs.
    openai_client = OpenAIClient()
    gemini_client = GeminiClient()

    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_ANON_KEY")
    if not (supabase_url and supabase_key):
        raise ValueError("Supabase credentials not set.")

    # Sign in with the test account; the resulting user id is what the
    # retrieval tool later receives as user_id.
    auth_client: Client = create_client(supabase_url, supabase_key)
    auth_response = auth_client.auth.sign_in_with_password(
        dict(email=TEST_EMAIL, password=TEST_PASSWORD)
    )
    if not (auth_response and auth_response.user):
        raise ConnectionError("Supabase authentication failed.")

    authenticated_user_id_str = str(auth_response.user.id)
    print(f"Authentication successful. User ID: {authenticated_user_id_str}")

    # The service wraps the already signed-in client rather than creating its own.
    supabase_service = SupabaseService(supabase_client=auth_client)
    print("Clients initialized and authenticated.")

except Exception as init_error:
    # Nothing below can run without these clients, so stop here.
    print(f"Initialization or Authentication Error: {init_error}")
    sys.exit(1)

# --- Define the Tool for Gemini ---
# Model id passed to every generate_content call in the loop below.
gemini_model_name = "gemini-2.5-flash-preview-04-17"
# Wrap the single function declaration in the Tool object Gemini expects.
retrieval_tool = types.Tool(
    function_declarations=[retrieve_chunks_declaration],
)


# --- Function Calling Loop ---
try:
    # 1. First call to LLM: send the user query together with the retrieval tool.
    print(f"\n--- Sending initial query to Gemini ({gemini_model_name}) ---")
    # Add user query to history
    conversation_history.append(types.Content(role="user", parts=[types.Part(text=user_query)]))
    print(f"  Conversation History before first call:\n{conversation_history}")

    response = gemini_client.client.models.generate_content(
        model=gemini_model_name,
        contents=conversation_history,
        config=types.GenerateContentConfig(tools=[retrieval_tool])
    )

    print("\n--- Received response from first Gemini call ---")
    # Print the full response structure for inspection
    print(response)

    # A candidate may carry no content at all; route that case to the explicit
    # "unexpected structure" branch instead of failing on `.parts`.
    first_candidate = response.candidates[0] if response.candidates else None
    model_response_content = getattr(first_candidate, "content", None)

    # Add model's response (potential function call) to history
    if model_response_content is not None and model_response_content.parts:
        conversation_history.append(model_response_content)
        print(f"\n  Model's response content added to history:\n{model_response_content}")
        # The function call is not guaranteed to be the first part (the model
        # may emit text before it), so scan every part and fall back to the
        # first one when no call is present.
        # NOTE: only the first function call is handled; additional calls in
        # the same response are ignored.
        message_part = next(
            (
                part
                for part in model_response_content.parts
                if getattr(part, "function_call", None)
            ),
            model_response_content.parts[0],
        )
    else:
        print("Error: Unexpected response structure or no candidates/parts from Gemini.")
        message_part = None
        # SystemExit is not an Exception subclass, so the handler at the
        # bottom of this block does not intercept it.
        sys.exit(1)

    # 2. Check if LLM requested a function call
    function_call = getattr(message_part, "function_call", None)
    if function_call:
        print(f"\n--- Gemini requested function call: '{function_call.name}' ---")
        print(f"  Raw Arguments from LLM: {function_call.args}")

        # 3. Execute the function if it's the one we defined
        if function_call.name == "retrieve_financial_chunks":
            print("  Recognized 'retrieve_financial_chunks' call.")
            # Extract arguments from the LLM's request
            tool_args = dict(function_call.args)
            print(f"  Extracted tool_args before adding user_id: {tool_args}")

            # --- IMPORTANT: Add authenticated user_id ---
            # The user_id comes from authentication, not from the LLM's
            # interpretation of the query; assigning it here also overrides
            # any user_id the model may have supplied itself.
            tool_args['user_id'] = authenticated_user_id_str
            print(f"  Tool_args *including* user_id for execution: {tool_args}")

            # Call the actual Python function
            function_result_json = retrieve_financial_chunks(**tool_args)
            print("\n--- Finished executing retrieve_financial_chunks ---")

            # 4. Second call to LLM: Send function result back
            print("\n--- Sending function result back to Gemini ---")
            # The tool returns a JSON string; send it back as a parsed object.
            function_response_part = types.Part.from_function_response(
                name=function_call.name,
                response={"result": json.loads(function_result_json)}
            )
            print(f"  Function response part being sent:\n{function_response_part}")

            # Add function response to history
            conversation_history.append(
                types.Content(role="user", parts=[function_response_part])
            )
            print(f"\n  Conversation History before second call:\n{conversation_history}")

            # Generate final response using the function result context
            final_response = gemini_client.client.models.generate_content(
                model=gemini_model_name,
                contents=conversation_history,
                config=types.GenerateContentConfig(tools=[retrieval_tool])  # Keep tool available
            )

            print("\n--- Received response from second Gemini call (Final Answer) ---")
            print(final_response)  # Print full final response structure

            # Same content-less-candidate guard as for the first response.
            final_candidate = final_response.candidates[0] if final_response.candidates else None
            final_model_response_content = getattr(final_candidate, "content", None)

            # Add final model response to history
            if final_model_response_content is not None and final_model_response_content.parts:
                conversation_history.append(final_model_response_content)
                print(f"\n  Final model's response content added to history:\n{final_model_response_content}")

                print("\n--- Final Answer Text from Gemini ---")
                print(final_response.text)
            else:
                print("Error: No final response text found after sending function result.")

        else:
            print(f"Warning: LLM requested unknown function '{function_call.name}'")
            # For a simple notebook, we stop here
            print("Stopping execution due to unknown function call.")

    else:
        # LLM decided to answer directly without calling the function
        print("\n--- Gemini decided to answer directly (No Function Call Requested) ---")
        # Check the value, not just the attribute, so a part without text is
        # reported as such instead of printing the literal "None".
        direct_text = getattr(message_part, "text", None)
        if direct_text:
            print(direct_text)
        else:
            print("No text response found in the initial call.")
        print("Stopping execution after direct answer.")


except Exception as e:
    print(f"\nAn unexpected error occurred during the Gemini interaction: {e}")


--- User Query ---
Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021? Create a report of tesla for 2021 in markdown i can copy.

--- Initializing clients and authenticating ---
Initialized OpenAI client with model: text-embedding-3-small
Initializing Gemini client with API key: AIz...yQ
Authentication successful. User ID: e222921f-cfdc-4a05-8cf2-aea13004bcf2
SupabaseService initialized with provided client.
Clients initialized and authenticated.

--- Sending initial query to Gemini (gemini-2.5-flash-preview-04-17) ---
  Conversation History before first call:
[Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021? Create a report of tesla for 2021 in markdown i can copy.')], role='user')]

--- Received response from first Gemini call ---
candidates