In [None]:
# Make the parent of the current working directory importable so that the
# `src.*` imports further down can be resolved.
# NOTE(review): this assumes the process is started from a directory directly
# below the project root (e.g. a notebooks folder) — confirm.
import sys
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    # Prepend (index 0) so this path is searched before the existing entries.
    sys.path.insert(0, project_root)

import uuid
import json
from dotenv import load_dotenv
from supabase import create_client, Client
from google.genai import types
import traceback

from src.llm.OpenAIClient import OpenAIClient
from src.llm.GeminiClient import GeminiClient
from src.storage.SupabaseService import SupabaseService
from src.enums import FinancialDocSpecificType
from src.prompts.prompt_manager import PromptManager
from src.helper.llm_helper_chat import print_final_formatted_answer, serialize_conversation_history, format_chunks_for_llm


# --- Global Client Variables ---
# All of these start as None and are assigned by
# initialize_global_clients_and_authenticate().
# openai_client and supabase_service are read directly (as globals) by
# retrieve_financial_chunks, which returns an error payload while either is None.
openai_client: OpenAIClient = None
gemini_client: GeminiClient = None # Main pipeline function will take this as an argument
supabase_service: SupabaseService = None
# String form of the signed-in user's id; the pipeline injects it into tool calls.
authenticated_user_id_str: str = None
auth_client: Client = None # Supabase auth client

# --- Configuration Constants ---
# Passed to the "citation_answer" prompt template as YOUR_APP_DOMAIN.
YOUR_APP_DOMAIN = "www.stackifier.com"

# --- PROMPT FUNCTION: Craft instructions for final answer + citation links ---
def create_final_answer_instructions(user_original_query: str, formatted_snippets_text: str) -> str:
    """Render the "citation_answer" prompt used for the final answer turn.

    Args:
        user_original_query: The user's question, forwarded to the template.
        formatted_snippets_text: Retrieved snippets already formatted as text,
            forwarded to the template.

    Returns:
        Whatever ``PromptManager.get_prompt`` produces for the
        "citation_answer" template with the two arguments above plus the
        module-level ``YOUR_APP_DOMAIN``.
    """
    template_fields = {
        "user_original_query": user_original_query,
        "formatted_snippets_text": formatted_snippets_text,
        "YOUR_APP_DOMAIN": YOUR_APP_DOMAIN,
    }
    return PromptManager.get_prompt("citation_answer", **template_fields)

# --- TOOL FUNCTION: Retrieve Financial Chunks ---
def retrieve_financial_chunks(
    query_text: str,
    user_id: str, # This user_id is injected by the calling pipeline, not from LLM
    match_count: int = 5,
    doc_specific_type: str = None,
    company_name: str = None,
    doc_year_start: int = None,
    doc_year_end: int = None,
    doc_quarter: int = None
) -> str:
    """Embed the query and fetch matching chunks via the 'match_chunks' RPC.

    Uses the module-level ``openai_client`` (for the embedding) and
    ``supabase_service`` (for the RPC); both must have been initialized.

    Args:
        query_text: Text to embed and search with.
        user_id: Forwarded to the RPC as ``user_id``.
        match_count: Forwarded to the RPC as ``match_count``.
        doc_specific_type: Forwarded as ``p_doc_specific_type``.
        company_name: Forwarded as ``p_company_name``.
        doc_year_start: Forwarded as ``p_doc_year_start``.
        doc_year_end: Forwarded as ``p_doc_year_end``.
        doc_quarter: Forwarded as ``p_doc_quarter``.

    Returns:
        A JSON string. On success it is a list of chunk objects in which
        ``document_filename``, ``section_id``, ``chunk_text`` and
        ``section_heading`` are always present (placeholders are filled in
        when the RPC omitted them). Otherwise it is an object with a single
        ``"error"`` key; exceptions raised while embedding or querying are
        caught and reported this way.
    """
    print("\n--- Executing Tool: retrieve_financial_chunks ---")
    print(f"  Query: '{query_text}'")
    print(f"  User ID for retrieval: {user_id}")
    print(f"  Filters: Type={doc_specific_type}, Company={company_name}, Year={doc_year_start}-{doc_year_end}, Qtr={doc_quarter}")

    # Both clients are module-level globals (only read here, never rebound).
    if openai_client is None or supabase_service is None:
        return json.dumps({"error": "Global clients not initialized. OpenAI or Supabase service is None."})

    try:
        embeddings = openai_client.get_embeddings([query_text])
        if not embeddings:
            print("  Error: Failed to generate query embedding.")
            return json.dumps({"error": "Failed to generate query embedding."})
        query_vector = embeddings[0]
        print("  Query embedding generated.")

        print("  Calling Supabase RPC 'match_chunks'...")
        rpc_params = {
            'query_embedding': query_vector,
            'match_count': match_count,
            'user_id': user_id,  # the authenticated user id supplied by the caller
            'p_doc_specific_type': doc_specific_type,
            'p_company_name': company_name,
            'p_doc_year_start': doc_year_start,
            'p_doc_year_end': doc_year_end,
            'p_doc_quarter': doc_quarter,
        }
        rpc_result = supabase_service.client.rpc('match_chunks', rpc_params).execute()
        rows = rpc_result.data

        # No data: report the RPC error if there is one, else a generic failure.
        if rows is None:
            rpc_error = getattr(rpc_result, 'error', None)
            if rpc_error:
                detail = rpc_error.message if hasattr(rpc_error, 'message') else rpc_error
                error_msg = f"Supabase RPC 'match_chunks' error: {detail}"
                print(f"  Error: {error_msg}")
                return json.dumps({"error": error_msg})
            print("  Received unexpected response structure from Supabase RPC 'match_chunks'.")
            return json.dumps({"error": "Unexpected response from Supabase RPC. Data and error fields were not accessible or were None when expected."})

        print(f"  Retrieved {len(rows)} chunks from Supabase.")
        chunks = []
        for row in rows:
            # UUID objects are not JSON serializable, so stringify them.
            chunk = {
                field: str(value) if isinstance(value, uuid.UUID) else value
                for field, value in row.items()
            }
            # Missing or null filename / section id get a placeholder value.
            if chunk.get('document_filename') is None:
                print("  Warning: RPC 'match_chunks' did not return 'document_filename' for a chunk.")
                chunk['document_filename'] = 'RPC_Missing_Doc_Name'
            if chunk.get('section_id') is None:
                print("  Warning: RPC 'match_chunks' did not return 'section_id' for a chunk.")
                chunk['section_id'] = str(chunk.get('id', 'RPC_Missing_Section_ID'))
            # These two are only filled in when the key is absent altogether.
            chunk.setdefault('chunk_text', 'Chunk text missing from RPC.')
            chunk.setdefault('section_heading', 'Section heading missing from RPC.')
            chunks.append(chunk)

        payload = json.dumps(chunks, indent=2)
        print(f"  Returning JSON result ({len(payload)} chars, first 500 for brevity):\n{payload[:500]}...")
        return payload
    except Exception as e:
        print(f"An unexpected error occurred during chunk retrieval: {str(e)}\n{traceback.format_exc()}")
        return json.dumps({"error": f"An unexpected error occurred during chunk retrieval: {str(e)}"})

# --- Define Function Declaration for Gemini ---
# Allowed values for the `doc_specific_type` filter: every member of
# FinancialDocSpecificType except UNKNOWN (and any member whose value is None).
_DOC_TYPE_VALUES = [
    item.value
    for item in FinancialDocSpecificType
    if item != FinancialDocSpecificType.UNKNOWN and item.value is not None
]

# Function declaration handed to Gemini describing retrieve_financial_chunks.
# `user_id` is intentionally absent: the pipeline injects it, the model never
# supplies it.
retrieve_chunks_declaration = {
    "name": "retrieve_financial_chunks",
    "description": "Searches and retrieves relevant text chunks from the user's uploaded financial documents based on their query and optional filters like company name, document type, year range, or quarter. Always use this tool to find information before answering questions about the user's financial documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "query_text": {"type": "string", "description": "The user's original question or a refined search query."},
            "match_count": {"type": "integer", "description": "Max chunks to return. Default 5."},
            "doc_specific_type": {
                "type": "string",
                "description": f"Specific document type. Examples: {', '.join(_DOC_TYPE_VALUES)}.",
                # One enum entry per allowed value. The previous version wrapped
                # the comma-joined string in a list, which advertised a single
                # bogus value such as "a, b, c" instead of the real choices.
                "enum": list(_DOC_TYPE_VALUES),
            },
            "company_name": {"type": "string", "description": "Company name to filter by."},
            "doc_year_start": {"type": "integer", "description": "Starting fiscal year."},
            "doc_year_end": {"type": "integer", "description": "Ending fiscal year."},
            "doc_quarter": {
                "type": "integer",
                "description": "Fiscal quarter (1-4).",
            },
        },
        "required": ["query_text"]
    },
}

# --- SETUP FUNCTION ---
def initialize_global_clients_and_authenticate():
    """Load .env, build the global clients and sign in the test user.

    Assigns the module-level ``openai_client``, ``gemini_client``,
    ``auth_client``, ``authenticated_user_id_str`` and ``supabase_service``.

    Raises:
        ValueError: If TEST_EMAIL or TEST_PASSWORD is not set.

    Any other failure (client construction, missing Supabase settings,
    sign-in) is printed with its traceback and the process exits with
    status 1.
    """
    global openai_client, gemini_client, supabase_service, authenticated_user_id_str, auth_client
    load_dotenv()

    test_email = os.getenv("TEST_EMAIL")
    test_password = os.getenv("TEST_PASSWORD")
    if not (test_email and test_password):
        raise ValueError("TEST_EMAIL and TEST_PASSWORD must be set in your .env file.")

    try:
        print("\n--- Initializing clients and authenticating ---")
        openai_client = OpenAIClient()
        gemini_client = GeminiClient()  # handed to the main pipeline function by the caller

        supabase_url = os.getenv("SUPABASE_URL")
        supabase_key = os.getenv("SUPABASE_ANON_KEY")
        if not (supabase_url and supabase_key):
            raise ValueError("SUPABASE_URL and SUPABASE_ANON_KEY must be set in your .env file.")

        auth_client = create_client(supabase_url, supabase_key)
        print("Supabase client created.")

        print(f"Attempting to sign in with email: {test_email}")
        credentials = {"email": test_email, "password": test_password}
        auth_response = auth_client.auth.sign_in_with_password(credentials)

        if not (auth_response and auth_response.user):
            # Use the response's own error message when it carries one.
            auth_error = getattr(auth_response, 'error', None)
            error_detail = auth_error.message if auth_error else "Unknown authentication error"
            raise ConnectionError(f"Supabase authentication failed: {error_detail}. Check credentials and Supabase Auth settings.")

        authenticated_user_id_str = str(auth_response.user.id)
        print(f"Authentication successful. User ID: {authenticated_user_id_str}")

        # The service wraps the same client that was just used to sign in.
        supabase_service = SupabaseService(supabase_client=auth_client)
        print("Clients initialized and authenticated.")

    except Exception as e:
        print(f"Initialization or Authentication Error: {str(e)}\n{traceback.format_exc()}")
        sys.exit(1)

# --- MAIN QUERY PROCESSING PIPELINE FUNCTION ---
def run_financial_query_pipeline(
    user_query_text: str,
    initial_conv_history: list,
    gemini_client_instance: GeminiClient, # Pass the initialized Gemini client
    auth_user_id: str, # Pass the authenticated user ID
    tool_definition: types.Tool,
    model_name_to_use: str
) -> tuple[str, list]:
    """Answer a financial query with Gemini, using the retrieval tool if asked.

    Flow: send the user query together with the tool definition; if the
    model's first part is a call to ``retrieve_financial_chunks``, execute it
    (with ``auth_user_id`` injected as ``user_id``), append the tool result
    and the citation instructions to the history, and ask Gemini again for
    the final answer.

    ``retrieve_financial_chunks`` relies on the module-level ``openai_client``
    and ``supabase_service``.

    Args:
        user_query_text: The user's question.
        initial_conv_history: Prior conversation contents; copied, not mutated.
        gemini_client_instance: Initialized Gemini client wrapper.
        auth_user_id: Authenticated user id injected into the tool call.
        tool_definition: Tool definition passed to the first Gemini call.
        model_name_to_use: Gemini model name used for both calls.

    Returns:
        ``(text, history)``, where ``text`` is the final answer or a message
        describing why no answer was produced (including caught exceptions),
        and ``history`` is the updated copy of the conversation.
    """
    history = list(initial_conv_history)  # shallow copy so the caller's list is untouched

    try:
        # 1. First call: user query plus the retrieval tool definition.
        print(f"\n--- Sending initial query to Gemini ({model_name_to_use}) ---")
        history.append(types.Content(role="user", parts=[types.Part(text=user_query_text)]))
        print(f"  Conversation History before first call:\n{json.dumps(serialize_conversation_history(history), indent=2)}")

        generate_content = gemini_client_instance.client.models.generate_content
        response = generate_content(
            model=model_name_to_use,
            contents=history,
            config=types.GenerateContentConfig(
                tools=[tool_definition],
                automatic_function_calling={"disable": True},
                tool_config={"function_calling_config": {"mode": "any"}},
                temperature=0,
            ),
        )
        print("\n--- Received response from first Gemini call ---")

        candidates = response.candidates
        if not (candidates and candidates[0].content and candidates[0].content.parts):
            error_msg = "Error: Unexpected response structure or no candidates/parts from Gemini's first call."
            print(error_msg)
            feedback = getattr(response, 'prompt_feedback', None)
            if feedback:
                print(f"Prompt Feedback: {feedback}")
            return error_msg, history

        model_content = candidates[0].content
        first_part = model_content.parts[0]  # only the first part is inspected
        history.append(model_content)
        print("\n  Model's response content (first call) added to history.")

        # 2. No function call requested: return the model's direct text, if any.
        requested_call = getattr(first_part, 'function_call', None)
        if not requested_call:
            print("\n--- Gemini decided to answer directly (No Function Call Requested) ---")
            direct_text = getattr(first_part, 'text', None)
            if direct_text is not None:
                print(direct_text)
                # history already holds the model's content from the first call
                return direct_text, history
            no_text_msg = "No text response found in the initial call and no function call made. Stopping execution after direct answer or unexpected initial response."
            print(no_text_msg)
            return no_text_msg, history

        print(f"\n--- Gemini requested function call: '{requested_call.name}' ---")
        tool_args = dict(requested_call.args)
        print(f"  Raw Arguments from LLM: {tool_args}")

        if requested_call.name != "retrieve_financial_chunks":
            warning_msg = f"Warning: LLM requested unknown function '{requested_call.name}'. Stopping execution due to unknown function call."
            print(warning_msg)
            return warning_msg, history

        # 3. Execute the retrieval tool with the authenticated user id injected.
        print("  Recognized 'retrieve_financial_chunks' call.")
        tool_args['user_id'] = auth_user_id
        print(f"  Tool_args *including* user_id for execution: {tool_args}")

        function_result_json = retrieve_financial_chunks(**tool_args)
        print("\n--- Finished executing retrieve_financial_chunks ---")
        print(f"  Function result (JSON string, first 500 chars):\n{function_result_json[:500]}...")

        print("\n--- Preparing enriched context and instructions for final Gemini call ---")
        try:
            function_response_data = json.loads(function_result_json)
        except json.JSONDecodeError:
            print(f"Error decoding function result JSON: {function_result_json[:200]}...")
            function_response_data = {"error": "Invalid JSON from tool."}

        function_response_part = types.Part.from_function_response(
            name=requested_call.name,
            response={"result": function_response_data},
        )
        history.append(types.Content(role="user", parts=[function_response_part]))
        print("  Raw function response part added to history.")

        # The same tool output is also added as formatted text inside the
        # citation instructions.
        formatted_snippets_text = format_chunks_for_llm(function_result_json)
        final_instructions_text = create_final_answer_instructions(user_query_text, formatted_snippets_text)
        history.append(types.Content(role="user", parts=[types.Part(text=final_instructions_text)]))
        print("  Formatted snippets and citation instructions added to history.")
        print(f"\n  Conversation History before second call (final answer generation):\n{json.dumps(serialize_conversation_history(history), indent=2)}")

        # 4. Second call: no tools/config, just the enriched history.
        final_response = generate_content(
            model=model_name_to_use,
            contents=history,
        )
        print("\n--- Received response from second Gemini call (Final Answer) ---")

        final_candidates = final_response.candidates
        if final_candidates and final_candidates[0].content and final_candidates[0].content.parts:
            history.append(final_candidates[0].content)
            return final_response.text, history

        error_msg = "Error: No final response text found after sending function result."
        print(error_msg)
        final_feedback = getattr(final_response, 'prompt_feedback', None)
        if final_feedback:
            print(f"Final Response Prompt Feedback: {final_feedback}")
        return error_msg, history
    except Exception as e:
        error_msg = f"\nAn unexpected error occurred during the Gemini interaction: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return error_msg, history


import time
# --- MAIN EXECUTION ---
# Runs one hard-coded query through the pipeline and prints the answer, the
# serialized conversation history and the elapsed time.
if __name__ == "__main__":
    # Initialize all clients and authenticate. On failure this call either
    # raises (missing TEST_EMAIL/TEST_PASSWORD) or exits the process.
    initialize_global_clients_and_authenticate() # Sets up global clients and user ID

    # User Query and Conversation History
    # The timer starts after initialization, so client setup and sign-in are
    # not part of the reported elapsed time.
    t1 = time.time()
    user_query_main = "Whats the Gross Carrying Amount for Total intangible assets for tesla in 2021? Create a report of tesla for 2021 in markdown i can copy."
    conversation_history_main = [] # Initialize fresh for each run, or load if continuing

    print(f"\n--- User Query ---")
    print(user_query_main)

    # Tool and Model Configuration
    retrieval_tool_main = types.Tool(function_declarations=[retrieve_chunks_declaration])
    # Alternative model names are kept below for quick switching.
    # gemini_model_name_main = "gemini-2.0-flash-lite"
    gemini_model_name_main = "gemini-2.0-flash"
    # gemini_model_name_main = "gemini-2.5-flash-preview-04-17"

    # Defensive check: both values are set by the initialization call above.
    if gemini_client is None or authenticated_user_id_str is None:
        print("Error: Global clients or user ID not initialized. Exiting.")
        sys.exit(1)

    # Run the main processing pipeline
    final_answer_text, updated_history = run_financial_query_pipeline(
        user_query_text=user_query_main,
        initial_conv_history=conversation_history_main,
        gemini_client_instance=gemini_client, # Use the globally initialized gemini_client
        auth_user_id=authenticated_user_id_str, # Use the globally set user ID
        tool_definition=retrieval_tool_main,
        model_name_to_use=gemini_model_name_main
    )
    # Stop the timer before any of the result printing below.
    t2 = time.time()

    # Update conversation history if you plan to continue the conversation
    conversation_history_main = updated_history

    # Print the final answer
    print_final_formatted_answer(final_answer_text)

    # Optionally, print the full conversation history for debugging
    print("\n--- Full Conversation History (Serialized) ---")
    print(json.dumps(serialize_conversation_history(conversation_history_main), indent=2))
    print(f"\n[TIMER] TOTAL ELAPSED: {(t2 - t1):.2f}s")