In [None]:
!pip install faiss-cpu
# OR if you have a GPU runtime and want GPU support:
# !pip install faiss-gpu

In [None]:
#@title 20 deep research = live search
import asyncio
import time
import json
import traceback # For detailed error logging
import re # For parsing
import numpy as np
import os
import glob # For listing session files
import hashlib
from typing import List, Dict, Any, Optional, Set, Tuple, Union

# --- Configuration Constants ---
# Google Cloud project and region used for Vertex AI.
VERTEX_PROJECT_ID = "ofworks-459904"
VERTEX_LOCATION = "us-central1"
# Delay in seconds for individual embedding calls.
EMBEDDING_API_CALL_DELAY_SECONDS = 5.0

# Generation settings shared by every role unless a role overrides them.
_GEMINI_DEFAULT_SETTINGS = {
    'model_name': 'gemini-2.5-flash-preview-05-20',
    'max_tokens': 50000,
    'temperature': 0.3,
    'top_p': 0.95,
}

# Per-role generation settings. Every role owns an independent copy of the
# defaults, so mutating one role's settings never leaks into another's.
GEMINI_CONFIG = {
    'flash_lite': dict(_GEMINI_DEFAULT_SETTINGS),
    'researcher': dict(_GEMINI_DEFAULT_SETTINGS),
    'planner': dict(_GEMINI_DEFAULT_SETTINGS),
    # Creator Agent: lower temperature for more deterministic JSON.
    'creator': {**_GEMINI_DEFAULT_SETTINGS, 'temperature': 0.25},
}


# Cycle cap; per its name, meant for runs where no other stop condition fires.
INITIAL_MAX_CYCLES_IF_NO_OTHER_STOP = 6
# Vector dimension for the FAISS index.
# NOTE(review): presumably must equal the embedding model's output size - confirm.
FAISS_DIMENSION = 768

# Single version tag shared by every on-disk and GCS artifact name below, so a
# version bump is a one-line change.
_DATA_VERSION_TAG = "v8"
_GCS_BLOB_PREFIX = f"research_agent_{_DATA_VERSION_TAG}"

# Local directory layout.
SESSIONS_DIR = f"research_sessions_data_{_DATA_VERSION_TAG}"
GLOBAL_RAG_DIR = os.path.join(SESSIONS_DIR, "global_rag")

# Google Cloud Storage bucket and blob names for the global RAG artifacts.
FAISS_INDEX_GCS_BUCKET_NAME = "faiss_buck"
FAISS_INDEX_GCS_BLOB_NAME = f"{_GCS_BLOB_PREFIX}/global_faiss.index"
GLOBAL_DOC_STORE_GCS_BLOB_NAME = f"{_GCS_BLOB_PREFIX}/global_document_store.json"
GLOBAL_FAISS_MAP_GCS_BLOB_NAME = f"{_GCS_BLOB_PREFIX}/global_faiss_map.json"

# Local file paths under GLOBAL_RAG_DIR.
GLOBAL_DOC_STORE_PATH = os.path.join(
    GLOBAL_RAG_DIR, f"local_fallback_document_store_{_DATA_VERSION_TAG}.json"
)
GLOBAL_FAISS_MAP_PATH = os.path.join(
    GLOBAL_RAG_DIR, f"local_fallback_faiss_map_{_DATA_VERSION_TAG}.json"
)
TEMP_FAISS_INDEX_PATH = os.path.join(
    GLOBAL_RAG_DIR, f"temp_faiss_staging_{_DATA_VERSION_TAG}.index"
)
LOCAL_FAISS_INDEX_PATH = os.path.join(
    GLOBAL_RAG_DIR, f"local_persistent_faiss_{_DATA_VERSION_TAG}.index"
)


# Retrieval sizes (top-k) and embedding batch size.
PLANNER_RAG_CONTEXT_TOP_K = 100
PLANNER_CANDIDATE_REFINE_RAG_TOP_K = 40
RAG_TOP_K = 50
# Number of blueprint chunks for Creator to retrieve per sub-task query.
CREATOR_AGENT_RAG_TOP_K = 80
EMBEDDING_BATCH_SIZE = 20

# --- SDK Availability Flags & Global SDK Variables ---
# Every flag starts out False; the guarded import sections further down flip a
# flag to True only after the corresponding import (or initialization) succeeds.
FAISS_AVAILABLE = GOOGLE_GENAI_SDK_AVAILABLE = GCS_AVAILABLE = False
VERTEX_AI_SDK_INITIALIZED_SUCCESSFULLY = False
TEXT_EMBEDDING_MODEL_CLASS_AVAILABLE = False  # True if TextEmbeddingModel class is imported
GOOGLE_API_CORE_EXCEPTIONS_AVAILABLE = False

# Module/class handles. They stay None when the matching import fails, so
# check the availability flag (or test for None) before using them.
faiss = genai = genai_types = storage = None
vertexai = TextEmbeddingModel = TextEmbeddingInput = None
google_api_core_exceptions = None

# --- SDK Imports and Global Initialization ---
print("--- Initializing SDKs ---")

# FAISS: optional dependency. On failure only a warning is printed and
# execution continues.
try:
    import faiss as faiss_imported
except ImportError:
    print("Warning: faiss library not found. RAG capabilities will be disabled.")
else:
    faiss = faiss_imported
    FAISS_AVAILABLE = True
    print("Successfully imported faiss.")

# Google Gen AI SDK (for genai.Client)
# `genai.Client` is provided by the `google-genai` package, so the request and
# config types are taken from the same package (`google.genai.types`).
# Importing them from the legacy `google.generativeai` package instead would
# (a) mark the whole SDK unavailable whenever that unrelated package is not
# installed, and (b) hand `genai.Client` types from a different SDK.
try:
    from google import genai as genai_imported
    from google.genai import types as genai_types_imported
    genai = genai_imported
    genai_types = genai_types_imported
    GOOGLE_GENAI_SDK_AVAILABLE = True
    print("Successfully imported google.genai and google.genai.types.")
except ImportError:
    print("CRITICAL: Failed to import Google Generative AI SDK (google.genai). LLM calls via genai.Client will fail.")

# Google Cloud Storage: optional dependency. On failure only a warning is
# printed and execution continues.
try:
    from google.cloud import storage as storage_imported
except ImportError:
    print("Warning: google-cloud-storage library not found. GCS persistence will be disabled.")
else:
    storage = storage_imported
    GCS_AVAILABLE = True
    print("Successfully imported google-cloud-storage.")

# Vertex AI SDK (for Embeddings primarily)
# Step 1: import the SDK pieces. Only an ImportError from these imports is
# treated as "SDK missing".
try:
    import vertexai as vertexai_imported
    from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModelImport
    from vertexai.language_models import TextEmbeddingInput as VertexTextEmbeddingInputImport
except ImportError as e_sdk_main:
    print(f"CRITICAL WARNING: Failed to import core Vertex AI SDK components: {e_sdk_main}. Real embeddings will be disabled.")
else:
    vertexai = vertexai_imported
    TextEmbeddingModel = VertexTextEmbeddingModelImport
    TextEmbeddingInput = VertexTextEmbeddingInputImport
    TEXT_EMBEDDING_MODEL_CLASS_AVAILABLE = True
    print("Successfully imported Vertex AI SDK components (vertexai, TextEmbeddingModel, TextEmbeddingInput).")

    # Step 2: one-time global initialization, only when both settings are set.
    if not (VERTEX_PROJECT_ID and VERTEX_LOCATION):
        print("Vertex AI SDK global initialization skipped: VERTEX_PROJECT_ID or VERTEX_LOCATION not set.")
    else:
        try:
            print(f"Attempting to initialize Vertex AI SDK globally for project '{VERTEX_PROJECT_ID}' and location '{VERTEX_LOCATION}'...")
            vertexai.init(project=VERTEX_PROJECT_ID, location=VERTEX_LOCATION)
            VERTEX_AI_SDK_INITIALIZED_SUCCESSFULLY = True
            print(f"Vertex AI SDK initialized globally for project '{VERTEX_PROJECT_ID}' and location '{VERTEX_LOCATION}'.")

            # Step 3 (optional smoke test): try loading the embedding model.
            # A failure here is only a warning and does not reset the
            # "initialized" flag set above. TextEmbeddingModel is always
            # bound at this point, so no extra availability check is needed.
            try:
                print("Attempting to test-load embedding model 'gemini-embedding-001' globally...")
                _ = TextEmbeddingModel.from_pretrained("gemini-embedding-001")
                print("Successfully tested pre-loading of 'gemini-embedding-001'.")
            except Exception as e_preload:
                print(f"Warning: Could not test-load 'gemini-embedding-001': {e_preload}.")
        except Exception as e_vertex_init_global:
            print(f"CRITICAL ERROR initializing Vertex AI SDK globally: {e_vertex_init_global}")
            print("Ensure project/location are correct, Vertex AI API is enabled, and you have correct permissions.")

# Google API Core Exceptions: optional. On failure only a warning is printed
# and execution continues.
try:
    from google.api_core import exceptions as google_api_exceptions_imported
except ImportError:
    print("Warning: google.api_core.exceptions not found. Specific API error handling might be less precise.")
else:
    google_api_core_exceptions = google_api_exceptions_imported
    GOOGLE_API_CORE_EXCEPTIONS_AVAILABLE = True
    print("Successfully imported google.api_core.exceptions.")

# IPython Display Modules
# When IPython is missing, minimal stand-ins with the same names are defined so
# the rest of the file can call display()/HTML()/clear_output() unconditionally.
IPYTHON_AVAILABLE = False
try:
    from IPython.display import display, HTML, clear_output
    IPYTHON_AVAILABLE = True
    print("IPython display modules loaded.")
except ImportError:
    def clear_output(wait=False):
        """No-op stand-in for IPython.display.clear_output."""
        pass

    def display(*objs, **kwargs):
        """Print each object; stand-in for IPython.display.display.

        Accepts any number of objects and ignores keyword options, mirroring
        the real function's ``display(*objs, **kwargs)`` signature so calls
        that work under IPython do not raise TypeError in the fallback.
        """
        for obj in objs:
            print(obj)

    class HTML:
        """Plain-text stand-in for IPython.display.HTML wrapping raw markup."""

        def __init__(self, data):
            self.data = data

        def __str__(self):
            return str(self.data)

        def _repr_html_(self):
            return str(self.data)

    print("IPython display modules not found. Using basic print for output.")

print("--- SDK Initialization Complete ---")

# --- Utility Functions ---
def sanitize_filename(query: str, max_len: int = 50) -> str:
    """Turn free text into a lowercase, hyphen-separated slug.

    Characters other than word characters, whitespace and hyphens are removed,
    the result is stripped and lowercased, and every run of hyphens and/or
    whitespace collapses to a single hyphen.

    Args:
        query: Text to convert.
        max_len: Maximum length of the returned slug; longer slugs are cut.

    Returns:
        The slug cut to ``max_len`` characters, or ``"unnamed_session"`` when
        nothing usable remains.
    """
    cleaned = re.sub(r'[^\w\s-]', '', query).strip().lower()
    slug = re.sub(r'[-\s]+', '-', cleaned)
    if len(slug) > max_len:
        return slug[:max_len]
    if slug:
        return slug
    return "unnamed_session"


# --- System Prompts ---
# Shared preamble that each role-specific system prompt is built on by string
# concatenation. It instructs the model to put its reasoning inside
# <inner_thoughts> tags and its task output inside <final_answer> tags (raw
# JSON, without markdown fences, when JSON is requested).
BASE_SYSTEM_PROMPT_CORE = """You are an Advanced AI Deep Research Agent & Strategic Problem Solver. Your primary mission is to conduct exceptionally comprehensive, multi-faceted, and in-depth research to answer complex user queries with profound understanding and insight. You will simulate an advanced reasoning-based problem-solving expert, leveraging information gathering tools. Your entire cognitive process, from initial query deconstruction to final synthesis and critical self-reflection, must be meticulously detailed within <inner_thoughts> tags. This internal monologue will be invisible to the end-user but will serve as a testament to your rigorous, human-like research methodology.

Formatting and Style for <inner_thoughts>:
- Granular Step-by-Step Reasoning: Each paragraph strictly one sentence, detailing one micro-step in your thought or action.
- Explicit Tool Use: Always state: "I will use the [Tool Name, e.g., Google Search tool / Internal Faiss Index] with the query/parameters: [...]". If using Google Search, you will be provided with the results. If using the Internal Faiss Index, the system will provide you with relevant document snippets.
- Embrace Failure & Iteration: Document failed search attempts or hypotheses and explain how they inform your next steps. Show the "tree-like path search" clearly.
- Quantitative & Qualitative Balance: Seek out data when relevant, but also understand and synthesize qualitative information like opinions and arguments.
- Relentless Exploration: Do not conclude prematurely. The goal is depth.

Final Output Format (Strictly Adhered To):
Your final response MUST be in the following format, and ONLY this format:
<inner_thoughts>
(Your detailed thought process for the current task.)
</inner_thoughts>
<final_answer>
(Your direct output for this specific task, structured as requested by the role. If outputting JSON, it must be RAW JSON within these tags, no markdown fences like ```json.)
</final_answer>
"""

# Planner role prompt (base preamble + role instructions). Asks the model for
# 1 to 3 follow-up web-search queries as a JSON list of strings inside
# <final_answer>, or an empty list `[]` when it judges the research complete.
PLANNER_AGENT_SYSTEM_PROMPT = BASE_SYSTEM_PROMPT_CORE + """
Your current role is **Strategic Research Planner with Progressive Refinement**.
Your task is to analyze the main user query and the comprehensive current state of the research knowledge base (which includes all findings, key insights, and identified knowledge gaps from both web searches and internal document searches).

CRITICAL INSTRUCTIONS FOR PROGRESSIVE RESEARCH:
1.  **Build Upon Previous Findings**: Every new query you suggest MUST logically follow from and aim to expand upon specific facts, fill identified gaps, or resolve contradictions noted in the 'COMPLETE RESEARCH FINDINGS' or 'Identified Knowledge Gaps' sections of the provided knowledge base summary. Explicitly state in your <inner_thoughts> how your proposed queries relate to previous findings or gaps.
2.  **Consider All Knowledge Sources**: The Knowledge Base may contain information from previous web searches AND from an internal document index (Faiss). Your planning should aim to synthesize and build upon information from ALL available sources. If the KB summary indicates strong leads from internal documents, consider if further refinement of internal searches is needed, but your primary output is for WEB search queries.
3.  **Avoid Repetition**: Do NOT suggest queries that are substantially similar to ones already listed as 'Completed Sub-Queries' in the knowledge base summary unless you are aiming to verify a specific contradiction or explore a nuanced aspect that previous searches missed. If so, justify this clearly in your <inner_thoughts>.
4.  **Drill Down Strategy**: Transition from broad exploratory searches (if any were initial) to more focused, investigative queries. If general topics are covered, your new queries should target detailed aspects, specific entities, or relationships uncovered.
5.  **Fill Knowledge Gaps**: Prioritize queries that directly address the 'Identified Knowledge Gaps Needing Investigation' section from the knowledge base summary. Your queries should be formulated to find answers to these specific open questions.
6.  **Test Hypotheses/Explore Leads**: If previous research suggests possibilities or unconfirmed leads, you can formulate queries to verify these or investigate them further.

QUERY FORMULATION RULES:
- Each query should aim to be MORE SPECIFIC or explore a DIFFERENT, LOGICALLY CONNECTED ANGLE than previous related queries.
- When possible, phrase queries to seek new information. For example, instead of "details about X", if "X overview" was done, try "specific impact of X on Y" or "comparative analysis of X and Z".
- Include enough context in your <inner_thoughts> to show the reasoning for each query based on the provided knowledge base.

Before suggesting any query, critically ask yourself:
- Does this query directly build upon a specific previous finding (from web or internal docs) or clearly address an identified knowledge gap from the KB summary?
- Is this query distinct enough from completed queries to yield new, valuable information?
- Will this query take the research significantly deeper or broader in a *meaningful* way?
- Am I proposing to re-search something that already has a satisfactory answer in the KB?

Output 1 to 3 highly targeted queries that progressively deepen understanding. These queries are primarily intended for WEB SEARCH.
If, after careful analysis of the *entire* knowledge base summary, you determine that the research is sufficiently comprehensive and no more *valuable progressive* web search queries can be formulated to significantly enhance the answer to the original user query, output an empty list: `[]` within the <final_answer> tags.

Example <final_answer> format for new searches:
<final_answer>
[
  "specific side effects of DrugX in elderly patients with condition Y based on 2023 clinical trials",
  "comparative efficacy of DrugX versus DrugZ for treating condition Y as reported in meta-analyses",
  "long-term patient outcomes for DrugX condition Y after 5 years of treatment"
]
</final_answer>

Example <final_answer> format if research is complete:
<final_answer>
[]
</final_answer>
"""

# Search-analyzer role prompt (base preamble + role instructions). Asks the
# model to merge internal Faiss snippets and Google Search results for one
# executed query into a single JSON object with the keys shown in the worked
# example: search_query_executed, internal_faiss_results_summary,
# google_search_needed_and_performed (always true), google_search_results_summary,
# overall_synthesis_for_this_query, new_questions_or_leads_from_this_query,
# tool_errors.
SEARCH_ANALYZER_AGENT_SYSTEM_PROMPT = BASE_SYSTEM_PROMPT_CORE + """
Your current role is **Comprehensive Search Result Analyzer & Multi-Source Information Synthesizer**.

You will be given the following inputs:
1.  A specific `search_query_executed` that you need to analyze.
2.  `INTERNAL KNOWLEDGE BASE (FAISS INDEX) FINDINGS`: A JSON list of objects, where each object represents a potentially relevant document snippet from an internal knowledge base. This list might be empty if no relevant internal documents were found.
3.  `EXTERNAL GOOGLE SEARCH FINDINGS`: A JSON list of objects, where each object represents a (simulated) Google Search result. This list will always be provided.

YOUR PRIMARY TASK IS TO:
1.  **Critically Analyze ALL Provided Information:**
    *   **Internal Faiss Index Results:** Review the provided document summaries/snippets. Extract key information *directly relevant* to the `search_query_executed`. For each relevant internal document, note its Document ID. Assess its relevance to the query (0.0 to 1.0).
    *   **Google Search Results:** Review the provided search result summaries. For each promising external source (select up to 3-5 most relevant from the provided list):
        *   Identify the source URL and title.
        *   Extract the most critical pieces of information or direct quotes that are *directly relevant* to the `search_query_executed`.
        *   Assess its relevance to the query (0.0 to 1.0) and briefly comment on its potential credibility if discernible.
2.  **Synthesize Findings:**
    *   Combine and synthesize the extracted information from *both* the Internal Faiss Index results AND the Google Search results into a coherent `overall_synthesis_for_this_query`. This synthesis should directly address the `search_query_executed`.
    *   Clearly attribute information to its source within the synthesis if helpful (e.g., "Internal document X suggests..., while external source Y states...").
3.  **Identify New Questions/Leads:**
    *   Based on the combined findings from all sources, identify any new questions, contradictions, or specific avenues for further research that arise directly from your analysis of *this specific `search_query_executed`*.

OUTPUT FORMAT:
YOUR ENTIRE RESPONSE WITHIN THE <final_answer> TAGS MUST BE A SINGLE, VALID JSON OBJECT.
DO NOT OUTPUT A LIST OF STRINGS OR ANY OTHER FORMAT.
THE JSON OBJECT MUST STRICTLY ADHERE TO THE FOLLOWING STRUCTURE, CONTAINING ALL SPECIFIED KEYS, even if some values are empty lists or null where appropriate (e.g., if no relevant internal documents were found, `internal_faiss_results_summary` would be `[]`).

The `google_search_needed_and_performed` field in your JSON output MUST always be `true`.

EXAMPLE OF USER CONTENT AND YOUR EXPECTED <final_answer>:

Hypothetical User Content Provided to You:
```
You are analyzing the query: "Impact of quantum computing on RSA encryption"

INTERNAL KNOWLEDGE BASE (FAISS INDEX) FINDINGS (Review and incorporate if relevant):
[
  {
    "internal_doc_id": "doc_qc_001",
    "summary_of_internal_doc_content": "Shor's algorithm, runnable on a sufficiently powerful quantum computer, can factor large integers, breaking RSA. Current quantum computers are too small and error-prone for practical RSA cracking.",
    "relevance_score_from_faiss": 0.95
  },
  {
    "internal_doc_id": "doc_crypto_standards_v2",
    "summary_of_internal_doc_content": "Post-quantum cryptography (PQC) standards are being developed by NIST. Several lattice-based and hash-based signature schemes are candidates. Transitioning will be a multi-year effort.",
    "relevance_score_from_faiss": 0.7
  }
]

EXTERNAL GOOGLE SEARCH FINDINGS (Review, select most relevant, and incorporate):
[
  {
    "source_url": "http://quantumthreat.example.com/shors_explained_rsa",
    "source_title": "Shor's Algorithm: The RSA Doomsday Machine - Explained",
    "key_info_provisional": "Detailed explanation of how Shor's algorithm exploits quantum parallelism to factor integers exponentially faster than classical algorithms, making RSA vulnerable. Discusses the number of qubits required.",
    "snippet_from_search_engine": "Shor's algorithm, if run on a large-scale quantum computer, would break RSA..."
  },
  {
    "source_url": "http://nist.pqc.example.gov/updates_2024",
    "source_title": "NIST PQC Standardization Update - March 2024",
    "key_info_provisional": "NIST has selected several algorithms for standardization in its Post-Quantum Cryptography project, including CRYSTALS-Kyber and CRYSTALS-Dilithium. Draft standards are expected soon.",
    "snippet_from_search_engine": "NIST announces new PQC algorithm selections for future cryptographic standards..."
  },
  {
    "source_url": "http://quantumrealitycheck.example.org/timeline_skeptic",
    "source_title": "Quantum Computing: Hype vs. Reality for Cryptography",
    "key_info_provisional": "Argues that while theoretically possible, building a quantum computer capable of breaking RSA-2048 is still decades away due to immense challenges in qubit coherence, error correction, and scaling.",
    "snippet_from_search_engine": "Debate continues on the practical timeline for quantum computers breaking RSA..."
  }
]

YOUR TASK:
Critically analyze ALL the provided information (Internal Faiss Index findings AND External Google Search findings).
Synthesize these findings into the comprehensive JSON output format specified by your role.
Remember, the 'google_search_needed_and_performed' field in your JSON output MUST be `true`.
```

Your <final_answer> for the example above:
<final_answer>
{
  "search_query_executed": "Impact of quantum computing on RSA encryption",
  "internal_faiss_results_summary": [
    {
      "internal_doc_id": "doc_qc_001",
      "relevance_score_to_query": 0.95,
      "key_extracted_info_from_internal": "Shor's algorithm, when run on a sufficiently powerful quantum computer, can break RSA by factoring large integers. However, current quantum computers are too small and error-prone for this to be a practical threat yet."
    },
    {
      "internal_doc_id": "doc_crypto_standards_v2",
      "relevance_score_to_query": 0.7,
      "key_extracted_info_from_internal": "NIST is actively developing Post-Quantum Cryptography (PQC) standards, with lattice-based and hash-based schemes as leading candidates, to prepare for the eventual quantum threat. Transitioning to PQC will be a significant, multi-year undertaking."
    }
  ],
  "google_search_needed_and_performed": true,
  "google_search_results_summary": [
    {
      "source_url": "http://quantumthreat.example.com/shors_explained_rsa",
      "source_title": "Shor's Algorithm: The RSA Doomsday Machine - Explained",
      "relevance_score": 0.9,
      "key_extracted_info": "Shor's algorithm leverages quantum parallelism for exponential speedup in integer factorization, rendering RSA vulnerable given a capable quantum computer. The number of stable qubits required is substantial."
    },
    {
      "source_url": "http://nist.pqc.example.gov/updates_2024",
      "source_title": "NIST PQC Standardization Update - March 2024",
      "relevance_score": 0.85,
      "key_extracted_info": "NIST has advanced its Post-Quantum Cryptography project by selecting algorithms like CRYSTALS-Kyber (for key establishment) and CRYSTALS-Dilithium (for digital signatures) for standardization, with draft standards anticipated."
    },
    {
      "source_url": "http://quantumrealitycheck.example.org/timeline_skeptic",
      "source_title": "Quantum Computing: Hype vs. Reality for Cryptography",
      "relevance_score": 0.75,
      "key_extracted_info": "Significant debate exists regarding the practical timeline for quantum computers to break widely used RSA encryption (e.g., RSA-2048), with some experts estimating it is still decades away due to major challenges in qubit coherence, error correction, and overall system scaling."
    }
  ],
  "overall_synthesis_for_this_query": "Quantum computing, particularly via Shor's algorithm, poses a fundamental long-term threat to RSA encryption's security by enabling efficient factorization of large integers. This is confirmed by both internal knowledge (doc_qc_001) and external sources (quantumthreat.example.com). While the theoretical vulnerability is clear, practical exploitation is currently hindered by the limited scale and high error rates of existing quantum computers, with timelines for cryptographically relevant quantum computers estimated to be at least a decade or more away (quantumrealitycheck.example.org, doc_qc_001). In response, standardization bodies like NIST are actively developing and selecting post-quantum cryptographic algorithms (e.g., CRYSTALS-Kyber, CRYSTALS-Dilithium) to ensure future security (doc_crypto_standards_v2, nist.pqc.example.gov).",
  "new_questions_or_leads_from_this_query": [
    "What are the specific qubit count and coherence time requirements for Shor's algorithm to break RSA-2048 or RSA-4096?",
    "What is the current status of the NIST PQC draft standards and their projected adoption timeline?",
    "Are there any emerging quantum algorithms, other than Shor's, that could potentially threaten current asymmetric cryptography?"
  ],
  "tool_errors": null
}
</final_answer>

If no relevant internal Faiss documents were found, `internal_faiss_results_summary` should be `[]`.
If the provided Google Search results are deemed not relevant after your analysis, `google_search_results_summary` should be `[]`.
However, `google_search_needed_and_performed` MUST always be `true`.
"""

# Consolidator role prompt (base preamble + role instructions). Asks the model
# to turn the full "Knowledge Base Summary" (research findings and/or
# Creator-generated artifacts) into the final user-facing answer inside
# <final_answer>. Unlike the other role prompts, it does not request JSON.
CONSOLIDATOR_AGENT_SYSTEM_PROMPT = BASE_SYSTEM_PROMPT_CORE + """
Your current role is **Research Consolidator & Final Reporter**.
You will be given the complete research log, which includes the original user query, all search queries executed (if any research phase occurred), and the analyzed findings or created artifacts from those processes. This information will be presented as a comprehensive "Knowledge Base Summary".

YOUR TASK:
1.  **Understand the Original User Query:** Identify the core question(s) or objective(s) the user wants to achieve.
2.  **Synthesize ALL Available Information:** Review the *entire* "Knowledge Base Summary". This summary contains:
    *   The original user query.
    *   A log of detailed findings from any research (SearchAnalyzer outputs).
    *   A collection of artifacts generated by the Creator Agent (if that phase was triggered).
    *   A list of completed and pending sub-queries.
    *   Any errors encountered during the process.
3.  **Construct the Final Answer:**
    *   If the primary goal was **research and answering questions**, synthesize the findings from the `detailed_findings` log into a comprehensive, well-structured, and insightful answer to the original user query. Address all aspects of the query. Highlight key findings, different perspectives, and any limitations or gaps in the available information based on the research performed.
    *   If the primary goal was **creation/elaboration of design artifacts** (and the Creator Agent produced output now in `created_artifacts` within the KB summary), your main task is to present these created artifacts clearly. You might provide a brief introduction and conclusion around the presented artifacts. If the Creator Agent reported limitations or errors, summarize those as well.
    *   **Clarity and Attribution:** Clearly attribute information if sources are distinct and relevant (e.g., "Based on internal document X...", "External source Y suggested..."). For created artifacts, the structure itself is the primary output.
4.  **Output Format:** The output should be the final, polished answer intended for the end-user. It should be directly readable and understandable, presented within the `<final_answer>` tags. For complex outputs like architectural designs, ensure the structure is clear.

CRITICAL: Your output within `<final_answer>` should be the direct response to the user. If the process involved creating specific artifacts (like diagrams or formalizations), ensure these are clearly presented as part of your answer. If the research phase was skipped (e.g., for a pure creation task based on a loaded blueprint) or failed, focus on presenting the created artifacts or explaining the limitations.
"""

# Creator planning-phase prompt (base preamble + role instructions). Asks the
# model for a JSON object with a single "creation_plan" key holding a list of
# sub-task objects (sub_task_id, sub_task_description, estimated_cycles,
# required_blueprint_info_keywords).
# The JSON example is kept strictly valid: a `// ...` comment inside it would
# contradict the "valid JSON" requirement and invite the model to copy a
# construct that json.loads rejects. The "more sub-tasks may follow" hint is
# therefore stated in prose after the example.
CREATOR_PLANNER_SYSTEM_PROMPT = BASE_SYSTEM_PROMPT_CORE + """
Your current role is **AI Design Project Manager (Creator - Planning Phase)**.
You are given:
1.  A high-level `elaboration_task` (the user's original query) that requires creating multiple detailed design artifacts for an existing AI architecture blueprint.
2.  A `blueprint_summary` containing the details of this AI architecture (e.g., "CognitoAbstractor V1").
3.  A `max_creation_cycles` number indicating how many iterative cycles are available for the creation process.

YOUR TASK:
1.  **Deconstruct the `elaboration_task`**: Identify all distinct design artifacts requested (e.g., high-level diagram, specific interaction protocol, CIAL formalization, knowledge schema, motivation signal logic).
2.  **Create a Step-by-Step Plan**: Formulate a plan to generate these artifacts iteratively over the available `max_creation_cycles`. Each step in your plan should be a manageable sub-task focused on generating one specific artifact or a significant portion of one.
    *   For each sub-task in your plan, briefly describe:
        *   `sub_task_id`: A unique, descriptive ID for the sub-task (e.g., "artifact_1_diagram_high_level").
        *   `sub_task_description`: What specific artifact or part will be created.
        *   `estimated_cycles`: How many cycles this sub-task might take (usually 1).
        *   `required_blueprint_info_keywords`: 2-3 keywords or phrases that, if used in a RAG query against the `blueprint_summary`, would retrieve the most relevant information from the blueprint needed for *this specific sub-task*. This helps focus information retrieval.
3.  **Output Format**: Your entire response within the <final_answer> tags MUST be a SINGLE, VALID JSON OBJECT.
    This JSON object MUST have a **single top-level key named "creation_plan"**.
    The value of "creation_plan" MUST be a JSON LIST of sub-task objects, where each sub-task object follows the schema described above.

CRITICAL OUTPUT EXAMPLE (Illustrates the full JSON object structure):
<final_answer>
{
  "creation_plan": [
    {
      "sub_task_id": "artifact_1_diagram",
      "sub_task_description": "Generate MermaidJS syntax for the high-level architectural diagram of CognitoAbstractor V1, showing main components and primary data/control flows based on the blueprint's description of the 'Central Workspace/Blackboard'.",
      "estimated_cycles": 1,
      "required_blueprint_info_keywords": ["CognitoAbstractor V1 components", "Central Workspace data flow", "module interactions diagram"]
    },
    {
      "sub_task_id": "artifact_2_protocol_novelty",
      "sub_task_description": "Detail the interaction protocol for the 'Novelty Detection & Causal Hypothesis Testing' scenario, focusing on steps 1-3: NPPRM novelty detection, DLSM curiosity trigger, and SRKIE initial symbolic grounding.",
      "estimated_cycles": 1,
      "required_blueprint_info_keywords": ["Novelty Detection protocol", "NPPRM function", "DLSM intrinsic motivation", "SRKIE symbolic grounding"]
    }
  ]
}
</final_answer>

The example above shows only two sub-tasks; include as many sub-task objects as the `elaboration_task` requests and the available cycles allow. Never put comments (such as `// ...`) or trailing commas inside the JSON.

Ensure your output is ONLY this JSON object. Do not include any other text before or after the JSON structure within the <final_answer> tags.
"""

# Creator execution-phase prompt (base preamble + role instructions). Asks the
# model to produce one design artifact for a given sub-task and return a JSON
# object with artifact_id, generated_content, confidence_score and the optional
# notes_or_questions_for_next_step.
# The `\\n` sequences in the example are escaped backslashes in this non-raw
# string literal, so the prompt text shows the two-character JSON escape `\n`
# rather than real line breaks inside the JSON string value.
CREATOR_EXECUTOR_SYSTEM_PROMPT = BASE_SYSTEM_PROMPT_CORE + """
Your current role is **AI Design Artifact Generator (Creator - Execution Phase)**.
You are given:
1.  A specific `current_sub_task` to complete, which is part of a larger design elaboration. This sub-task will describe a specific design artifact you need to create.
2.  `relevant_blueprint_context`: Snippets from the "CognitoAbstractor V1" architectural blueprint retrieved via RAG, specifically relevant to your `current_sub_task`. This is your primary information source.
3.  The original `elaboration_task_overview` for overall context.

YOUR TASK:
1.  **Understand the `current_sub_task`**: Clearly identify what specific artifact or piece of an artifact you need to generate.
2.  **Leverage `relevant_blueprint_context`**: Use the provided blueprint snippets as the factual basis for your generation. Your output MUST be consistent with and directly elaborate upon this context.
3.  **Generate the Artifact Piece**: Create the content for the `current_sub_task`.
    *   If the sub-task asks for a diagram (e.g., MermaidJS, PlantUML), provide the complete textual syntax for that diagram as a string.
    *   If it asks for a protocol, formalism, schema, or conceptual equation, provide a clear, detailed textual description.
4.  **Output Format**: Your entire response within the <final_answer> tags MUST be a SINGLE, VALID JSON OBJECT containing:
    *   `artifact_id`: The `sub_task_id` you were given.
    *   `generated_content`: The actual content of the design artifact piece you created (e.g., the Mermaid string, the textual description of the formalism).
    *   `confidence_score`: Your confidence (0.0-1.0) that you successfully completed the sub-task based *only* on the provided blueprint context.
    *   `notes_or_questions_for_next_step`: (Optional) Any brief notes, or if critical information was missing from the `relevant_blueprint_context` that is needed for *this specific sub-task*, phrase it as a question that could guide a RAG query against the full blueprint.

Example <final_answer> if generating a Mermaid diagram:
<final_answer>
{
  "artifact_id": "artifact_1_diagram",
  "generated_content": "graph TD\\n    A[CognitoAbstractor V1 Central Workspace] --> B(NPPRM);\\n    B --> A;\\n    A --> C(SRKIE);\\n    C --> A;\\n    A --> D(CIAL);\\n    D --> A;\\n    A --> E(ARM);\\n    E --> A;\\n    A --> F(DLSM);\\n    F --> A;",
  "confidence_score": 0.9,
  "notes_or_questions_for_next_step": "Blueprint context was clear for main components. Need more detail on specific data types exchanged for full sequence diagrams."
}
</final_answer>
"""


# --- Helper Function for Parsing LLM Response ---
_JSON_OPENER_RE = re.compile(r"[{\[]")


def _starts_like_json(candidate: str) -> bool:
    """Return True when *candidate* begins (after whitespace) with ``{`` or ``[``."""
    return candidate.strip().startswith(("{", "["))


def _extract_json_values(text: str) -> List[str]:
    """Return the JSON object/array literals embedded in *text*, in order of appearance.

    Every ``{`` or ``[`` is tried as the start of a JSON value using
    ``json.JSONDecoder.raw_decode``. When decoding succeeds the exact source
    substring is collected and scanning resumes after it, so values nested
    inside a successfully decoded container are not reported separately.
    When decoding fails, scanning resumes one character later.
    """
    decoder = json.JSONDecoder()
    found: List[str] = []
    opener = _JSON_OPENER_RE.search(text)
    while opener:
        start = opener.start()
        try:
            _, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            resume_at = start + 1
        else:
            found.append(text[start:end])
            resume_at = end
        opener = _JSON_OPENER_RE.search(text, resume_at)
    return found


def parse_llm_response(response_text: str) -> Tuple[str, str]:
    """Split an LLM response into ``(inner_thoughts, final_answer)``.

    The final answer is resolved in stages, each one only running when the
    previous stages did not produce something that starts like JSON:

    1. The content of ``<final_answer>...</final_answer>`` (optionally wrapped
       in a json code fence). If it is not valid JSON as a whole, the first
       JSON object/array embedded in it is used; otherwise the raw content.
    2. The first json-fenced block in the text after the thoughts
       (or in the whole response when no ``<final_answer>`` tag exists).
    3. The last JSON object/array found anywhere in that same search area.
    4. Otherwise the tag content, or the stripped search area.

    Embedded JSON is located with a real JSON decoder rather than a
    non-greedy regex, so nested objects/arrays are returned whole instead of
    being cut at the first closing bracket.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A ``(thoughts, final_answer)`` tuple of stripped strings. For a
        non-string input, ``thoughts`` is empty and ``final_answer`` is an
        ``ERROR: ...`` message.
    """
    if not isinstance(response_text, str):
        print(f"Warning (parse_llm_response): Expected string input, got {type(response_text)}. Returning empty.")
        return "", f"ERROR: Invalid input to parser (expected string, got {type(response_text)})"

    thoughts = ""
    final_answer_str = ""

    thought_match = re.search(r"<inner_thoughts>(.*?)</inner_thoughts>", response_text, re.DOTALL | re.IGNORECASE)
    if thought_match:
        thoughts = thought_match.group(1).strip()
        text_after_thoughts = response_text[thought_match.end():].strip()
    else:
        text_after_thoughts = response_text.strip()

    answer_tag_match = re.search(r"<final_answer>(.*?)</final_answer>", text_after_thoughts, re.DOTALL | re.IGNORECASE)
    content_within_final_answer_tag = answer_tag_match.group(1).strip() if answer_tag_match else ""

    # Stage 1: content of the <final_answer> tag.
    if content_within_final_answer_tag:
        temp_str = content_within_final_answer_tag
        if temp_str.startswith("```json"):
            temp_str = temp_str[len("```json"):].strip()
        if temp_str.endswith("```"):
            temp_str = temp_str[:-len("```")].strip()
        try:
            json.loads(temp_str)
            final_answer_str = temp_str
        except json.JSONDecodeError:
            embedded_values = _extract_json_values(temp_str)
            final_answer_str = embedded_values[0] if embedded_values else temp_str

    # Stages 2 and 3 look outside the tag content as well; without a tag the
    # whole response (thoughts included) is searched.
    search_area = text_after_thoughts if answer_tag_match else response_text

    # Stage 2: first json-fenced block.
    if not _starts_like_json(final_answer_str):
        json_fenced_block_match = re.search(r"```json\s*(\{[\s\S]*?\}|\[[\s\S]*?\])\s*```", search_area, re.DOTALL)
        if json_fenced_block_match:
            potential_json_from_fence = json_fenced_block_match.group(1).strip()
            try:
                json.loads(potential_json_from_fence)
                final_answer_str = potential_json_from_fence
            except json.JSONDecodeError:
                if not final_answer_str:
                    final_answer_str = content_within_final_answer_tag or search_area.strip()

    # Stage 3: last JSON value anywhere in the search area.
    if not _starts_like_json(final_answer_str):
        json_candidates = _extract_json_values(search_area)
        if json_candidates:
            final_answer_str = json_candidates[-1]
        elif not final_answer_str:
            final_answer_str = content_within_final_answer_tag or search_area.strip()

    # Last-resort fallbacks for responses that produced nothing above.
    stripped_response = response_text.strip()
    if not final_answer_str and stripped_response.startswith("ERROR: LLM call failed"):
        final_answer_str = stripped_response
    if not final_answer_str and not thoughts and stripped_response and not stripped_response.startswith("ERROR:"):
        print(f"Warning (parse_llm_response): No <final_answer> or JSON found. Using entire response: '{response_text[:100]}...'")
        final_answer_str = stripped_response
    return thoughts, final_answer_str.strip()



# --- KnowledgeBase Class (Session Specific) ---
class KnowledgeBase:
    """Session-scoped research state.

    Holds the original query, the queue of pending and completed sub-queries,
    the per-query analysis findings, the sources referenced, logged errors and
    the creator plan/artifacts. It also renders that state as plain-text
    summaries for the planner and consolidator prompts and converts to/from a
    plain dict for persistence.
    """

    # Text shown when a finding carries no usable synthesis.
    _NO_SYNTHESIS = "No synthesis for this finding."
    # Number of most-recent findings rendered into the planner summary.
    _PLANNER_MAX_FINDINGS = 10

    def __init__(self, original_query: str, session_id: str):
        """Create an empty knowledge base whose only pending query is *original_query*."""
        self.session_id: str = session_id
        self.original_query: str = original_query
        self.main_topics_identified: List[str] = []
        self.detailed_findings: List[Dict[str, Any]] = []
        self.pending_sub_queries: List[str] = [original_query]
        self.completed_sub_queries: Set[str] = set()
        self.session_sources_referenced: Set[Tuple[str, str]] = set()
        self.errors: List[Dict[str, Any]] = []
        self.query_dependencies: Dict[str, List[str]] = {}
        self.query_generation_reasons: Dict[str, str] = {}
        self.global_faiss_doc_count: int = 0
        self.creator_plan: List[Dict[str, Any]] = []  # Stores sub-tasks for the creator
        self.creator_completed_subtasks_count: int = 0
        self.created_artifacts: Dict[str, Any] = {}  # Stores {artifact_name: content}

    # --- Small coercion helpers: LLM-produced fields may be null or mistyped ---
    @staticmethod
    def _as_text(value: Any, default: str = "") -> str:
        """Return *value* as ``str``; ``None`` becomes *default*."""
        if value is None:
            return default
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_list(value: Any) -> List[Any]:
        """Return *value* if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _format_timestamp(timestamp: Any) -> str:
        """Format an epoch timestamp in local time, or ``"N/A"`` if it is unusable."""
        try:
            return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
        except (TypeError, ValueError, OverflowError, OSError):
            return "N/A"

    def add_search_analysis(self, query_executed: str, analysis_data: Dict[str, Any]):
        """Record the analysis of one executed query.

        Appends a finding, moves *query_executed* from pending to completed,
        registers the Faiss/Google sources listed in *analysis_data*, and
        queues every non-blank string under
        ``new_questions_or_leads_from_this_query`` as a dependent query.
        Any exception is printed and stored via ``add_error`` instead of
        being raised.
        """
        try:
            has_internal = "internal_faiss_results_summary" in analysis_data
            has_google = "google_search_results_summary" in analysis_data
            if has_internal and has_google:
                source_type = "search_analyzer_synthesis"
            elif has_internal:
                source_type = "rag_pre_analysis"
            elif has_google:
                source_type = "web_search_analysis"
            else:
                source_type = "unknown_source"

            self.detailed_findings.append({
                "query": query_executed,
                "analysis": analysis_data,
                "source_type": source_type,
                "timestamp": time.time(),
            })

            if query_executed in self.pending_sub_queries:
                self.pending_sub_queries.remove(query_executed)
            self.completed_sub_queries.add(query_executed)

            internal_items = analysis_data.get("internal_faiss_results_summary")
            if isinstance(internal_items, list):
                for item in internal_items:
                    if isinstance(item, dict) and "internal_doc_id" in item:
                        self.session_sources_referenced.add(("faiss", item["internal_doc_id"]))
            google_items = analysis_data.get("google_search_results_summary")
            if isinstance(google_items, list):
                for item in google_items:
                    if isinstance(item, dict) and "source_url" in item:
                        self.session_sources_referenced.add(("google", item["source_url"]))

            new_leads = analysis_data.get("new_questions_or_leads_from_this_query")
            if isinstance(new_leads, list):
                for lead_candidate in new_leads:
                    if isinstance(lead_candidate, str) and lead_candidate.strip():
                        self.add_dependent_query(
                            new_query=lead_candidate.strip(),
                            parent_queries=[query_executed],
                            reason=f"Identified as a new lead from {source_type} of '{query_executed}'"
                        )

            # A completed original query must never linger in the pending queue.
            if self.original_query in self.completed_sub_queries and self.original_query in self.pending_sub_queries:
                self.pending_sub_queries.remove(self.original_query)

        except Exception as e:
            error_msg = f"Error in add_search_analysis for query '{query_executed}': {e}"
            print(error_msg); traceback.print_exc()
            self.add_error("KnowledgeBase", error_msg, {"query": query_executed, "analysis_data_preview": str(analysis_data)[:200]})

    def add_dependent_query(self, new_query: str, parent_queries: List[str], reason: str):
        """Register *new_query* as derived from *parent_queries*.

        The dependency and reason records are (re)written for *new_query*;
        the query is appended to the pending queue only when it is neither
        pending nor completed already. Blank or non-string queries are ignored.
        """
        try:
            if not isinstance(new_query, str) or not new_query.strip():
                return
            valid_parent_queries = [p for p in parent_queries if isinstance(p, str) and p.strip()]
            self.query_dependencies[new_query] = valid_parent_queries
            self.query_generation_reasons[new_query] = reason if isinstance(reason, str) else "No specific reason provided."
            if new_query not in self.pending_sub_queries and new_query not in self.completed_sub_queries:
                self.pending_sub_queries.append(new_query)
        except Exception as e:
            error_msg = f"Error in add_dependent_query for '{new_query}': {e}"; print(error_msg)
            self.add_error("KnowledgeBase", error_msg, {"new_query": new_query, "parent_queries": parent_queries})

    def add_error(self, agent_name: str, error_message: str, details: Optional[Dict[str, Any]] = None):
        """Append a timestamped error record for *agent_name*."""
        self.errors.append({"agent": agent_name, "message": error_message, "details": details or {}, "timestamp": time.time()})

    def get_summary_for_planner(self, max_len: int = 8000) -> str:
        """Render a planner-oriented summary, truncated to *max_len* characters.

        Shows queue state, the ten most recent findings (newest first), the
        key insights and the de-duplicated leads gathered from those findings.
        Null or mistyped fields inside a finding are replaced by defaults so
        one malformed finding does not discard the whole summary. On an
        unexpected error the error is logged via ``add_error`` and a short
        fallback string is returned.
        """
        try:
            pending = self.pending_sub_queries
            # Sorted so the rendered prompt is stable for a given state.
            completed_queries_list = sorted(self.completed_sub_queries, key=str)
            topics_text = ', '.join(str(topic) for topic in self.main_topics_identified) or 'None yet'
            pending_more = '...' if len(pending) > 5 else ''
            completed_more = '...' if len(completed_queries_list) > 10 else ''

            parts: List[str] = [
                f"Current Research Session ID: {self.session_id}\nOriginal Query for this Session: {self.original_query}\n",
                f"Main Topics Initially Identified (if any): {topics_text}\n\n",
                f"=== CURRENT KNOWLEDGE BASE STATE (Session: {self.session_id}) ===\n",
                f"Pending Sub-Queries ({len(pending)}): {pending[:5]}{pending_more}\n",
                f"Completed Sub-Queries ({len(completed_queries_list)}): {completed_queries_list[:10]}{completed_more}\n",
                f"Total Unique Sources Referenced This Session: {len(self.session_sources_referenced)}.\n",
                f"Total Documents in Global Faiss Store: {self.global_faiss_doc_count}.\n\n",
                "=== COMPLETE RESEARCH FINDINGS & INSIGHTS (Most recent first, this session) ===\n",
            ]

            key_insights_found: List[str] = []
            all_identified_gaps: List[str] = []
            if not self.detailed_findings:
                parts.append("No detailed research findings have been logged in this session yet.\n")

            recent_findings = self.detailed_findings[::-1][:self._PLANNER_MAX_FINDINGS]
            for raw_finding in recent_findings:
                finding = self._as_dict(raw_finding)
                query_executed = finding.get("query", "N/A")
                analysis = self._as_dict(finding.get("analysis"))
                source_type = self._as_text(finding.get("source_type"), "unknown_source")
                synthesis = self._as_text(analysis.get("overall_synthesis_for_this_query"), self._NO_SYNTHESIS)
                source_label = source_type.replace('_', ' ').title()
                synthesis_more = '...' if len(synthesis) > 500 else ''

                parts.append(f"\n--- Finding from {source_label} for Query: \"{query_executed}\" ---\n")
                parts.append(f"Synthesis: {synthesis[:500]}{synthesis_more}\n")
                # The default placeholder is exactly 30 characters, so it never counts as an insight.
                if len(synthesis) > 30:
                    key_insights_found.append(f"From '{query_executed}' ({source_type}): {synthesis[:300]}...")

                internal_src_count = len(self._as_list(analysis.get("internal_faiss_results_summary")))
                google_src_count = len(self._as_list(analysis.get("google_search_results_summary")))
                if internal_src_count > 0:
                    parts.append(f"  (Consulted {internal_src_count} internal docs)\n")
                if google_src_count > 0:
                    parts.append(f"  (Consulted {google_src_count} Google Search results)\n")

                new_leads_or_gaps = self._as_list(analysis.get("new_questions_or_leads_from_this_query"))
                if new_leads_or_gaps:
                    leads_more = '...' if len(new_leads_or_gaps) > 3 else ''
                    parts.append(f"  Leads/Gaps from this query: {new_leads_or_gaps[:3]}{leads_more}\n")
                    all_identified_gaps.extend(q for q in new_leads_or_gaps if isinstance(q, str) and q.strip())

            # Only announce an omission when older findings really exist.
            if len(self.detailed_findings) > self._PLANNER_MAX_FINDINGS:
                parts.append("\n...(older findings omitted for planner summary brevity)...\n")

            parts.append("\n=== SUMMARY OF KEY INSIGHTS DISCOVERED (Overall - up to 10 recent) ===\n")
            if key_insights_found:
                parts.extend(f"• {insight}\n" for insight in key_insights_found[:10])
            else:
                parts.append("No significant key insights extracted yet in this session.\n")

            parts.append("\n=== IDENTIFIED KNOWLEDGE GAPS OR LEADS FOR FURTHER INVESTIGATION (Overall - unique, up to 8) ===\n")
            # dict.fromkeys de-duplicates while keeping first-seen (most recent first) order.
            unique_gaps = list(dict.fromkeys(all_identified_gaps))
            if unique_gaps:
                parts.extend(f"• {gap}\n" for gap in unique_gaps[:8])
            else:
                parts.append("No specific knowledge gaps or new leads identified from analyses in this session yet.\n")

            parts.append("\n=== STRATEGIC GUIDANCE FOR NEXT SEARCHES (Reminder for Planner Role) ===\n")
            parts.append("- Focus on building upon specific facts/findings from the 'COMPLETE RESEARCH FINDINGS' section.\n")
            parts.append("- Prioritize queries that directly address the 'IDENTIFIED KNOWLEDGE GAPS'.\n")
            parts.append("- Ensure new queries are more specific or explore new, connected angles, avoiding repetition of 'Completed Sub-Queries'.\n")
            parts.append("- Drill down into details rather than re-exploring broad topics already covered.\n")

            if not self.pending_sub_queries and self.completed_sub_queries:
                parts.append("\nSTATUS: All previously identified sub-queries for this session seem to be completed. Evaluate carefully if the original query is fully addressed or if deeper, more specific drill-down queries are needed based on the insights and remaining subtle gaps.\n")
            return "".join(parts)[:max_len]
        except Exception as e:
            error_msg = f"Error generating planner summary: {e}"; print(error_msg)
            self.add_error("KBSummaryPlanner", error_msg)
            return f"Original Query: {self.original_query}\nError: Unable to generate detailed summary."

    def get_summary_for_consolidator(self, max_len: int = 30000) -> str:
        """Render the full findings log for the consolidator, truncated to *max_len*.

        Lists every finding in chronological order with up to two internal
        and three Google sources each, followed by overall session status and
        the last five logged errors. Null or mistyped fields are replaced by
        defaults. On an unexpected error the error is logged via ``add_error``
        and a short fallback string is returned.
        """
        try:
            parts: List[str] = [
                f"Current Research Session ID: {self.session_id}\nOriginal User Query for this session: {self.original_query}\n\n",
                f"Total Documents in Global RAG Index: {self.global_faiss_doc_count}\n\n",
                "=== DETAILED FINDINGS LOG (This Session) ===\n",
            ]
            if not self.detailed_findings:
                parts.append("No detailed research findings have been logged in this session.\n")

            for finding_number, raw_finding in enumerate(self.detailed_findings, start=1):
                finding = self._as_dict(raw_finding)
                query_executed = finding.get("query", "N/A")
                analysis = self._as_dict(finding.get("analysis"))
                source_type = self._as_text(finding.get("source_type"), "unknown_source")
                synthesis = self._as_text(analysis.get("overall_synthesis_for_this_query"), self._NO_SYNTHESIS)
                source_label = source_type.replace('_', ' ').title()

                parts.append(f"\n--- Finding {finding_number} for Query: \"{query_executed}\" (Source Type: {source_label}) ---\n")
                parts.append(f"Timestamp: {self._format_timestamp(finding.get('timestamp', 0))}\n")
                parts.append(f"Overall Synthesis from this Query's Analysis: {synthesis}\n")

                internal_sources = self._as_list(analysis.get("internal_faiss_results_summary"))
                if internal_sources:
                    parts.append("  Internal Faiss Sources Consulted (up to 2 shown):\n")
                    for src_number, raw_src in enumerate(internal_sources[:2], start=1):
                        src = self._as_dict(raw_src)
                        doc_id = src.get("internal_doc_id", "N/A")
                        extracted = self._as_text(src.get("key_extracted_info_from_internal"), "N/A")
                        relevance = src.get("relevance_score_to_query", "N/A")
                        parts.append(f"    Source {src_number}: Internal Doc ID: {doc_id} (Relevance: {relevance})\n      Extracted Info: {extracted[:200]}...\n")

                google_sources = self._as_list(analysis.get("google_search_results_summary"))
                if google_sources:
                    parts.append("  Google Search Sources Consulted (up to 3 shown):\n")
                    for src_number, raw_source in enumerate(google_sources[:3], start=1):
                        source = self._as_dict(raw_source)
                        title = source.get("source_title", "N/A")
                        url = source.get("source_url", "N/A")
                        relevance = source.get("relevance_score", "N/A")
                        extracted_info = self._as_text(source.get("key_extracted_info"), "N/A")
                        direct_quote = self._as_text(source.get("direct_quote"), "")
                        info_more = '...' if len(extracted_info) > 300 else ''

                        parts.append(f"    Source {src_number}: {title} (Relevance: {relevance})\n")
                        parts.append(f"      URL: {url}\n")
                        parts.append(f"      Extracted Info: {extracted_info[:300]}{info_more}\n")
                        if direct_quote:
                            quote_more = '...' if len(direct_quote) > 200 else ''
                            parts.append(f"      Direct Quote: \"{direct_quote[:200]}{quote_more}\"\n")
                    if len(google_sources) > 3:
                        parts.append(f"    ...and {len(google_sources)-3} more Google source(s) analyzed for this query (details omitted for brevity).\n")

                new_leads = self._as_list(analysis.get("new_questions_or_leads_from_this_query"))
                if new_leads:
                    leads_text = ', '.join(q for q in new_leads if isinstance(q, str))
                    parts.append(f"  New Leads/Questions identified: {leads_text}\n")
                tool_errors = analysis.get("tool_errors")
                if tool_errors:
                    parts.append(f"  Tool Errors during this Query's Analysis: {tool_errors}\n")

            # Sorted so the rendered prompt is stable for a given state.
            completed_queries_list = sorted(self.completed_sub_queries, key=str)
            pending_text = self.pending_sub_queries if self.pending_sub_queries else 'None'
            parts.append(f"\n\n=== OVERALL RESEARCH STATUS FOR THIS SESSION ({self.session_id}) ===\n")
            parts.append(f"Total Unique Sources Referenced This Session: {len(self.session_sources_referenced)}\n")
            parts.append(f"All Completed Queries This Session ({len(completed_queries_list)}): {completed_queries_list}\n")
            parts.append(f"Queries Still Pending This Session: {pending_text}\n")

            if self.errors:
                parts.append(f"\n=== ERRORS LOGGED IN KNOWLEDGE BASE THIS SESSION ({len(self.errors)}) ===\n")
                for err_number, raw_err in enumerate(self.errors[-5:], start=1):
                    err = self._as_dict(raw_err)
                    agent = err.get('agent', 'N/A')
                    message = err.get('message', 'N/A')
                    parts.append(f"  KB Error {err_number}: Agent '{agent}' - {message}\n")
                    if err.get('details'):
                        parts.append(f"    Details: {str(err.get('details'))[:200]}...\n")
                if len(self.errors) > 5:
                    parts.append("  ...more errors logged in KB (omitted for brevity).\n")

            return "".join(parts)[:max_len]
        except Exception as e:
            error_msg = f"Error generating consolidator summary: {e}"; print(error_msg)
            self.add_error("KBSummaryConsolidator", error_msg)
            return f"Original Query: {self.original_query}\nError: Unable to generate full consolidator summary."

    def to_dict(self) -> Dict[str, Any]:
        """Return the state as a plain dict (sets become lists, source tuples become 2-item lists)."""
        return {
            "session_id": self.session_id,
            "original_query": self.original_query,
            "main_topics_identified": self.main_topics_identified,
            "detailed_findings": self.detailed_findings,
            "pending_sub_queries": self.pending_sub_queries,
            "completed_sub_queries": list(self.completed_sub_queries),
            "session_sources_referenced": [list(s) for s in self.session_sources_referenced],
            "errors": self.errors,
            "query_dependencies": self.query_dependencies,
            "query_generation_reasons": self.query_generation_reasons,
            "global_faiss_doc_count": self.global_faiss_doc_count,
            "creator_plan": self.creator_plan,
            "creator_completed_subtasks_count": self.creator_completed_subtasks_count,
            "created_artifacts": self.created_artifacts
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any], default_session_id: Optional[str] = None) -> 'KnowledgeBase':
        """Rebuild a knowledge base from the dict produced by ``to_dict``.

        The session id comes from *data*, else *default_session_id*, else it
        is derived from the query via ``sanitize_filename`` (defined elsewhere
        in this file) plus the current epoch second. Missing or null
        collection fields fall back to empty containers.

        When no pending queries were saved, the original query is queued
        again only if it is not already completed, so reloading a finished
        session does not re-run its original query.
        """
        session_id_from_data = data.get("session_id")
        original_query_from_data = data.get("original_query", "Unknown Query - KB Deserialization")

        if session_id_from_data:
            final_session_id = session_id_from_data
        elif default_session_id:
            final_session_id = default_session_id
        else:
            final_session_id = sanitize_filename(original_query_from_data) + "_" + str(int(time.time()))

        kb = cls(original_query_from_data, final_session_id)
        kb.main_topics_identified = data.get("main_topics_identified") or []
        kb.detailed_findings = data.get("detailed_findings") or []
        kb.completed_sub_queries = set(data.get("completed_sub_queries") or [])

        saved_pending = data.get("pending_sub_queries") or []
        if saved_pending:
            kb.pending_sub_queries = list(saved_pending)
        elif original_query_from_data in kb.completed_sub_queries:
            kb.pending_sub_queries = []
        else:
            kb.pending_sub_queries = [original_query_from_data]

        # Accept both JSON-style lists and in-memory tuples for source pairs.
        kb.session_sources_referenced = set(
            tuple(s) for s in (data.get("session_sources_referenced") or [])
            if isinstance(s, (list, tuple)) and len(s) == 2
        )
        kb.errors = data.get("errors") or []
        kb.query_dependencies = data.get("query_dependencies") or {}
        kb.query_generation_reasons = data.get("query_generation_reasons") or {}
        kb.global_faiss_doc_count = data.get("global_faiss_doc_count") or 0
        kb.creator_plan = data.get("creator_plan") or []
        kb.creator_completed_subtasks_count = data.get("creator_completed_subtasks_count") or 0
        kb.created_artifacts = data.get("created_artifacts") or {}
        return kb

# --- Faiss Manager (Operates on Global Shared Index) ---
class SimpleFaissManager:
    """Manage one shared flat-L2 Faiss index plus its position-to-doc-id map.

    The manager holds references (not copies) to the global document store
    dict and to the global map list. The map is positional: entry ``i`` is the
    document id of the ``i``-th vector added to the index, which is how search
    results are resolved back to documents. The index is loaded from GCS when
    configured, else from the local fallback file, else created empty.
    When Faiss is unavailable every operation prints a notice and returns.
    """

    def __init__(self, dimension: int,
                 global_document_store_ref: Dict[str, Dict[str, Any]],
                 global_faiss_map_ref: List[str],
                 embedding_fn_ref: callable,
                 gcs_bucket_name: Optional[str] = None, gcs_blob_name: Optional[str] = None,
                 force_new_index: bool = False):
        """Load or create the index.

        Args:
            dimension: Embedding vector length expected by the index.
            global_document_store_ref: Shared ``doc_id -> document`` dict.
            global_faiss_map_ref: Shared positional list of indexed doc ids.
            embedding_fn_ref: Awaitable called with ``texts``, ``task_type``,
                ``titles`` and ``output_dimensionality`` keyword arguments;
                its result is iterated as one vector per input text.
            gcs_bucket_name: Optional bucket used to persist the index.
            gcs_blob_name: Optional blob name of the index inside the bucket.
            force_new_index: When true, skip loading and start empty.
        """
        self.dimension = dimension
        self.index: Optional[faiss.Index] = None
        self.global_doc_store = global_document_store_ref
        self.global_faiss_map = global_faiss_map_ref
        self.embedding_function = embedding_fn_ref

        self.gcs_bucket_name = gcs_bucket_name
        self.gcs_blob_name = gcs_blob_name
        self.local_temp_index_path = TEMP_FAISS_INDEX_PATH

        if not FAISS_AVAILABLE:
            print("SimpleFaissManager: Faiss library not available. RAG operations will be disabled.")
            return

        if force_new_index:
            print("SimpleFaissManager: `force_new_index` is true. Creating new, empty Faiss index object.")
            self.index = faiss.IndexFlatL2(self.dimension)
        else:
            loaded_successfully = False
            if self.gcs_bucket_name and self.gcs_blob_name and GCS_AVAILABLE and storage:
                loaded_successfully = self._load_index_from_gcs_internal()
            if not loaded_successfully and os.path.exists(LOCAL_FAISS_INDEX_PATH):
                loaded_successfully = self._load_index_from_local_internal(LOCAL_FAISS_INDEX_PATH)
            if not loaded_successfully:
                print(f"SimpleFaissManager: No existing Faiss index found. Creating new empty index object.")
                self.index = faiss.IndexFlatL2(self.dimension)

        if self.index:
            print(f"SimpleFaissManager initialized. Index has {self.index.ntotal} embeddings. Store: {len(self.global_doc_store)} docs, Map: {len(self.global_faiss_map)} IDs.")
            if not force_new_index and self.index.ntotal > 0 and self.index.ntotal != len(self.global_faiss_map):
                print(f"  WARNING: Faiss index count ({self.index.ntotal}) != map size ({len(self.global_faiss_map)}). Potential desync.")
        else:
            print("SimpleFaissManager CRITICAL: Faiss index is None after init. Creating fallback empty index.")
            self.index = faiss.IndexFlatL2(self.dimension)

    @staticmethod
    def _ensure_parent_dir(file_path: str) -> None:
        """Create the parent directory of *file_path* if the path has one.

        ``os.makedirs('')`` raises, so a bare file name (no directory part)
        must be skipped rather than passed through.
        """
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    def _remove_temp_index_file(self) -> None:
        """Delete the temporary index file used for GCS transfers, if present."""
        if os.path.exists(self.local_temp_index_path):
            os.remove(self.local_temp_index_path)

    def _get_gcs_blob(self):
        """Return the GCS blob object for the index, or ``None`` if GCS is unusable.

        Tolerates a ``gs://`` prefix on the bucket name and a bucket prefix on
        the blob name. Any client error is printed and reported as ``None``.
        """
        if not GCS_AVAILABLE or not storage or not self.gcs_bucket_name or not self.gcs_blob_name: return None
        try:
            storage_client = storage.Client(project=VERTEX_PROJECT_ID if VERTEX_PROJECT_ID else None)
            bucket = storage_client.bucket(self.gcs_bucket_name.replace("gs://", "").rstrip("/"))
            blob_name = self.gcs_blob_name
            if blob_name.startswith(f"gs://{bucket.name}/"): blob_name = blob_name[len(f"gs://{bucket.name}/"):]
            elif blob_name.startswith(f"{bucket.name}/"): blob_name = blob_name[len(f"{bucket.name}/"):]
            return bucket.blob(blob_name)
        except Exception as e: print(f"GCS Error getting blob gs://{self.gcs_bucket_name}/{self.gcs_blob_name}: {e}"); return None

    def _load_index_from_gcs_internal(self) -> bool:
        """Download the index blob to the temp path and read it; return success."""
        print(f"Faiss(GCS): Attempting to load index from gs://{self.gcs_bucket_name}/{self.gcs_blob_name}...")
        blob = self._get_gcs_blob()
        if not (blob and blob.exists()):
            print(f"Faiss(GCS): Index not found.")
            return False
        try:
            self._ensure_parent_dir(self.local_temp_index_path)
            blob.download_to_filename(self.local_temp_index_path)
            self.index = faiss.read_index(self.local_temp_index_path)
            print(f"Faiss(GCS): Index loaded. Size: {self.index.ntotal if self.index else 'None'}")
            self._remove_temp_index_file()
            return True
        except Exception as e:
            print(f"Faiss(GCS): Error loading index: {e}.")
        self._remove_temp_index_file()
        return False

    def _load_index_from_local_internal(self, index_file_path: str) -> bool:
        """Read the index from *index_file_path*; return success."""
        try:
            print(f"Faiss(Local): Attempting to load index from {index_file_path}...")
            self.index = faiss.read_index(index_file_path)
            print(f"Faiss(Local): Index loaded. Size: {self.index.ntotal if self.index else 'None'}")
            return True
        except Exception as e: print(f"Faiss(Local): Error loading index: {e}."); return False

    def save_index(self):
        """Persist the index to GCS when configured, falling back to the local file.

        The index is written to the temp path and uploaded; the temp file is
        removed afterwards. If the blob cannot be obtained or the upload
        fails, the index is saved locally instead.
        """
        if not FAISS_AVAILABLE or not self.index: print("Faiss: Index not available/init. Cannot save."); return
        self._ensure_parent_dir(self.local_temp_index_path)
        if not (self.gcs_bucket_name and self.gcs_blob_name and GCS_AVAILABLE and storage):
            self._save_index_locally_internal()
            return

        blob = self._get_gcs_blob()
        if not blob:
            self._save_index_locally_internal()
            return
        try:
            faiss.write_index(self.index, self.local_temp_index_path)
            print(f"Faiss(GCS): Index temp saved to {self.local_temp_index_path} for upload ({self.index.ntotal} vectors).")
            blob.upload_from_filename(self.local_temp_index_path)
            print(f"Faiss(GCS): Index saved to GCS.")
            self._remove_temp_index_file()
        except Exception as e:
            print(f"Faiss(GCS): Error saving index to GCS: {e}. Attempting local save.")
            # Remove the temp file before the local save, in case both paths coincide.
            self._remove_temp_index_file()
            self._save_index_locally_internal()

    def _save_index_locally_internal(self):
        """Write the index to the local fallback path; errors are printed, not raised."""
        if not FAISS_AVAILABLE or not self.index: return
        try:
            os.makedirs(GLOBAL_RAG_DIR, exist_ok=True)
            print(f"Faiss(Local): Saving index to {LOCAL_FAISS_INDEX_PATH} ({self.index.ntotal} vectors)...")
            faiss.write_index(self.index, LOCAL_FAISS_INDEX_PATH)
            print(f"Faiss(Local): Index saved.")
        except Exception as e: print(f"Faiss(Local): Error saving index: {e}")

    async def add_documents_from_kb_async(self):
        """Embed and index every stored document that is not in the map yet.

        Documents without content are skipped. Each returned vector is paired
        with its document in order; vectors that are missing or of the wrong
        length are reported and left unindexed.
        """
        if not FAISS_AVAILABLE: print("Faiss: Not available. Skipping add_docs."); return
        if not self.index:
            self.index = faiss.IndexFlatL2(self.dimension); print("Faiss: Index was None, created new for add_docs.")
        if not self.embedding_function: print("Faiss: Embedding function missing. Cannot add docs."); return

        docs_to_embed_content: List[str] = []
        doc_ids_for_new_embeddings: List[str] = []
        doc_titles_for_new_embeddings: List[str] = []
        current_indexed_doc_ids = set(self.global_faiss_map)

        for doc_id, doc_data in self.global_doc_store.items():
            if doc_id in current_indexed_doc_ids:
                continue
            content = doc_data.get('content')
            if not content:
                continue
            docs_to_embed_content.append(content)
            doc_ids_for_new_embeddings.append(doc_id)
            doc_titles_for_new_embeddings.append(doc_data.get('summary', content[:100]))

        if not docs_to_embed_content:
            print("Faiss: No new documents in global store to add to index based on current map.")
            return

        print(f"Faiss: Embedding {len(docs_to_embed_content)} new docs from global store for indexing...")
        embedding_vectors_list = await self.embedding_function(
            texts=docs_to_embed_content, task_type="RETRIEVAL_DOCUMENT",
            titles=doc_titles_for_new_embeddings, output_dimensionality=self.dimension
        )
        if embedding_vectors_list is None:
            print("  WARNING: Embedding function returned no result. Nothing will be indexed.")
            return
        if len(embedding_vectors_list) != len(doc_ids_for_new_embeddings):
            print(f"  WARNING: Expected {len(doc_ids_for_new_embeddings)} embeddings, got {len(embedding_vectors_list)}. Unpaired entries will not be indexed.")

        successful_embeddings_np_list = []
        final_doc_ids_for_map_extension = []
        # zip pairs by position and stops at the shorter sequence, so a
        # mis-sized result can neither raise nor shift ids against vectors.
        for doc_id, emb_floats in zip(doc_ids_for_new_embeddings, embedding_vectors_list):
            if emb_floats is not None and len(emb_floats) == self.dimension:
                successful_embeddings_np_list.append(np.array(emb_floats, dtype='float32'))
                final_doc_ids_for_map_extension.append(doc_id)
            else:
                print(f"  WARNING: Failed/invalid embedding for doc ID '{doc_id}'. Will not be indexed.")
        if successful_embeddings_np_list:
            embeddings_array = np.vstack(successful_embeddings_np_list)
            self.add_new_embeddings(embeddings_array, final_doc_ids_for_map_extension)

    def add_new_embeddings(self, embeddings_array: np.ndarray, new_doc_ids_for_map_extension: List[str]):
        """Append vectors to the index and their ids to the map, then save.

        A 1-D array is treated as a single vector. Nothing is added when the
        vector width differs from ``self.dimension`` or when the number of
        ids differs from the number of vectors, because either would break
        the positional correspondence between index and map.
        """
        if not FAISS_AVAILABLE: print("Faiss: Unavailable. Cannot add embeddings."); return
        if not self.index: self.index = faiss.IndexFlatL2(self.dimension); print("Faiss: Index was None, created new.")
        if embeddings_array.shape[0] == 0: print("Faiss: No new embeddings to add."); return
        if embeddings_array.ndim == 1: embeddings_array = embeddings_array.reshape(1, -1)
        if embeddings_array.shape[1] != self.dimension: print(f"Faiss Error: Dim mismatch. Expected {self.dimension}, got {embeddings_array.shape[1]}."); return
        if embeddings_array.shape[0] != len(new_doc_ids_for_map_extension):
            print(f"Faiss Error: Got {embeddings_array.shape[0]} embeddings but {len(new_doc_ids_for_map_extension)} doc IDs. Nothing added to avoid index/map desync.")
            return

        self.index.add(embeddings_array.astype('float32'))
        self.global_faiss_map.extend(new_doc_ids_for_map_extension)
        print(f"Faiss: Added {embeddings_array.shape[0]} embeddings. Total: {self.index.ntotal}. Map size: {len(self.global_faiss_map)}.")
        self.save_index()

    async def search_global_index_async(self, query: str, top_k: int = RAG_TOP_K) -> List[Dict[str, Any]]:
        """Embed *query* and return up to *top_k* nearest stored documents.

        Each result holds ``internal_doc_id``, a ``summary`` (falling back to
        the content when the summary is missing or empty), a
        ``content_preview`` and a ``similarity_score`` computed as
        ``1 / (1 + distance)``. Hits whose position is outside the map or
        whose document is no longer in the store are dropped. Any error is
        printed and yields an empty list.
        """
        if not FAISS_AVAILABLE: print("Faiss: Not available. Cannot search."); return []
        if not self.index or self.index.ntotal == 0: print(f"Faiss: Index empty or None. Cannot search."); return []
        if not self.embedding_function: print("Faiss: Embedding function missing. Cannot search."); return []

        print(f"Faiss: Async searching for '{query[:50]}...' (top_k={top_k}, index size={self.index.ntotal})")
        try:
            embedding_vectors_list = await self.embedding_function(
                texts=[query], task_type="RETRIEVAL_QUERY", titles=None, output_dimensionality=self.dimension
            )
            if not embedding_vectors_list or not embedding_vectors_list[0] or len(embedding_vectors_list[0]) != self.dimension:
                print(f"Faiss Error: Invalid embedding for query '{query}'."); return []

            query_embedding_np = np.array(embedding_vectors_list[0], dtype='float32').reshape(1, -1)
            actual_top_k = min(top_k, self.index.ntotal)
            if actual_top_k == 0: return []

            distances, faiss_indices_sequential = self.index.search(query_embedding_np, actual_top_k)
            results = []
            if faiss_indices_sequential.size > 0 and len(faiss_indices_sequential[0]) > 0:
                for rank, seq_idx in enumerate(faiss_indices_sequential[0]):
                    if not (0 <= seq_idx < len(self.global_faiss_map)): continue
                    doc_id = self.global_faiss_map[seq_idx]
                    doc_data = self.global_doc_store.get(doc_id)
                    if not doc_data:
                        continue
                    dist = float(distances[0][rank])
                    # A null/empty summary must not abort the whole search; use the content instead.
                    content_text = str(doc_data.get("content") or "")
                    summary_text = str(doc_data.get("summary") or content_text)
                    results.append({
                        "internal_doc_id": doc_id,
                        "summary": summary_text[:250] + "...",
                        "content_preview": content_text[:250] + "...",  # Keep content_preview for consistency
                        "similarity_score": 1 / (1 + dist) if dist >= 0 else 0
                    })
            print(f"  Faiss Async: Found {len(results)} results for query '{query[:50]}...'")
            return results
        except Exception as e: print(f"Faiss Error during async search: {e}"); traceback.print_exc(); return []

    def clear_global_index_data(self):
        """Empty the index, the shared document store and the shared map in place."""
        if not FAISS_AVAILABLE: print("Faiss not available."); return
        if self.index: self.index.reset(); print("Faiss index reset.")
        self.global_doc_store.clear(); self.global_faiss_map.clear()
        print("Global doc store and Faiss map cleared.")
        self.index = faiss.IndexFlatL2(self.dimension); print("New empty Faiss index created.")

# --- Agent System Class (Main orchestrator) ---
class DeepResearchSystem:
    def __init__(self, project_id: str, location: str, load_previous_session_id: Optional[str] = None, force_new_faiss_and_global_stores: bool = False):
        """Set up clients, global RAG stores, the Faiss manager and, optionally, a saved session.

        Args:
            project_id: Project id handed to the genai client.
            location: Location/region handed to the genai client.
            load_previous_session_id: If given, that session's saved state is
                loaded; when loading fails the knowledge base is left unset and
                the research log is emptied.
            force_new_faiss_and_global_stores: Passed through as
                ``force_new_index`` when the global Faiss manager is initialised.
        """
        self.project_id = project_id
        self.location = location
        self.vertex_client = None
        self.embedding_model: Optional[TextEmbeddingModel] = None
        self.research_log: List[Dict[str, Any]] = []
        self.knowledge_base: Optional[KnowledgeBase] = None
        self.faiss_manager: Optional[SimpleFaissManager] = None

        # Master containers; the Faiss manager later receives these same objects.
        self.master_document_store: Dict[str, Dict[str, Any]] = {}
        self.master_faiss_idx_to_doc_id_map: List[str] = []

        # --- Text-generation client (google.genai) ---
        if not (GOOGLE_GENAI_SDK_AVAILABLE and genai):
            print("DeepResearchSystem: google.genai SDK not available. Text generation models will NOT work.")
            self.vertex_client = None
        else:
            try:
                self.vertex_client = genai.Client(vertexai=True, project=self.project_id, location=self.location)
                print(f"DeepResearchSystem: Vertex AI Client (genai.Client) initialized for text models using project '{self.project_id}' and location '{self.location}'.")
            except ValueError as ve:
                print(f"DeepResearchSystem: CRITICAL ValueError during genai.Client initialization: {ve}")
                print("Ensure VERTEX_PROJECT_ID and VERTEX_LOCATION are correctly set and the client has permissions.")
                self.vertex_client = None
            except Exception as e_client:
                print(f"DeepResearchSystem: CRITICAL ERROR initializing genai.Client: {e_client}")
                traceback.print_exc()
                self.vertex_client = None

        # --- Embedding model (Vertex AI SDK) ---
        if VERTEX_AI_SDK_INITIALIZED_SUCCESSFULLY and TEXT_EMBEDDING_MODEL_CLASS_AVAILABLE and TextEmbeddingModel:
            embedding_model_name = "gemini-embedding-001"
            try:
                print(f"DeepResearchSystem: Attempting to load embedding model '{embedding_model_name}'...")
                self.embedding_model = TextEmbeddingModel.from_pretrained(embedding_model_name)
                print(f"DeepResearchSystem: Successfully loaded embedding model '{embedding_model_name}'.")
            except Exception as e_load_embed:
                print(f"DeepResearchSystem: WARNING - Failed to load embedding model '{embedding_model_name}': {e_load_embed}")
                traceback.print_exc()
                self.embedding_model = None
        else:
            # Report which prerequisite was missing.
            if not VERTEX_AI_SDK_INITIALIZED_SUCCESSFULLY:
                print("DeepResearchSystem: Embedding model not loaded because global Vertex AI SDK initialization failed or was skipped.")
            elif not TEXT_EMBEDDING_MODEL_CLASS_AVAILABLE or not TextEmbeddingModel:
                print("DeepResearchSystem: Embedding model not loaded because TextEmbeddingModel class was not imported successfully.")
            self.embedding_model = None

        # --- Persistent state: the stores must be loaded before the manager is built ---
        for required_dir in (SESSIONS_DIR, GLOBAL_RAG_DIR):
            os.makedirs(required_dir, exist_ok=True)
        self._load_global_rag_stores()
        self._initialize_faiss_manager_globally(force_new_index=force_new_faiss_and_global_stores)

        if load_previous_session_id and not self._load_session_state(load_previous_session_id):
            print(f"Failed to load session {load_previous_session_id}. Will proceed as new if query provided later in run_deep_research.")
            self.knowledge_base = None
            self.research_log = []

        index_available = bool(self.faiss_manager and self.faiss_manager.index)
        if self.knowledge_base:
            self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal if index_available else 0

        faiss_doc_report = self.faiss_manager.index.ntotal if index_available else 'N/A (Faiss unavailable or not initialized)'
        print(f"DeepResearchSystem initialized. Global Faiss Index has {faiss_doc_report} docs.")
        print(f"Master document store has {len(self.master_document_store)} entries. Master Faiss map has {len(self.master_faiss_idx_to_doc_id_map)} entries.")

    def _load_global_rag_stores(self):
        """Populate the master document store and Faiss id map from persistent storage.

        Each store is looked up in GCS first (when the GCS client library and
        the bucket/blob settings are available) and then in its local fallback
        file. If neither source yields usable data the store is initialised
        empty.

        Payloads are parsed into a local variable and type-checked (the
        document store must be a JSON object, the Faiss map a JSON array)
        before they are assigned. A wrong-typed or corrupt payload is therefore
        handled like any other load failure: it is reported and the next source
        is tried, instead of leaving a list where a dict is expected.
        """
        os.makedirs(GLOBAL_RAG_DIR, exist_ok=True)
        print("--- Loading Global RAG Stores ---")

        def _open_gcs_blob(configured_blob_name, warn_if_no_project=False):
            """Resolve a configured blob name to ``(blob, bucket_name, blob_name)``."""
            gcs_project_id = self.project_id if self.project_id else os.environ.get("GOOGLE_CLOUD_PROJECT")
            if warn_if_no_project and not gcs_project_id:
                print("Warning (GCS Load): Project ID for GCS client is not set. Using default project if available.")
            client = storage.Client(project=gcs_project_id)
            bucket = client.bucket(FAISS_INDEX_GCS_BUCKET_NAME.replace("gs://", "").rstrip("/"))
            blob_name = configured_blob_name
            # Tolerate blob settings that redundantly repeat the bucket prefix.
            for redundant_prefix in (f"gs://{bucket.name}/", f"{bucket.name}/"):
                if blob_name.startswith(redundant_prefix):
                    blob_name = blob_name[len(redundant_prefix):]
                    break
            return bucket.blob(blob_name), bucket.name, blob_name

        def _checked(payload, expected_type, description):
            """Return *payload* if it is an ``expected_type`` instance, else raise TypeError."""
            if not isinstance(payload, expected_type):
                raise TypeError(
                    f"{description} must deserialize to {expected_type.__name__}, got {type(payload).__name__}"
                )
            return payload

        # ---- Document store: GCS first, then local fallback ----
        loaded_doc_store_from_gcs = False
        if GCS_AVAILABLE and storage and FAISS_INDEX_GCS_BUCKET_NAME and GLOBAL_DOC_STORE_GCS_BLOB_NAME:
            print(f"Attempting to load Global Document Store from GCS: gs://{FAISS_INDEX_GCS_BUCKET_NAME}/{GLOBAL_DOC_STORE_GCS_BLOB_NAME}")
            try:
                blob, bucket_name, blob_name_cleaned = _open_gcs_blob(GLOBAL_DOC_STORE_GCS_BLOB_NAME, warn_if_no_project=True)
                if blob.exists():
                    # download_as_bytes replaces the deprecated download_as_string alias.
                    payload = json.loads(blob.download_as_bytes())
                    self.master_document_store = _checked(payload, dict, "Global document store")
                    print(f"Global document store loaded from GCS: {len(self.master_document_store)} documents.")
                    loaded_doc_store_from_gcs = True
                else:
                    print(f"Global document store not found in GCS at gs://{bucket_name}/{blob_name_cleaned}.")
            except Exception as e:
                # Best effort: any GCS/parse/type problem falls through to the local copy.
                print(f"Error loading global document store from GCS: {e}. Will try local.")
                traceback.print_exc()
        if not loaded_doc_store_from_gcs:
            if os.path.exists(GLOBAL_DOC_STORE_PATH):
                print(f"Attempting to load Global Document Store from local fallback: {GLOBAL_DOC_STORE_PATH}")
                try:
                    with open(GLOBAL_DOC_STORE_PATH, 'r', encoding='utf-8') as f:
                        payload = json.load(f)
                    self.master_document_store = _checked(payload, dict, "Global document store")
                    print(f"Global document store loaded from local fallback: {len(self.master_document_store)} documents.")
                except Exception as e:
                    print(f"Error loading global document store from local fallback: {e}. Initializing empty store.")
                    self.master_document_store = {}
            else:
                print("Global document store not found in GCS or local fallback. Initializing empty store.")
                self.master_document_store = {}

        # ---- Faiss index -> document id map: GCS first, then local fallback ----
        loaded_faiss_map_from_gcs = False
        if GCS_AVAILABLE and storage and FAISS_INDEX_GCS_BUCKET_NAME and GLOBAL_FAISS_MAP_GCS_BLOB_NAME:
            print(f"Attempting to load Global Faiss Map from GCS: gs://{FAISS_INDEX_GCS_BUCKET_NAME}/{GLOBAL_FAISS_MAP_GCS_BLOB_NAME}")
            try:
                blob, bucket_name, blob_name_cleaned = _open_gcs_blob(GLOBAL_FAISS_MAP_GCS_BLOB_NAME)
                if blob.exists():
                    payload = json.loads(blob.download_as_bytes())
                    self.master_faiss_idx_to_doc_id_map = _checked(payload, list, "Global Faiss map")
                    print(f"Global Faiss ID map loaded from GCS: {len(self.master_faiss_idx_to_doc_id_map)} mappings.")
                    loaded_faiss_map_from_gcs = True
                else:
                    print(f"Global Faiss map not found in GCS at gs://{bucket_name}/{blob_name_cleaned}.")
            except Exception as e:
                print(f"Error loading global Faiss map from GCS: {e}. Will try local.")
                traceback.print_exc()
        if not loaded_faiss_map_from_gcs:
            if os.path.exists(GLOBAL_FAISS_MAP_PATH):
                print(f"Attempting to load Global Faiss Map from local fallback: {GLOBAL_FAISS_MAP_PATH}")
                try:
                    with open(GLOBAL_FAISS_MAP_PATH, 'r', encoding='utf-8') as f:
                        payload = json.load(f)
                    self.master_faiss_idx_to_doc_id_map = _checked(payload, list, "Global Faiss map")
                    print(f"Global Faiss ID map loaded from local fallback: {len(self.master_faiss_idx_to_doc_id_map)} mappings.")
                except Exception as e:
                    print(f"Error loading global Faiss ID map from local fallback: {e}. Initializing empty map.")
                    self.master_faiss_idx_to_doc_id_map = []
            else:
                print("Global Faiss map not found in GCS or local fallback. Initializing empty map.")
                self.master_faiss_idx_to_doc_id_map = []
        print("--- Finished Loading Global RAG Stores ---")

    def _save_global_rag_stores(self):
        """Persist the master document store and the Faiss id map.

        Each store is serialised to indented JSON, uploaded to GCS when the GCS
        settings are available, and always written to its local fallback file
        as well. A failed upload or local write is reported and does not stop
        the remaining steps; any other error is caught and logged with a
        traceback.
        """
        os.makedirs(GLOBAL_RAG_DIR, exist_ok=True)
        print("--- Saving Global RAG Stores ---")

        def _persist(store, gcs_blob_setting, local_path, title_noun, noun):
            """Write one store to GCS (if configured) and to its local fallback file."""
            serialized = json.dumps(store, indent=2)
            uploaded = False
            if GCS_AVAILABLE and storage and FAISS_INDEX_GCS_BUCKET_NAME and gcs_blob_setting:
                print(f"Attempting to save Global {title_noun} to GCS: gs://{FAISS_INDEX_GCS_BUCKET_NAME}/{gcs_blob_setting}")
                try:
                    gcs_project_id = self.project_id if self.project_id else os.environ.get("GOOGLE_CLOUD_PROJECT")
                    client = storage.Client(project=gcs_project_id)
                    bucket = client.bucket(FAISS_INDEX_GCS_BUCKET_NAME.replace("gs://", "").rstrip("/"))
                    blob_name_cleaned = gcs_blob_setting
                    # Strip a redundant bucket prefix from the configured blob name.
                    if blob_name_cleaned.startswith(f"gs://{bucket.name}/"):
                        blob_name_cleaned = blob_name_cleaned[len(f"gs://{bucket.name}/"):]
                    elif blob_name_cleaned.startswith(f"{bucket.name}/"):
                        blob_name_cleaned = blob_name_cleaned[len(f"{bucket.name}/"):]
                    bucket.blob(blob_name_cleaned).upload_from_string(serialized, content_type='application/json')
                    print(f"Global {noun} ({len(store)} entries) saved to GCS.")
                    uploaded = True
                except Exception as e:
                    print(f"Error saving global {noun} to GCS: {e}.")
                    traceback.print_exc()
            # The local copy is written regardless of the GCS outcome.
            try:
                with open(local_path, 'w', encoding='utf-8') as f:
                    f.write(serialized)
                print(f"Global {noun} {'also ' if uploaded else ''}saved locally to {local_path}.")
            except Exception as e_local:
                print(f"Error saving global {noun} locally: {e_local}")

        try:
            _persist(
                self.master_document_store,
                GLOBAL_DOC_STORE_GCS_BLOB_NAME,
                GLOBAL_DOC_STORE_PATH,
                "Document Store",
                "document store",
            )
            _persist(
                self.master_faiss_idx_to_doc_id_map,
                GLOBAL_FAISS_MAP_GCS_BLOB_NAME,
                GLOBAL_FAISS_MAP_PATH,
                "Faiss Map",
                "Faiss map",
            )
        except Exception as e_outer_save:
            print(f"Outer error during _save_global_rag_stores: {e_outer_save}")
            traceback.print_exc()
        print("--- Finished Saving Global RAG Stores ---")

    def _initialize_faiss_manager_globally(self, force_new_index: bool = False):
        """Create (or recreate) the global ``SimpleFaissManager`` bound to the master stores.

        Args:
            force_new_index: When True, the persisted Faiss index (GCS blob and
                local file), the in-memory master stores and their local
                fallback files are discarded first, and the now-empty stores
                are saved again.

        When Faiss is unavailable the manager is set to ``None``. When a
        manager already exists and no rebuild is forced, only the knowledge
        base's document count is refreshed.
        """
        if not FAISS_AVAILABLE:
            print("Faiss library not available, skipping Faiss manager initialization.")
            self.faiss_manager = None
            if self.knowledge_base:
                self.knowledge_base.global_faiss_doc_count = 0
            return

        if self.faiss_manager and not force_new_index:
            print("Global Faiss Manager already initialized and not forcing new.")
            if self.faiss_manager.index and self.knowledge_base:
                self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal
            return

        if force_new_index:
            print("Forcing new GLOBAL Faiss index and clearing global document/map stores.")

            def _discard_local_file(path, label):
                """Delete *path* if present, reporting the outcome under *label*."""
                if not os.path.exists(path):
                    return
                try:
                    os.remove(path)
                    print(f"Deleted local {label}: {path}")
                except Exception as e:
                    print(f"Could not delete local {label}: {e}")

            if GCS_AVAILABLE and storage and FAISS_INDEX_GCS_BUCKET_NAME and FAISS_INDEX_GCS_BLOB_NAME:
                try:
                    # A throwaway manager is built only to reuse its GCS blob lookup;
                    # the placeholder embedding function is there to satisfy the constructor.
                    async def dummy_embedding_fn_for_delete(texts, task_type, titles, output_dimensionality):
                        return [None]*len(texts)

                    temp_manager_for_delete = SimpleFaissManager(
                        FAISS_DIMENSION, {}, [],
                        embedding_fn_ref=dummy_embedding_fn_for_delete,
                        gcs_bucket_name=FAISS_INDEX_GCS_BUCKET_NAME,
                        gcs_blob_name=FAISS_INDEX_GCS_BLOB_NAME,
                        force_new_index=True
                    )
                    stale_blob = temp_manager_for_delete._get_gcs_blob()
                    if stale_blob and stale_blob.exists():
                        print(f"Attempting to delete existing GCS Faiss index: gs://{FAISS_INDEX_GCS_BUCKET_NAME}/{FAISS_INDEX_GCS_BLOB_NAME}")
                        stale_blob.delete()
                        print("Deleted existing GCS Faiss index.")
                    else:
                        print(f"No GCS Faiss index found at gs://{FAISS_INDEX_GCS_BUCKET_NAME}/{FAISS_INDEX_GCS_BLOB_NAME} to delete.")
                except Exception as e_gcs_delete:
                    print(f"Could not delete GCS Faiss index during force_new: {e_gcs_delete}")
                    traceback.print_exc()

            _discard_local_file(LOCAL_FAISS_INDEX_PATH, "Faiss index")

            # Cleared in place so the same container objects are passed to the manager below.
            self.master_document_store.clear()
            self.master_faiss_idx_to_doc_id_map.clear()

            _discard_local_file(GLOBAL_DOC_STORE_PATH, "doc store")
            _discard_local_file(GLOBAL_FAISS_MAP_PATH, "Faiss map")
            self._save_global_rag_stores()  # persist the now-empty stores

        # The manager gets this instance's own batch-embedding method.
        self.faiss_manager = SimpleFaissManager(
            dimension=FAISS_DIMENSION,
            global_document_store_ref=self.master_document_store,
            global_faiss_map_ref=self.master_faiss_idx_to_doc_id_map,
            embedding_fn_ref=self._get_vertex_embeddings_batch,
            gcs_bucket_name=FAISS_INDEX_GCS_BUCKET_NAME,
            gcs_blob_name=FAISS_INDEX_GCS_BLOB_NAME,
            force_new_index=force_new_index
        )

        manager_has_index = bool(self.faiss_manager and self.faiss_manager.index)
        if self.knowledge_base:
            self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal if manager_has_index else 0
        faiss_doc_report = self.faiss_manager.index.ntotal if manager_has_index else 'N/A'
        print(f"Faiss Manager initialized. Global Faiss Index has {faiss_doc_report} docs.")

    def _load_session_state(self, session_id: str) -> bool:
        """Load a saved session's knowledge base and research log from disk.

        Args:
            session_id: Name of the session sub-directory under ``SESSIONS_DIR``.

        Returns:
            True only when the session's ``knowledge_base.json`` was read and
            turned into a ``KnowledgeBase``; False when the session directory
            or the knowledge base file is missing or the file fails to load.

        The research log is handled independently of the knowledge base. After
        this call ``self.research_log`` holds either the session's saved log or
        an empty list; a missing, unreadable or non-list log file never leaves
        an earlier in-memory log in place. If the session directory does not
        exist nothing is modified.
        """
        session_dir = os.path.join(SESSIONS_DIR, session_id)
        if not os.path.isdir(session_dir):
            print(f"Session directory {session_dir} does not exist.")
            return False

        session_kb_path = os.path.join(session_dir, "knowledge_base.json")
        session_log_path = os.path.join(session_dir, "research_log.json")
        kb_loaded = False

        if os.path.exists(session_kb_path):
            try:
                with open(session_kb_path, 'r', encoding='utf-8') as f:
                    kb_data = json.load(f)
                self.knowledge_base = KnowledgeBase.from_dict(kb_data, default_session_id=session_id)
                print(f"KnowledgeBase for session '{session_id}' loaded. Original query: '{self.knowledge_base.original_query}'")
                if self.faiss_manager and self.faiss_manager.index:
                    self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal
                else:
                    # Without a Faiss index, fall back to the size of the master store.
                    self.knowledge_base.global_faiss_doc_count = len(self.master_document_store) if self.master_document_store else 0
                kb_loaded = True
            except Exception as e:
                print(f"Error loading KnowledgeBase for session '{session_id}': {e}.")
                traceback.print_exc()
        else:
            print(f"No KnowledgeBase file found for session '{session_id}'.")

        if os.path.exists(session_log_path):
            try:
                with open(session_log_path, 'r', encoding='utf-8') as f:
                    loaded_log = json.load(f)
                if not isinstance(loaded_log, list):
                    raise TypeError(f"research log must be a JSON list, got {type(loaded_log).__name__}")
                self.research_log = loaded_log
                print(f"Research log for session '{session_id}' loaded: {len(self.research_log)} entries.")
            except Exception as e:
                # Reset rather than keep a stale log that a later save would
                # write into this session's directory.
                print(f"Error loading research log for session '{session_id}': {e}. Initializing empty log.")
                traceback.print_exc()
                self.research_log = []
        else:
            self.research_log = []
            print(f"No research log file found for session '{session_id}'. Initializing empty log.")

        return kb_loaded

    def _save_current_session_state(self):
        if not self.knowledge_base:
            print("No active KnowledgeBase to save.")
            return

        session_id = self.knowledge_base.session_id
        session_dir = os.path.join(SESSIONS_DIR, session_id)
        os.makedirs(session_dir, exist_ok=True)
        print(f"\n--- Saving state for session '{session_id}' ---")

        try:
            with open(os.path.join(session_dir, "knowledge_base.json"), 'w', encoding='utf-8') as f:
                json.dump(self.knowledge_base.to_dict(), f, indent=2, ensure_ascii=False)
            print(f"Session KnowledgeBase saved to {os.path.join(session_dir, 'knowledge_base.json')}.")
        except Exception as e:
            print(f"Error saving session KnowledgeBase: {e}")
            traceback.print_exc()

        if self.research_log:
            try:
                with open(os.path.join(session_dir, "research_log.json"), 'w', encoding='utf-8') as f:
                    json.dump(self.research_log, f, indent=2, ensure_ascii=False)
                print(f"Session research log saved to {os.path.join(session_dir, 'research_log.json')}.")
            except Exception as e:
                print(f"Error saving session research log: {e}")
                traceback.print_exc()

        if self.faiss_manager:
            self.faiss_manager.save_index()
        self._save_global_rag_stores()



    async def _attempt_json_self_correction_with_llm(self, original_malformed_json_str: str, original_query_context: str, expected_type: str = "object") -> str:
        """Ask an auxiliary LLM call to repair malformed JSON text.

        Args:
            original_malformed_json_str: The text that failed to parse or had the wrong JSON type.
            original_query_context: Context of the originating query; only its
                first 150 characters are put in the prompt.
            expected_type: ``"list"`` selects the list-repair prompt; any other
                value selects the object-repair prompt.

        Returns:
            The repaired JSON string when it parses and matches the expected
            type. Otherwise an empty JSON value: ``"[]"`` when a list was
            expected, ``"{}"`` in every other case.
        """
        print(f"   Calling JSON Fixer LLM (expecting: {expected_type})...")

        wants_list = expected_type == "list"
        if not wants_list:
            # Anything other than "list" is treated as an object request.
            empty_fallback = "{}"
            json_fixer_system_prompt = (
                "You are a specialized AI assistant. Your SOLE task is to correct a malformed string that was INTENDED to be a single JSON OBJECT. "
                "The expected schema might have keys like 'search_query_executed', 'internal_faiss_results_summary', 'google_search_needed_and_performed', 'google_search_results_summary', 'overall_synthesis_for_this_query', 'new_questions_or_leads_from_this_query', 'tool_errors' OR keys like 'artifact_id', 'generated_content', 'confidence_score', 'notes_or_questions_for_next_step'. "
                "Examine the provided text. Ensure it is a single, valid JSON object. "
                "CRITICAL: If the input text appears to be a JSON LIST (e.g., starts with '[' and ends with ']'), you MUST attempt to convert it into a meaningful JSON OBJECT if possible (e.g., if it's a list of questions, make it `{\"new_questions_or_leads_from_this_query\": [questions_list]}`), OR if it cannot be converted to a relevant object, output an empty JSON object {}. "
                "Output ONLY the corrected, valid JSON OBJECT. "
                "If the input is unsalvageable as a JSON object, output an empty JSON object {}. "
                "DO NOT add any explanations or conversational text. ONLY the JSON object."
            )
        else:
            empty_fallback = "[]"
            json_fixer_system_prompt = (
                "You are a specialized AI assistant. Your SOLE task is to correct a malformed string that was INTENDED to be a JSON LIST of strings. "
                "Examine the provided text. If it looks like a list of search queries, ensure it is formatted as a valid JSON list of strings. "
                "Output ONLY the corrected, valid JSON list (e.g., [\"query1\", \"query2\"]). "
                "If the input is unsalvageable as a JSON list or seems to be a different JSON type, output an empty JSON list []. "
                "DO NOT add any explanations or conversational text. ONLY the JSON list."
            )

        type_tag = expected_type.upper()
        fixer_user_content = (
            f"The following text was an attempt to generate a JSON {type_tag} for the query context: '{original_query_context[:150]}...'\n"
            f"However, it is malformed or not the correct JSON type. Please correct it to be a valid JSON {type_tag}.\n\n"
            f"Malformed/Incorrect-Type text:\n```text\n{original_malformed_json_str}\n```\nOutput ONLY the corrected, valid JSON {type_tag}."
        )

        llm_reply = await self._call_llm(json_fixer_system_prompt, fixer_user_content, "Auxiliary_Lite_Task")
        corrected_response_text = llm_reply[0]
        corrected_json_str = parse_llm_response(corrected_response_text)[1]

        if corrected_json_str.startswith("ERROR:"):
            print(f"   JSON Fixer LLM call failed: {corrected_json_str}")
            return empty_fallback

        try:
            # The repaired text must both parse and have the requested top-level type.
            parsed_fixed_json = json.loads(corrected_json_str)
            if wants_list:
                if not isinstance(parsed_fixed_json, list):
                    print(f"   JSON Fixer output for expected list was not a list: {type(parsed_fixed_json)}. Returning empty list.")
                    return "[]"
            elif expected_type == "object":
                if not isinstance(parsed_fixed_json, dict):
                    print(f"   JSON Fixer output for expected object was not an object: {type(parsed_fixed_json)}. Returning empty object.")
                    return "{}"

            print(f"   JSON self-correction attempt resulted in: {corrected_json_str[:200]}...")
            return corrected_json_str
        except json.JSONDecodeError as e:
            print(f"   JSON Fixer output also failed JSON parsing: {e}. Returning empty {expected_type}.")
            return empty_fallback
        except Exception as e_gen:
            print(f"   Unexpected error during JSON Fixer validation: {e_gen}. Returning empty {expected_type}.")
            return empty_fallback



    async def _process_and_potentially_ingest_web_findings(self, search_analyzer_output: Dict[str, Any], original_query_that_led_to_this: str):
        if not self.faiss_manager:
            print("   Skipping dynamic RAG ingestion: Faiss manager not initialized.")
            return
        if not isinstance(search_analyzer_output, dict):
            print("   Skipping dynamic RAG ingestion: Invalid SearchAnalyzer output format.")
            return

        web_results = search_analyzer_output.get("google_search_results_summary", [])
        if not web_results or not isinstance(web_results, list):
            print("   No web results found in SearchAnalyzer output for dynamic RAG ingestion.")
            return

        docs_for_ingestion: List[Dict[str, Any]] = []
        print(f"   Considering {len(web_results)} web results for dynamic RAG ingestion (from query: '{original_query_that_led_to_this[:30]}...')...")

        for item_idx, item_data in enumerate(web_results):
            if not isinstance(item_data, dict):
                continue

            url = item_data.get("source_url")
            info = item_data.get("key_extracted_info")
            title = item_data.get("source_title", f"WebDocument_{item_idx}_{int(time.time())}")
            score_val = item_data.get("relevance_score", 0.0)
            score = float(score_val) if isinstance(score_val, (int, float, str)) and str(score_val).replace('.', '', 1).isdigit() else 0.0


            if info and url and score >= 0.75:
                try:
                    url_hash = hashlib.md5(url.encode('utf-8', 'ignore')).hexdigest()[:12]
                except Exception as e_hash:
                    url_hash = f"nohash{item_idx}"
                    print(f"      Error generating hash for URL '{url}': {e_hash}")

                ts_suffix = str(int(time.time()*1000))[-6:]
                doc_id = f"web_{url_hash}_{sanitize_filename(title, 15)}_{ts_suffix}"

                if doc_id in self.master_document_store:
                    print(f"     Skipping already existing RAG document ID '{doc_id}'.")
                    continue

                content = f"Source URL: {url}\nTitle: {title}\n\nExtracted Info:\n{info}"
                summary = f"From {url}: {title} - (Snippet: {info[:80]}...)"

                docs_for_ingestion.append({
                    "id": doc_id,
                    "content": content,
                    "summary": summary,
                    "metadata": {
                        "source_type": "dynamic_web_ingestion",
                        "original_web_source_url": url,
                        "discovered_during_query": original_query_that_led_to_this,
                        "search_analyzer_relevance": score,
                        "ingestion_timestamp": time.time()
                    }
                })
                print(f"     Queued for RAG (ID: {doc_id}): '{title[:50]}...'")

        if docs_for_ingestion:
            print(f"   Dynamically ingesting {len(docs_for_ingestion)} new web findings into RAG store...")
            await self.ingest_documents_into_rag(docs_for_ingestion)
        else:
            print("   No new web findings met criteria for dynamic RAG ingestion.")

    async def ingest_documents_into_rag(self, documents: List[Dict[str, Any]]):
        """Add documents to the master document store and trigger a Faiss update.

        Args:
            documents: Dicts read via the keys ``'id'``, ``'content'`` and
                optionally ``'summary'`` and ``'metadata'``. Entries without an
                id or content are skipped, as are ids already in the store.

        When at least one document is new, the Faiss manager's
        ``add_documents_from_kb_async`` is awaited and the global stores are
        saved. The knowledge base's Faiss document count, if a knowledge base
        is active, is refreshed afterwards.
        """
        if not FAISS_AVAILABLE:
            print("Faiss library not available. Skipping RAG ingestion.")
            return

        if not self.faiss_manager:
            print("Faiss manager not initialized. Attempting to initialize for ingestion.")
            self._initialize_faiss_manager_globally(force_new_index=False)
            if not self.faiss_manager:
                print("Failed to initialize Faiss manager. Skipping RAG ingestion.")
                return

        if not documents:
            print("No documents provided for ingestion.")
            return

        print(f"DeepResearchSystem: Ingesting {len(documents)} documents into Global RAG Store...")

        newly_stored = 0
        for entry in documents:
            body = entry.get('content')
            entry_id = entry.get('id')
            # The fallback summary is derived from the content when no summary is supplied.
            fallback_summary = (body[:150] + "...") if body else "No summary."
            blurb = entry.get('summary', fallback_summary)

            if not (entry_id and body):
                print(f"Skipping document due to missing ID or content: {str(entry)[:100]}...")
                continue

            if entry_id in self.master_document_store:
                continue

            self.master_document_store[entry_id] = {
                "content": body,
                "summary": blurb,
                "metadata": entry.get("metadata", {})
            }
            newly_stored += 1

        if newly_stored == 0:
            print("No new documents were added to the master document store (all might exist or none provided with content/ID).")
        else:
            print(f"Added {newly_stored} new documents to master document store. Triggering Faiss update...")
            if hasattr(self.faiss_manager, 'add_documents_from_kb_async'):
                await self.faiss_manager.add_documents_from_kb_async()
            else:
                print("ERROR: Faiss manager is missing the 'add_documents_from_kb_async' method.")
            self._save_global_rag_stores()

        index_ready = bool(self.faiss_manager and self.faiss_manager.index)
        if self.knowledge_base:
            self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal if index_ready else 0
            reported_count = self.knowledge_base.global_faiss_doc_count
        else:
            reported_count = self.faiss_manager.index.ntotal if index_ready else 'N/A'
        print(f"Global Faiss index count after ingestion attempt: {reported_count}")

    async def _call_llm(self, system_prompt_text: str, user_content_text: str, agent_type: str, use_search_tool: bool = False, stream_override: Optional[bool] = None) -> Tuple[str, Optional[Dict[str, Any]]]:
        """Send one system+user prompt to the Vertex GenAI client and return its reply.

        The model and sampling parameters are looked up in ``GEMINI_CONFIG`` from
        ``agent_type`` (unknown agent types use the ``'researcher'`` entry). The
        blocking SDK calls are run in a worker thread via ``asyncio.to_thread``.

        Args:
            system_prompt_text: Sent as the request's ``system_instruction``.
            user_content_text: Sent as the single user turn; coerced with ``str()``.
            agent_type: Logical caller name, e.g. ``"Planner"`` or ``"Consolidator"``.
            use_search_tool: When true, adds the ``google_search`` tool to the request.
            stream_override: Forces streaming on or off. When ``None``, only the
                ``"Consolidator"`` agent streams.

        Returns:
            ``(text, function_call_details)``. ``text`` is the model output, an
            empty string, or a message starting with ``"ERROR:"`` (API failures are
            reported through that string rather than raised).
            ``function_call_details`` is ``{"name": ..., "args": ...}`` for the first
            function-call part of a non-streaming response, otherwise ``None``.
        """
        model_name_to_use = "UnknownModel"
        full_text_response = ""
        function_call_details = None  # Set only when the API returns an explicit function-call part.
        # Coerce once: the request payload is built from str(...) anyway, and the
        # logging paths measure/slice this value, which must not fail for non-str input.
        user_text = str(user_content_text)

        def record_failure(message: str) -> None:
            # Mirror a failed call into the research log and, when present, the knowledge base.
            log_details = {"model_used": model_name_to_use, "agent_type": agent_type, "use_search_tool": use_search_tool, "user_content_snippet": user_text[:200] + "..."}
            self.research_log.append({"agent": "LLM_CALLER_VERTEX", "error": message, "details": log_details, "timestamp": time.time()})
            if self.knowledge_base: self.knowledge_base.add_error(f"LLM_CALLER_VERTEX ({agent_type})", message, log_details)

        try:
            if not self.vertex_client:
                return "ERROR: Vertex AI Client (genai.Client) not initialized.", None
            if not GOOGLE_GENAI_SDK_AVAILABLE or not genai or not genai_types:
                return "ERROR: Google GenAI SDK or its types module not imported.", None

            model_config_key_map = {
                'Planner': 'planner',
                'SearchAnalyzer': 'researcher', # This agent will use the search tool
                'Consolidator': 'researcher',
                'Auxiliary_Lite_Task': 'flash_lite',
                'CreatorAgent_Planner': 'creator',
                'CreatorAgent_Executor': 'creator',
                'JSON_FIXER_TASK': 'flash_lite'
            }
            model_config_key = model_config_key_map.get(agent_type, 'researcher')

            if model_config_key not in GEMINI_CONFIG:
                print(f"Warning: Config key '{model_config_key}' for agent '{agent_type}' not in GEMINI_CONFIG. Defaulting to 'researcher'.")
                model_config_key = 'researcher'
            if model_config_key not in GEMINI_CONFIG: # Final fallback
                model_config_key = 'flash_lite'
                if 'flash_lite' not in GEMINI_CONFIG: return "ERROR: No valid LLM config.", None

            active_model_config = GEMINI_CONFIG[model_config_key]
            model_name_to_use = active_model_config['model_name']
            max_tokens = active_model_config.get('max_tokens', 15000)
            temperature = active_model_config.get('temperature', 0.25)
            top_p = active_model_config.get('top_p', 0.95)

            print(f"\n🤖 Calling LLM (Agent: {agent_type}, Model: {model_name_to_use}). User content len: {len(user_text)}. Use Search Tool: {use_search_tool}")

            sys_instruct = {"role": "system", "parts": [{"text": str(system_prompt_text)}]}
            usr_content = {"role": "user", "parts": [{"text": user_text}]}
            contents_for_api = [usr_content]

            # Every harm category is set to BLOCK_NONE. The settings are passed by
            # name; an enum member's ``.name`` is exactly these identifiers, so
            # spelling them as literals produces the same request payload.
            safety_settings_list = [
                {"category": category_name, "threshold": "BLOCK_NONE"}
                for category_name in (
                    "HARM_CATEGORY_HATE_SPEECH",
                    "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "HARM_CATEGORY_HARASSMENT",
                )
            ]

            api_call_config_dict = {
                "temperature": temperature, "top_p": top_p, "max_output_tokens": max_tokens,
                "safety_settings": safety_settings_list,
                "system_instruction": sys_instruct # Correct for genai.Client
            }

            if use_search_tool:
                print("   DEBUG_LLM: Enabling 'google_search' tool in API call config.")
                api_call_config_dict["tools"] = [{"google_search": {}}]

            should_stream_this_call = stream_override if stream_override is not None else (agent_type == "Consolidator")

            if should_stream_this_call:
                print(f"   DEBUG_LLM: Streaming from Vertex Client (model: {model_name_to_use})...")
                streamed_parts = []
                iterator = await asyncio.to_thread(
                    self.vertex_client.models.generate_content_stream,
                    model=model_name_to_use,
                    contents=contents_for_api,
                    config=api_call_config_dict
                )

                def consume_stream(it):
                    # Runs in a worker thread: echo each text chunk as it arrives and keep it.
                    for chunk in it:
                        chunk_text = getattr(chunk, 'text', None)
                        if chunk_text:
                            print(chunk_text, end="", flush=True)
                            streamed_parts.append(chunk_text)

                await asyncio.to_thread(consume_stream, iterator)
                full_text_response = "".join(streamed_parts)
                print("\n   --- End of Stream ---")

            else: # Non-streaming call
                print(f"   DEBUG_LLM: Non-streaming call to Vertex Client (model: {model_name_to_use})...")
                response_object = None
                try:
                    response_object = await asyncio.to_thread(
                        self.vertex_client.models.generate_content,
                        model=model_name_to_use,
                        contents=contents_for_api,
                        config=api_call_config_dict
                    )
                except Exception as e_gen_content:
                    error_message = f"ERROR: LLM generate_content call failed - {type(e_gen_content).__name__}: {e_gen_content}"
                    print(f"ERROR_LLM: {error_message}")
                    # Record the failure like the outer handler does, so that
                    # non-streaming API errors are not lost from the session logs.
                    record_failure(error_message)
                    return error_message, None

                if response_object:
                    full_text_response = response_object.text if hasattr(response_object, 'text') and response_object.text is not None else ""
                    if full_text_response:
                        print(f"  DEBUG_LLM (ModelExecSearch): Received text response (len {len(full_text_response)}): '{full_text_response[:100]}...'")

                    # Check for an API-level function call; only the first one found is reported.
                    if hasattr(response_object, 'candidates') and response_object.candidates and \
                       response_object.candidates[0].content and response_object.candidates[0].content.parts:
                        for part_data_obj in response_object.candidates[0].content.parts:
                            fc = getattr(part_data_obj, 'function_call', None)
                            if not fc:
                                continue
                            fc_name_val = getattr(fc, 'name', None)
                            fc_args_raw_val = getattr(fc, 'args', None)
                            if fc_name_val:
                                fc_args_converted = dict(fc_args_raw_val) if fc_args_raw_val is not None else {}
                                function_call_details = {"name": str(fc_name_val), "args": fc_args_converted}
                                print(f"🛠️ DEBUG_LLM (ModelExecSearch): API-Level Tool Call DETECTED: Name: {fc_name_val}, Args: {fc_args_converted}")
                                break

                    if not full_text_response and not function_call_details:
                        # Nothing usable came back: surface the prompt block reason if the API gave one;
                        # otherwise the text stays empty.
                        feedback = getattr(response_object, 'prompt_feedback', None)
                        block_reason = getattr(feedback, 'block_reason', None)
                        if block_reason:
                            error_message = f"LLM call blocked. Reason: {block_reason}"
                            block_reason_message = getattr(feedback, 'block_reason_message', None)
                            if block_reason_message: error_message += f" Message: {block_reason_message}"
                            print(f"ERROR_LLM: {error_message}")
                            full_text_response = f"ERROR: {error_message}"
                else:
                    print("ERROR_LLM: response_object is None after non-streaming API call.")
                    full_text_response = "ERROR: LLM API call resulted in no response object."

            print(f"  DEBUG_LLM_RETURN: About to return from _call_llm. Response text (len {len(full_text_response)}): '{full_text_response[:100]}...'. API Parsed Function Call Details: {function_call_details}")
            return full_text_response, function_call_details

        except Exception as e:
            error_message = f"ERROR: LLM call failed (Outer Catch - ModelExecSearch) - {type(e).__name__}: {e}"
            print(f"ERROR_LLM: {error_message}"); traceback.print_exc()
            # Replace the generic message with a shorter one for well-known API error classes.
            specific_error_type = ""
            if GOOGLE_API_CORE_EXCEPTIONS_AVAILABLE and google_api_core_exceptions:
                if isinstance(e, google_api_core_exceptions.ResourceExhausted): specific_error_type = "ResourceExhausted"
                elif isinstance(e, google_api_core_exceptions.InvalidArgument): specific_error_type = "InvalidArgument"
                elif isinstance(e, google_api_core_exceptions.NotFound): specific_error_type = f"NotFound (Model {model_name_to_use})"
            if specific_error_type: error_message = f"ERROR: {specific_error_type}. Details: {e}"
            record_failure(error_message)
            print(f"  DEBUG_LLM_RETURN: About to return from EXCEPTION. error_message: {error_message[:100]}. function_call_details: None")
            return error_message, None



    async def _get_vertex_embeddings_batch(self,
                                           texts: List[str],
                                           task_type: str,
                                           titles: Optional[List[Optional[str]]] = None,
                                           output_dimensionality: Optional[int] = None,
                                           max_retries: int = 3,
                                           initial_backoff_seconds: float = 2.0
                                           ) -> List[Optional[List[float]]]:
        """Embed each text with ``self.embedding_model``, one API call per text.

        Despite the name, texts are sent individually, with a pause of
        ``EMBEDDING_API_CALL_DELAY_SECONDS`` between consecutive texts. Quota
        errors are retried with exponential backoff (doubling, capped at 60s);
        any other error marks just that text as failed.

        Args:
            texts: Texts to embed.
            task_type: Passed to ``TextEmbeddingInput`` as ``task_type``.
            titles: Optional per-text titles; only attached when ``task_type`` is
                ``"RETRIEVAL_DOCUMENT"``.
            output_dimensionality: Requested vector size; defaults to
                ``FAISS_DIMENSION`` when ``None``.
            max_retries: Number of retries after the first attempt on quota errors.
            initial_backoff_seconds: First retry delay.

        Returns:
            A list aligned with ``texts`` holding the embedding values, or ``None``
            at positions that could not be embedded. If a precondition fails
            (no model, SDK not initialized, SDK classes missing) every position
            is ``None``.
        """
        def reject_all(error_msg: str, log_error: str, log_detail: str) -> List[Optional[List[float]]]:
            # Log a failed precondition and report every text as not embedded.
            print(error_msg)
            self.research_log.append({
                "agent": "EmbeddingSystem", "error": log_error,
                "details": log_detail, "timestamp": time.time()
            })
            if self.knowledge_base: self.knowledge_base.add_error("EmbeddingSystem", error_msg, {"detail": log_detail})
            return [None] * len(texts)

        if not self.embedding_model:
            return reject_all(
                "ERROR (_get_vertex_embeddings_batch): Embedding model (self.embedding_model) is not available.",
                "Embedding model not loaded",
                "self.embedding_model was None.",
            )

        if not VERTEX_AI_SDK_INITIALIZED_SUCCESSFULLY:
            return reject_all(
                "ERROR (_get_vertex_embeddings_batch): Global Vertex AI SDK was not initialized successfully.",
                "Vertex SDK not initialized",
                "VERTEX_AI_SDK_INITIALIZED_SUCCESSFULLY is False.",
            )

        if not TEXT_EMBEDDING_MODEL_CLASS_AVAILABLE or not TextEmbeddingInput:
            return reject_all(
                "ERROR (_get_vertex_embeddings_batch): Vertex AI TextEmbeddingModel or TextEmbeddingInput class not available.",
                "SDK classes missing",
                "TextEmbeddingModel or TextEmbeddingInput not imported.",
            )

        if not texts:
            return []

        all_embeddings_results: List[Optional[List[float]]] = [None] * len(texts)
        effective_output_dim = output_dimensionality if output_dimensionality is not None else FAISS_DIMENSION

        # Resolve the typed google-api-core exceptions once. An ``except`` clause
        # evaluates its class expression when an exception is being handled, so
        # naming ``google_api_core_exceptions.X`` there directly would itself raise
        # when that module is unavailable and the string-based quota fallback
        # below could never run. An empty tuple matches no exception.
        typed_exceptions_available = bool(GOOGLE_API_CORE_EXCEPTIONS_AVAILABLE and google_api_core_exceptions)
        if typed_exceptions_available:
            invalid_argument_errors = google_api_core_exceptions.InvalidArgument
            too_many_requests_errors = google_api_core_exceptions.TooManyRequests
        else:
            invalid_argument_errors = ()
            too_many_requests_errors = ()

        # Request options are the same for every text and every attempt.
        api_kwargs: Dict[str, Any] = {"auto_truncate": True}
        if effective_output_dim:
            api_kwargs["output_dimensionality"] = effective_output_dim

        num_texts_to_process = len(texts)
        embedding_model_display_name = "gemini-embedding-001 (via TextEmbeddingModel)"
        print(f"   _get_vertex_embeddings_batch: Preparing to process {num_texts_to_process} texts INDIVIDUALLY for '{embedding_model_display_name}', task: {task_type}, target_dim: {effective_output_dim}")

        for i, text_content in enumerate(texts):
            current_title = titles[i] if titles and i < len(titles) else None
            print(f"      Processing text {i + 1}/{num_texts_to_process}: '{text_content[:50]}...'")

            instance_payload: Dict[str, Any] = {"text": text_content, "task_type": task_type}
            if task_type == "RETRIEVAL_DOCUMENT" and current_title:
                instance_payload["title"] = current_title

            instance_to_embed: Optional[TextEmbeddingInput] = None
            try:
                instance_to_embed = TextEmbeddingInput(**instance_payload)
            except Exception as e_input_create:
                err_msg = f"ERROR creating TextEmbeddingInput for text index {i} ('{text_content[:50]}...'): {e_input_create}"
                print(f"    {err_msg}")
                self.research_log.append({
                    "agent": "EmbeddingSystem", "error": "TextEmbeddingInput creation failed",
                    "details": {"text_index": i, "exception": str(e_input_create)}, "timestamp": time.time()
                })
                if self.knowledge_base: self.knowledge_base.add_error("EmbeddingSystem", f"TextEmbeddingInput creation failed for text {i}", {"exception": str(e_input_create)})
                all_embeddings_results[i] = None
                if i < num_texts_to_process - 1 and EMBEDDING_API_CALL_DELAY_SECONDS > 0: # Apply delay before next text
                    print(f"      Pausing for {EMBEDDING_API_CALL_DELAY_SECONDS:.2f}s after TextEmbeddingInput creation error...")
                    await asyncio.sleep(EMBEDDING_API_CALL_DELAY_SECONDS)
                continue

            current_retries = 0
            current_backoff = initial_backoff_seconds
            call_successful_for_this_text = False

            while current_retries <= max_retries:
                try:
                    print(f"        Attempting API call for text {i + 1} (Retry {current_retries})...")

                    # The SDK call is blocking, so it runs in a worker thread.
                    embedding_responses = await asyncio.to_thread(
                        self.embedding_model.get_embeddings,
                        [instance_to_embed],
                        **api_kwargs
                    )

                    if embedding_responses and len(embedding_responses) == 1:
                        response_part = embedding_responses[0]
                        if hasattr(response_part, 'values') and response_part.values is not None:
                            all_embeddings_results[i] = response_part.values
                            call_successful_for_this_text = True
                            print(f"        Text {i + 1} embedding successful.")
                        else:
                            print(f"    WARNING: Received no embedding values for text index {i}. Response part: {response_part}")
                            all_embeddings_results[i] = None
                    else:
                        print(f"    WARNING: Received unexpected response structure or count for text {i + 1}. Expected 1 response, got {len(embedding_responses) if embedding_responses else 0}.")
                    # A response was received (usable or not); it is never retried.
                    break

                except invalid_argument_errors as e_invalid_arg:
                    # The request itself is bad; retrying cannot help.
                    err_msg = f"ERROR (InvalidArgument) for text {i + 1}: {e_invalid_arg}"
                    print(f"    {err_msg}")
                    self.research_log.append({
                        "agent": "EmbeddingSystem", "error": "InvalidArgument for single text",
                        "details": {"text_index": i, "exception": str(e_invalid_arg)}, "timestamp": time.time()
                    })
                    if self.knowledge_base: self.knowledge_base.add_error("EmbeddingSystem", f"InvalidArgument for text {i}", {"exception": str(e_invalid_arg)})
                    all_embeddings_results[i] = None
                    break
                except too_many_requests_errors as e_quota:
                    err_msg = f"INFO (TooManyRequests) for text {i + 1}. Attempt {current_retries + 1}/{max_retries + 1}. Error: {e_quota}"
                    print(f"    {err_msg}")
                    self.research_log.append({
                        "agent": "EmbeddingSystem", "warning": "TooManyRequests for text",
                        "details": {"text_index": i, "retry_attempt": current_retries + 1, "exception": str(e_quota)}, "timestamp": time.time()
                    })
                    current_retries += 1
                    if current_retries > max_retries:
                        print(f"      Max retries reached for text {i + 1} due to quota. Marking as failed.")
                        if self.knowledge_base: self.knowledge_base.add_error("EmbeddingSystem", f"Max retries (quota) for text {i}", {"exception": str(e_quota)})
                        break
                    print(f"      Retrying text {i + 1} in {current_backoff:.2f} seconds...")
                    await asyncio.sleep(current_backoff)
                    current_backoff = min(current_backoff * 2, 60.0)
                except Exception as e_embed:
                    # Without the typed exceptions, recognise quota errors from the message text.
                    is_generic_quota_error = False
                    if not typed_exceptions_available:
                        error_str_lower = str(e_embed).lower()
                        if "quota" in error_str_lower or "429" in error_str_lower or "toomanyrequests" in type(e_embed).__name__.lower():
                            is_generic_quota_error = True

                    if is_generic_quota_error:
                        err_msg = f"INFO (Likely Quota Exceeded - Generic Catch) for text {i + 1}. Attempt {current_retries + 1}/{max_retries + 1}. Error: {e_embed}"
                        print(f"    {err_msg}")
                        self.research_log.append({
                            "agent": "EmbeddingSystem", "warning": "Likely Quota Exceeded (Generic)",
                            "details": {"text_index": i, "retry_attempt": current_retries + 1, "exception": str(e_embed)}, "timestamp": time.time()
                        })
                        current_retries += 1
                        if current_retries > max_retries:
                            if self.knowledge_base: self.knowledge_base.add_error("EmbeddingSystem", f"Max retries (generic quota) for text {i}", {"exception": str(e_embed)})
                            break
                        print(f"      Retrying text {i + 1} in {current_backoff:.2f} seconds (generic catch)...")
                        await asyncio.sleep(current_backoff)
                        current_backoff = min(current_backoff * 2, 60.0)
                        continue # Continue to next retry attempt

                    # If not a recognized quota error, treat as unhandled
                    err_msg = f"ERROR (Unhandled Exception) for text {i + 1}. Error: {type(e_embed).__name__}: {e_embed}"
                    print(f"    {err_msg}"); traceback.print_exc()
                    self.research_log.append({
                        "agent": "EmbeddingSystem", "error": "Unhandled exception in single text embedding",
                        "details": {"text_index": i, "exception_type": type(e_embed).__name__, "exception": str(e_embed)}, "timestamp": time.time()
                    })
                    if self.knowledge_base: self.knowledge_base.add_error("EmbeddingSystem", f"Unhandled exception for text {i}", {"exception_type": type(e_embed).__name__, "exception": str(e_embed)})
                    break # Break from retry loop for this text

            if not call_successful_for_this_text:
                print(f"    Failed to embed text {i + 1}. It will have None embedding.")
                all_embeddings_results[i] = None

            # Space out consecutive texts; no pause is needed after the last one.
            if i < num_texts_to_process - 1 and EMBEDDING_API_CALL_DELAY_SECONDS > 0:
                print(f"      Pausing for {EMBEDDING_API_CALL_DELAY_SECONDS:.2f}s before processing next text...")
                await asyncio.sleep(EMBEDDING_API_CALL_DELAY_SECONDS)

        successful_final_count = sum(1 for emb in all_embeddings_results if emb is not None)
        print(f"   _get_vertex_embeddings_batch: Finished processing all texts. Successfully embedded {successful_final_count}/{num_texts_to_process} texts.")
        return all_embeddings_results


    async def planner_agent_turn(self, current_cycle_num: int, max_cycles_for_run: int) -> List[str]:
        try:
            print(f"\n--- 🤔 Planner Agent Turn (Cycle {current_cycle_num}/{max_cycles_for_run}, V4: Full History Aware Batch RAG-Refinement) ---")
            if not self.knowledge_base:
                error_msg = "Planner: KnowledgeBase not initialized"
                print(error_msg); self.research_log.append({"agent": "Planner", "error_details": error_msg, "timestamp": time.time()})
                return []
            if not self.vertex_client:
                error_msg = "Planner: Vertex AI Client not initialized for LLM."
                print(error_msg); self.research_log.append({"agent": "Planner", "error_details": error_msg, "timestamp": time.time()})
                if self.knowledge_base: self.knowledge_base.add_error("Planner", error_msg)
                return []

            if self.faiss_manager and self.faiss_manager.index:
                self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal
            elif self.knowledge_base:
                 self.knowledge_base.global_faiss_doc_count = 0

            kb_summary_for_phase1 = self.knowledge_base.get_summary_for_planner()

            cycle_awareness_prompt = f"CURRENT CYCLE STATUS: You are in planning Cycle {current_cycle_num} of {max_cycles_for_run} total cycles for this research run. Plan your queries strategically."
            if current_cycle_num >= max_cycles_for_run -1 :
                cycle_awareness_prompt += " You are in or nearing the final cycles. Focus on queries that will lead to a strong consolidation. Prioritize critical missing pieces over broad new explorations unless essential."
            print(f"   Planner: {cycle_awareness_prompt}")

            print("   Planner Phase 1: Generating initial candidate web search queries...")
            fresh_rag_insights_str_phase1 = "No specific fresh RAG insights for initial planning (RAG not available, empty, or no results)."
            pre_planning_rag_query_used_phase1 = "N/A"
            pre_planning_rag_results_for_log = []

            if FAISS_AVAILABLE and self.faiss_manager and hasattr(self.faiss_manager, 'search_global_index_async') and self.faiss_manager.index and self.faiss_manager.index.ntotal > 0:
                gap_section_match = re.search(r"=== IDENTIFIED KNOWLEDGE GAPS OR LEADS FOR FURTHER INVESTIGATION ===\n(.*?)\n===", kb_summary_for_phase1, re.DOTALL | re.IGNORECASE)
                identified_gaps_text = gap_section_match.group(1).strip() if gap_section_match else ""

                if identified_gaps_text and identified_gaps_text.lower() != "no specific knowledge gaps or new leads identified from analyses in this session yet.":
                    top_gaps = [line.strip("• ").strip() for line in identified_gaps_text.splitlines() if line.strip().startswith("• ")][:3]
                    if top_gaps:
                        rag_query_for_planner_context = f"Explore further details or solutions related to these identified knowledge gaps: {'; '.join(top_gaps)}"
                        pre_planning_rag_query_used_phase1 = f"Synthesized from Top KB Gaps: {rag_query_for_planner_context[:100]}..."
                    else:
                        rag_query_for_planner_context = self.knowledge_base.original_query
                        pre_planning_rag_query_used_phase1 = f"Original User Query (fallback for Phase 1 RAG): {self.knowledge_base.original_query[:100]}..."
                elif self.knowledge_base.pending_sub_queries and self.knowledge_base.pending_sub_queries[0] != self.knowledge_base.original_query :
                    rag_query_for_planner_context = " ".join(self.knowledge_base.pending_sub_queries[:2])
                    pre_planning_rag_query_used_phase1 = f"From Pending Queries: {rag_query_for_planner_context[:100]}..."
                else:
                    rag_query_for_planner_context = self.knowledge_base.original_query
                    pre_planning_rag_query_used_phase1 = f"Original User Query: {self.knowledge_base.original_query[:100]}..."

                print(f"     Planner Phase 1: Performing pre-planning RAG with: '{pre_planning_rag_query_used_phase1}' (top_k={PLANNER_RAG_CONTEXT_TOP_K})")

                pre_planning_rag_results = await self.faiss_manager.search_global_index_async(
                    rag_query_for_planner_context,
                    top_k=PLANNER_RAG_CONTEXT_TOP_K
                )
                pre_planning_rag_results_for_log = pre_planning_rag_results

                if pre_planning_rag_results:
                    insights_parts = ["FRESHLY RETRIEVED INTERNAL INSIGHTS (Consider for initial query generation):"]
                    for res_idx, res in enumerate(pre_planning_rag_results):
                        insights_parts.append(f"- Doc ID {res.get('internal_doc_id', 'N/A')} (Sim: {res.get('similarity_score', 0.0):.2f}): {res.get('summary', 'N/A')[:200]}...")
                    fresh_rag_insights_str_phase1 = "\n".join(insights_parts)
                    print(f"     Planner Phase 1: Found {len(pre_planning_rag_results)} RAG insights for initial context.")
            else:
                 print(f"     Planner Phase 1: Pre-planning RAG skipped. Conditions not met (FAISS_AVAILABLE={FAISS_AVAILABLE}, FaissManager Ready={bool(self.faiss_manager and self.faiss_manager.index and self.faiss_manager.index.ntotal > 0)})")

            full_session_query_history_str = "FULL SESSION QUERY HISTORY (Completed and Pending - Generate DRAFT queries distinct from these):\n"
            all_session_queries = list(self.knowledge_base.completed_sub_queries) + self.knowledge_base.pending_sub_queries
            unique_session_queries = sorted(list(set(q for q in all_session_queries if isinstance(q, str) and q.strip())))

            if unique_session_queries:
                max_history_items = 30
                history_to_show = unique_session_queries[-max_history_items:]
                for idx_hist, hist_q in enumerate(history_to_show):
                    status = "Completed" if hist_q in self.knowledge_base.completed_sub_queries else "Pending"
                    full_session_query_history_str += f"- ({status}) {hist_q[:150]}...\n"
                if len(unique_session_queries) > max_history_items:
                    full_session_query_history_str += f"...and {len(unique_session_queries) - max_history_items} older queries.\n"
            else:
                full_session_query_history_str += "  No queries processed or pending in this session yet.\n"

            user_content_phase1 = (
                f"Original Query for this session: {self.knowledge_base.original_query}\n\n"
                f"{cycle_awareness_prompt}\n\n"
                f"{full_session_query_history_str}\n"
                f"{fresh_rag_insights_str_phase1}\n\n"
                f"Comprehensive Knowledge Base Summary (Session ID: {self.knowledge_base.session_id}):\n{kb_summary_for_phase1}\n\n"
                "TASK: Based on ALL the information above (original query, cycle status, FULL SESSION QUERY HISTORY, fresh RAG insights, and full KB summary), "
                "generate 1 to 3 DRAFT web search queries. These DRAFT queries MUST be genuinely novel and distinct from the FULL SESSION QUERY HISTORY. "
                "They should aim to address key unanswered gaps or explore new, logical next steps for the research. "
                "These are initial candidates and will be further refined. Output as a JSON list of strings."
            )

            response_text_phase1, _ = await self._call_llm(PLANNER_AGENT_SYSTEM_PROMPT, user_content_phase1, agent_type="Planner")
            thoughts_phase1, output_str_phase1 = parse_llm_response(response_text_phase1)

            self.research_log.append({"agent": "Planner_Phase1",
                                      "thoughts_snippet": thoughts_phase1[:500]+"...", "output": output_str_phase1,
                                      "pre_planning_rag_query": pre_planning_rag_query_used_phase1,
                                      "pre_planning_rag_results_count": len(pre_planning_rag_results_for_log),
                                      "cycle_info_to_llm": cycle_awareness_prompt,
                                      "session_history_provided_len": len(unique_session_queries),
                                      "timestamp": time.time()})
            print(f"   🧠 Planner Phase 1 Thoughts (snippet): {thoughts_phase1[:200]}...")
            print(f"   💡 Planner Phase 1 Output (Candidate Queries Raw): {output_str_phase1}")

            if not output_str_phase1 or output_str_phase1.startswith("ERROR: LLM call failed"):
                error_msg = f"Planner Phase 1 LLM call failed. Error: {output_str_phase1}"
                print(error_msg); self.knowledge_base.add_error("Planner_Phase1", error_msg, {"raw_output": output_str_phase1})
                self.research_log[-1]["error_details"] = error_msg
                return []

            initial_candidate_queries = []
            try:
                parsed_candidates = json.loads(output_str_phase1)
                if isinstance(parsed_candidates, list):
                    initial_candidate_queries = [q for q in parsed_candidates if isinstance(q, str) and q.strip()]
                elif isinstance(parsed_candidates, dict) and "queries" in parsed_candidates and isinstance(parsed_candidates["queries"], list):
                     initial_candidate_queries = [q for q in parsed_candidates["queries"] if isinstance(q, str) and q.strip()]
                else:
                    print(f"   Planner Phase 1 output was not a direct list of queries: {type(parsed_candidates)}. Attempting to parse from string if applicable.")
                    if isinstance(output_str_phase1, str) and output_str_phase1.strip().startswith("[") and output_str_phase1.strip().endswith("]"):
                        pass
                    else:
                         initial_candidate_queries = [line.strip("- ").strip() for line in output_str_phase1.splitlines() if line.strip() and len(line.strip()) > 10]


            except json.JSONDecodeError as e:
                print(f"   ERROR: Planner Phase 1 output not valid JSON: '{output_str_phase1}'. Error: {e}. Attempting line-by-line recovery.")
                if not output_str_phase1.startswith("ERROR:") and "\n" in output_str_phase1:
                    initial_candidate_queries = [line.strip("- ").strip() for line in output_str_phase1.splitlines() if line.strip() and len(line.strip()) > 10]
                    if initial_candidate_queries: print(f"     Recovered candidates from lines: {initial_candidate_queries}")

            if not initial_candidate_queries:
                print("   Planner Phase 1 did not yield any candidate queries after parsing/recovery. Ending planner turn.")
                return []

            initial_candidate_queries = initial_candidate_queries[:3]
            print(f"   Planner Phase 1 generated {len(initial_candidate_queries)} candidate(s) for refinement: {initial_candidate_queries}")

            refined_queries_for_cycle = []
            if not initial_candidate_queries: return []

            print("\n   Planner Phase 2: Preparing batch RAG feedback for candidate queries...")

            candidate_details_for_prompt_list = []
            candidate_rag_feedback_map_for_log = {}

            for cand_idx, candidate_query in enumerate(initial_candidate_queries):
                candidate_prompt_part = f"--- Candidate Query {cand_idx + 1} ---\n"
                candidate_prompt_part += f"Candidate: \"{candidate_query}\"\n"

                rag_feedback_str_for_candidate = f"RAG Feedback for Candidate {cand_idx + 1}: No specific internal RAG documents found highly relevant to this candidate, or RAG search was not applicable/failed."
                candidate_rag_results_for_this_cand: List[Dict[str, Any]] = []
                if FAISS_AVAILABLE and self.faiss_manager and hasattr(self.faiss_manager, 'search_global_index_async') and self.faiss_manager.index and self.faiss_manager.index.ntotal > 0:
                    print(f"     Planner Phase 2: Performing RAG for candidate '{candidate_query[:70]}...' (top_k={PLANNER_CANDIDATE_REFINE_RAG_TOP_K})")
                    candidate_rag_results = await self.faiss_manager.search_global_index_async(
                        candidate_query,
                        top_k=PLANNER_CANDIDATE_REFINE_RAG_TOP_K
                    )
                    candidate_rag_results_for_this_cand = candidate_rag_results
                    if candidate_rag_results:
                        insights_parts = [f"INTERNAL RAG INSIGHTS for Candidate Query {cand_idx + 1} (\"{candidate_query[:70]}...\") -- Consider these to refine or discard the candidate:"]
                        for res_idx, res in enumerate(candidate_rag_results):
                            insights_parts.append(f"  - RAG Doc {res_idx+1} (ID {res.get('internal_doc_id', 'N/A')}, Sim: {res.get('similarity_score',0.0):.2f}): {res.get('summary', 'N/A')[:200]}...")
                        rag_feedback_str_for_candidate = "\n".join(insights_parts)
                        print(f"     Gathered {len(candidate_rag_results)} RAG results for candidate: '{candidate_query[:50]}...'")
                else:
                    print(f"     Planner Phase 2 RAG for candidate '{candidate_query[:50]}...' skipped. Conditions not met.")

                candidate_prompt_part += rag_feedback_str_for_candidate + "\n"
                candidate_details_for_prompt_list.append(candidate_prompt_part)
                candidate_rag_feedback_map_for_log[candidate_query] = [
                    {
                        "doc_id": r.get("internal_doc_id"),
                        "summary_preview": r.get("summary", r.get("content_preview", ""))[:250]+"...",
                        "score": f"{r.get('similarity_score', 0.0):.3f}"
                    }
                    for r in candidate_rag_results_for_this_cand
                ]

            batch_refinement_prompt_parts = [
                f"Original User Query (Overall Research Goal): {self.knowledge_base.original_query}\n",
                f"{cycle_awareness_prompt}\n",
                f"High-Level Knowledge Base Summary Snippet:\n{kb_summary_for_phase1[:1000]}...\n\n",
                "TASK: You are an expert research strategist. You previously generated the following DRAFT candidate web search queries. For each candidate query listed below, I have performed a targeted internal RAG search and provided its specific 'Internal RAG Feedback'.",
                "Your goal is to CRITICALLY ASSESS each candidate against its RAG feedback AND the overall research context (including cycle status) to produce a final, highly effective web search query OR to explicitly DISCARD the candidate if it's redundant or no longer strategic.",
                "INSTRUCTIONS FOR PROCESSING EACH CANDIDATE QUERY:",
                "For each 'Candidate Query' presented below:",
                "1. SCRUTINIZE its 'Internal RAG Feedback'. Does this feedback directly and comprehensively answer the candidate query? "
                "   - IF YES (RAG *fully* answers candidate): The refined output for this candidate MUST be an EMPTY STRING \"\". This indicates the query is no longer needed for web search.",
                "2. If RAG feedback is relevant but only partially answers, or if it reveals important new nuances, sub-topics, or specific entities related to the candidate query:",
                "   - REFINE the candidate query. The refined query MUST be a significant improvement:",
                "     a. Make it MORE SPECIFIC than the original candidate, drilling down into unaddressed details.",
                "     b. It MUST target aspects NOT explicitly and sufficiently covered by the provided RAG feedback for THIS candidate.",
                "     c. It should aim to uncover NEW information that builds upon, clarifies, or challenges the RAG insights.",
                "     d. Example Refinement: If candidate was 'AI in healthcare' and RAG feedback detailed 'AI for diagnostics', a refined query could be 'ethical implications of AI diagnostics in underfunded hospitals' or 'comparative accuracy of AI vs human radiologists for specific rare conditions mentioned in RAG'.",
                "3. If RAG feedback is sparse, irrelevant, or non-existent for the candidate query:",
                "   - Re-evaluate the candidate's value for WEB SEARCH based on the 'Original User Query', 'High-Level Knowledge Base Summary Snippet', and importantly, the 'CURRENT CYCLE STATUS'.",
                "   - IF STILL VALUABLE (especially if early/mid-cycles, or fills a critical gap for consolidation if late-cycles): You may keep the candidate query as is, or make minor improvements for clarity and focus.",
                "   - IF NO LONGER STRATEGICALLY VALUABLE (e.g., too broad for late cycles, or the overall KB summary implies it's indirectly covered, or it doesn't align with endgame consolidation needs): The refined output for this candidate should be an EMPTY STRING \"\".",
                "4. Ensure all final refined queries are distinct and target truly unknown information for WEB SEARCH.",
                "\nOUTPUT FORMAT: CRITICALLY IMPORTANT!",
                "Provide your output as a SINGLE JSON LIST of strings. This list MUST contain one string element for each candidate query you were given, in the exact same order.",
                "Each string element must be either the refined web search query or an empty string \"\" if the candidate is discarded.",
                "Example (if 3 candidates were given): [\"refined query for candidate 1\", \"\", \"refined query for candidate 3 (if kept or tweaked)\"]\n"
            ]
            batch_refinement_prompt_parts.extend(candidate_details_for_prompt_list)
            user_content_phase2 = "\n".join(batch_refinement_prompt_parts)

            print(f"   Planner Phase 2: Calling LLM for batch refinement of {len(initial_candidate_queries)} queries (Cycle {current_cycle_num}/{max_cycles_for_run}). User content len: {len(user_content_phase2)}")
            response_text_phase2, _ = await self._call_llm(PLANNER_AGENT_SYSTEM_PROMPT, user_content_phase2, agent_type="Planner")
            thoughts_phase2, output_str_phase2 = parse_llm_response(response_text_phase2)

            log_entry_details_for_refinement_logging = []
            for cq_log_idx, original_cand_query_log in enumerate(initial_candidate_queries):
                rag_feedback_for_log = candidate_rag_feedback_map_for_log.get(original_cand_query_log, [{"summary_preview": "N/A - RAG feedback capture error for this query in log"}])
                log_entry_details_for_refinement_logging.append({
                    "candidate_query_index": cq_log_idx,
                    "candidate_query": original_cand_query_log,
                    "rag_feedback_provided_to_llm_count": len(rag_feedback_for_log),
                    "rag_feedback_snippets_preview": [item['summary_preview'][:100]+"..." for item in rag_feedback_for_log[:2]]
                })

            self.research_log.append({
                "agent": "Planner_Phase2_BatchRefine",
                "input_details_for_refinement": log_entry_details_for_refinement_logging,
                "thoughts_snippet": thoughts_phase2[:500]+"...",
                "refined_output_raw": output_str_phase2,
                "cycle_info_to_llm": cycle_awareness_prompt,
                "timestamp": time.time()
            })
            print(f"   🧠 Planner Batch Refinement Thoughts (snippet): {thoughts_phase2[:200]}...")
            print(f"   💡 Planner Batch Refinement Output (Raw JSON List Expected): {output_str_phase2}")

            if not output_str_phase2 or output_str_phase2.startswith("ERROR: LLM call failed"):
                error_msg = f"Planner Phase 2 (Batch Refinement) LLM call failed. Error: {output_str_phase2}"
                print(error_msg); self.knowledge_base.add_error("Planner_Phase2_BatchRefine", error_msg, {"raw_output": output_str_phase2})
                self.research_log[-1]["error_details"] = error_msg
                print("   Fallback: Using initial candidate queries due to refinement failure.")
                return initial_candidate_queries

            try:
                parsed_refined_queries = json.loads(output_str_phase2)
                if isinstance(parsed_refined_queries, list) and len(parsed_refined_queries) == len(initial_candidate_queries):
                    for i, refined_q_str in enumerate(parsed_refined_queries):
                        original_candidate = initial_candidate_queries[i]
                        if isinstance(refined_q_str, str) and refined_q_str.strip():
                            final_q = refined_q_str.strip()
                            print(f"       Refined/Validated Query (from candidate '{original_candidate[:50]}...'): {final_q}")
                            if final_q not in self.knowledge_base.completed_sub_queries and \
                               final_q not in self.knowledge_base.pending_sub_queries and \
                               final_q not in refined_queries_for_cycle:
                                refined_queries_for_cycle.append(final_q)
                                self.knowledge_base.add_dependent_query(
                                    final_q,
                                    parent_queries=[original_candidate],
                                    reason=f"Batch-refined by Planner (Cycle {current_cycle_num}) after RAG feedback on '{original_candidate[:50]}...'."
                                )
                        else:
                            print(f"       Candidate query '{original_candidate[:100]}...' was DISCARDED by Planner during batch refinement.")
                else:
                    err_detail = f"Output was not a list of expected length. Expected {len(initial_candidate_queries)}, Got: {len(parsed_refined_queries) if isinstance(parsed_refined_queries, list) else type(parsed_refined_queries)}"
                    print(f"   ERROR: Planner Batch Refinement output error. {err_detail}. Raw: {output_str_phase2}")
                    self.knowledge_base.add_error("Planner_Phase2_BatchRefine", "Output list length mismatch", {"raw_output": output_str_phase2, "details": err_detail})
                    print("   Fallback: Using initial candidate queries due to refinement output format error.")
                    return initial_candidate_queries

            except json.JSONDecodeError as e:
                print(f"   ERROR: Planner Batch Refinement output not valid JSON: '{output_str_phase2}'. Error: {e}")
                corrected_list_str = await self._attempt_json_self_correction_with_llm(output_str_phase2, "Planner Phase 2 query list", expected_type="list")
                try:
                    parsed_refined_queries_corrected = json.loads(corrected_list_str)
                    if isinstance(parsed_refined_queries_corrected, list) and len(parsed_refined_queries_corrected) == len(initial_candidate_queries):
                        print("    Self-correction for Planner Phase 2 output successful (JSON list).")
                        for i, refined_q_str in enumerate(parsed_refined_queries_corrected):
                            original_candidate = initial_candidate_queries[i]
                            if isinstance(refined_q_str, str) and refined_q_str.strip():
                                final_q = refined_q_str.strip()
                                if final_q not in self.knowledge_base.completed_sub_queries and \
                                   final_q not in self.knowledge_base.pending_sub_queries and \
                                   final_q not in refined_queries_for_cycle:
                                    refined_queries_for_cycle.append(final_q)
                                    self.knowledge_base.add_dependent_query(final_q, [original_candidate], f"Batch-refined (self-corrected) by Planner (Cycle {current_cycle_num}) after RAG feedback on '{original_candidate[:50]}...'.")
                            else: print(f"       Candidate query '{original_candidate[:100]}...' was DISCARDED (after correction).")
                    else:
                        raise ValueError("Corrected output still not a list of expected length.")
                except Exception as e_corr:
                    print(f"    ERROR: Self-correction for Planner Phase 2 output also failed: {e_corr}. Raw: {output_str_phase2}, Corrected Attempt: {corrected_list_str}")
                    self.knowledge_base.add_error("Planner_Phase2_BatchRefine", f"JSONDecodeError after correction: {e_corr}", {"raw_output": output_str_phase2, "corrected_attempt": corrected_list_str})
                    print("   Fallback: Using initial candidate queries due to refinement JSON error (even after correction).")
                    return initial_candidate_queries

            print(f"   Planner Phase 2: Final selected queries for this cycle: {refined_queries_for_cycle}")
            return refined_queries_for_cycle

        except Exception as e_outer:
            error_msg = f"Critical error in Two-Phase Planner (V4): {e_outer}"; print(error_msg); traceback.print_exc()
            self.research_log.append({"agent": "Planner_TwoPhase_Batch_Outer", "error_details": error_msg, "exception_obj": str(e_outer), "timestamp": time.time()})
            if self.knowledge_base: self.knowledge_base.add_error("Planner_TwoPhase_Batch_Outer", "Critical turn error", {"exception": str(e_outer)})
            return []
        if not GOOGLE_SEARCH_API_KEY or not GOOGLE_SEARCH_ENGINE_ID:
            error_msg = "ERROR: Google Search API Key or Search Engine ID is not configured."
            print(f"      {error_msg}")
            if self.knowledge_base:
                self.knowledge_base.add_error("SearchAnalyzer_LiveSearch", error_msg, {"query": query, "detail": "API Key/Engine ID missing"})
            return error_msg # Return error string

        try:
            # This is a synchronous call, so we run it in a thread
            # Ensure 'build' is imported if GOOGLE_API_PYTHON_CLIENT_AVAILABLE is True
            # from googleapiclient.discovery import build # Already checked/imported at top level

            def search_sync():
                service = build("customsearch", "v1", developerKey=GOOGLE_SEARCH_API_KEY)
                # Note: Using .cse().list() for Programmable Search Engine
                res = service.cse().list(q=query, cx=GOOGLE_SEARCH_ENGINE_ID, num=num_results).execute()
                return res.get('items', [])

            search_items = await asyncio.to_thread(search_sync)

            formatted_results = []
            if search_items:
                for item_idx, item in enumerate(search_items):
                    # Ensure essential fields are present, provide defaults or skip if critical ones are missing
                    link = item.get("link")
                    title = item.get("title")
                    snippet = item.get("snippet")

                    if not link or not title:
                        print(f"        Skipping search result {item_idx+1} due to missing link or title.")
                        continue

                    formatted_results.append({
                        "source_url": link,
                        "source_title": title,
                        "key_extracted_info": snippet or "No snippet available.", # Use snippet as key_info
                        "snippet_from_search_engine": snippet or "No snippet available." # For consistency
                    })
                print(f"      Live Google Search returned {len(formatted_results)} formatted results for '{query}'.")
            else:
                print(f"      Live Google Search returned no results for '{query}'.")
            return formatted_results
        except Exception as e:
            error_msg = f"ERROR executing live Google Search for '{query}': {type(e).__name__} - {e}"
            print(f"      {error_msg}")
            traceback.print_exc() # For more detailed debugging during development
            if self.knowledge_base:
                self.knowledge_base.add_error("SearchAnalyzer_LiveSearch", error_msg, {"query": query, "exception_type": type(e).__name__})
            return error_msg # Return error string

    async def search_analyzer_agent_turn(self, query_to_execute: str) -> Dict[str, Any]:
        """Execute one Search & Analyzer turn for a single sub-query.

        The turn proceeds in these steps:
          1. If the global Faiss index is available and non-empty, query it for
             internal context related to ``query_to_execute``.
          2. Call the SearchAnalyzer LLM with the search tool enabled, passing
             the query and any Faiss findings as context.
          3. Parse the reply as a JSON object; on a decode error, make one
             LLM-assisted self-correction attempt.
          4. Normalise a few fields of the parsed object (search flag, executed
             query, list-typed fields).
          5. Record the analysis in the knowledge base and, when the reply
             carries Google search results, trigger RAG ingestion.

        Args:
            query_to_execute: The search query this turn should run and analyse.

        Returns:
            On success, the parsed (and normalised) analysis dict. On failure,
            a dict containing at least ``"error"`` and ``"query_executed"``.
            Exceptions raised inside the turn are caught by the outer handler
            and reported through the same error-dict shape.
        """
        # Bound once so the value handed to `_call_llm` and the value used when
        # normalising `google_search_needed_and_performed` cannot drift apart.
        # (Previously the normalisation step read this name without it ever
        # being defined in this scope, raising NameError.)
        use_search_tool = True
        try:
            print(f"\n--- 🔎 Search & Analyzer Turn (Query: '{query_to_execute}', Model-Executed Google Search V2) ---")
            if not self.knowledge_base:
                error_payload = {"error": "KnowledgeBase not initialized for SearchAnalyzer", "query_executed": query_to_execute}
                self.research_log.append({"agent": "SearchAnalyzer", "error_details": error_payload["error"], "timestamp": time.time()})
                return error_payload
            if not self.vertex_client:
                error_payload = {"error": "Vertex AI Client not initialized for SearchAnalyzer", "query_executed": query_to_execute}
                # Mirror the knowledge-base guard above so this failure is also
                # visible in the research log, not only in the KB error list.
                self.research_log.append({"agent": "SearchAnalyzer", "error_details": error_payload["error"], "timestamp": time.time()})
                # The knowledge base is known to be truthy here (guard above).
                self.knowledge_base.add_error("SearchAnalyzer", error_payload["error"], error_payload)
                return error_payload

            # --- Step 1: optional internal RAG context from the global Faiss index ---
            faiss_search_results_for_llm_context = []
            if FAISS_AVAILABLE and self.faiss_manager and hasattr(self.faiss_manager, 'search_global_index_async') and self.faiss_manager.index and self.faiss_manager.index.ntotal > 0:
                print(f"   DEBUG_SA_MODEL_SEARCH_V2: Querying Internal Faiss Index for: '{query_to_execute}' (top_k={RAG_TOP_K})")
                rag_results = await self.faiss_manager.search_global_index_async(query_to_execute, top_k=RAG_TOP_K)
                if rag_results:
                    print(f"   DEBUG_SA_MODEL_SEARCH_V2: Found {len(rag_results)} results from Global Faiss Index.")
                    for res in rag_results:
                        faiss_search_results_for_llm_context.append({
                            "internal_doc_id": res.get("internal_doc_id"),
                            # Cap each summary so the prompt stays bounded.
                            "summary_of_internal_doc_content": res.get("summary", res.get("content_preview", "N/A"))[:1000],
                            "relevance_score_from_faiss": res.get("similarity_score")
                        })
            else:
                print(f"   DEBUG_SA_MODEL_SEARCH_V2: Internal RAG search skipped or yielded no results for '{query_to_execute}'.")

            # --- Step 2: ask the model to search and synthesise ---
            user_content_for_llm = (
                f"Please perform a Google Search for the following query and then analyze the results as per your role, incorporating any relevant internal Faiss findings provided below:\n"
                f"Search Query to Execute via Google Search Tool: \"{query_to_execute}\"\n\n"
                "INTERNAL KNOWLEDGE BASE (FAISS INDEX) FINDINGS (Context for your analysis):\n"
                f"{json.dumps(faiss_search_results_for_llm_context, indent=2) if faiss_search_results_for_llm_context else 'No relevant documents were found in the internal Faiss index for this specific query.'}\n\n"
                "YOUR TASK:\n"
                "1. Use the Google Search tool with the 'Search Query to Execute via Google Search Tool'.\n"
                "2. Critically evaluate the search results you obtain (these will be part of your response if the tool call is successful internally by the model).\n"
                "3. Synthesize these findings along with any relevant 'INTERNAL KNOWLEDGE BASE (FAISS INDEX) FINDINGS'.\n"
                "4. Produce the comprehensive JSON output as specified by your role (SEARCH_ANALYZER_AGENT_SYSTEM_PROMPT). Ensure the 'search_results_summary' field in your JSON contains the information from the Google Search you performed, and 'google_search_needed_and_performed' is true."
            )

            print("   DEBUG_SA_MODEL_SEARCH_V2: Calling SearchAnalyzer LLM (expecting model-executed search & synthesis)...")
            response_text, tool_call_details_api = await self._call_llm(
                SEARCH_ANALYZER_AGENT_SYSTEM_PROMPT, # The main synthesis prompt
                user_content_for_llm,
                agent_type="SearchAnalyzer",
                use_search_tool=use_search_tool # Critical: This enables the tool for the LLM
            )

            if tool_call_details_api:
                print(f"   WARNING_SA_MODEL_SEARCH_V2: _call_llm returned API tool call details: {tool_call_details_api}. This was not expected for model-executed search. LLM might be trying to delegate back.")

            thoughts, final_output_str = parse_llm_response(response_text)
            # The emptiness check on the parsed answer only happens further
            # down, so guard the preview slices against a falsy/None value
            # instead of letting the logging itself raise.
            thoughts_preview = thoughts or ""
            answer_preview = final_output_str or ""
            log_entry = {
                "agent": "SearchAnalyzer_ModelExecutedSearch_V2", "query_executed": query_to_execute,
                "thoughts_snippet": thoughts_preview[:500]+"...",
                "llm_full_response_text": response_text,
                "llm_parsed_final_answer": answer_preview[:500]+"...",
                "api_tool_call_returned_by_call_llm": tool_call_details_api,
                "timestamp": time.time()
            }
            # `log_entry` is appended now and mutated later on error paths; the
            # list holds the same dict object, so those updates are visible.
            self.research_log.append(log_entry)
            print(f"   DEBUG_SA_MODEL_SEARCH_V2: 🧠 SearchAnalyzer Thoughts: {thoughts_preview[:200]}...")
            print(f"   DEBUG_SA_MODEL_SEARCH_V2: 💡 SearchAnalyzer Parsed Final Answer: {answer_preview[:300]}...")

            if not final_output_str or final_output_str.startswith("ERROR:"):
                error_msg = f"SearchAnalyzer LLM call failed or returned error. Error: {final_output_str}"
                print(f"ERROR_SA_MODEL_SEARCH_V2: {error_msg}")
                error_payload = {"error": error_msg, "raw_error_message": final_output_str, "query_executed": query_to_execute}
                if self.knowledge_base: self.knowledge_base.add_error("SearchAnalyzer_Synth", error_payload["error"], error_payload)
                log_entry["error_details"] = error_msg
                return error_payload

            # --- Step 3: parse the reply, with one self-correction attempt ---
            data = None
            try:
                data = json.loads(final_output_str)
            except json.JSONDecodeError as e:
                print(f"INFO_SA_MODEL_SEARCH_V2: Initial JSON parsing failed. Error: {e}. Attempting self-correction...")
                corrected_json_string = await self._attempt_json_self_correction_with_llm(final_output_str, query_to_execute, expected_type="object")
                try:
                    data = json.loads(corrected_json_string)
                    print("   DEBUG_SA_MODEL_SEARCH_V2: Self-correction successful.")
                except Exception as e_correction_parse:
                    error_msg = f"Failed to parse JSON after self-correction: {e_correction_parse}"
                    error_payload = {"error": error_msg, "original_raw_output": final_output_str, "corrected_attempt": corrected_json_string, "query_executed": query_to_execute}
                    if self.knowledge_base: self.knowledge_base.add_error("SearchAnalyzer_Parse", error_msg, error_payload)
                    log_entry["error_details"] = error_msg
                    return error_payload

            # A literal JSON `null` parses successfully to None; treat it as a
            # parse failure rather than as an analysis.
            if data is None:
                 error_msg = "SearchAnalyzer final output could not be parsed or corrected into JSON."
                 error_payload = {"error": error_msg, "raw_output": final_output_str, "query_executed": query_to_execute}
                 if self.knowledge_base: self.knowledge_base.add_error("SearchAnalyzer_Parse", error_msg, error_payload)
                 log_entry["error_details"] = error_msg
                 return error_payload

            if not isinstance(data, dict):
                error_msg = f"SearchAnalyzer output was not a JSON object. Got type: {type(data)}"
                print(f"ERROR_SA_MODEL_SEARCH_V2: {error_msg} Parsed: {str(data)[:200]}...")
                error_payload = {"error": error_msg, "raw_output": final_output_str, "parsed_data_preview": str(data)[:200], "parsed_type": str(type(data)), "query_executed": query_to_execute}
                if self.knowledge_base: self.knowledge_base.add_error("SearchAnalyzer_Parse", error_msg, error_payload)
                log_entry["error_details"] = error_msg
                return error_payload

            # --- Step 4: normalise fields downstream code relies on ---
            # The search tool was enabled for this call, so the flag is forced
            # truthy even if the model omitted it or reported it as false.
            data["google_search_needed_and_performed"] = data.get("google_search_needed_and_performed", False) or use_search_tool
            # Always record the query that was actually executed, overriding
            # whatever the model echoed back.
            data["search_query_executed"] = query_to_execute

            list_fields_to_check = ["internal_faiss_results_summary", "google_search_results_summary", "new_questions_or_leads_from_this_query"]
            for field in list_fields_to_check:
                if not isinstance(data.get(field), list):
                    print(f"   WARNING_SA_MODEL_SEARCH_V2: Output for '{field}' not a list. Defaulting to []. Original: {data.get(field)}")
                    data[field] = []

            # --- Step 5: persist the analysis and optionally ingest web findings ---
            self.knowledge_base.add_search_analysis(query_to_execute, data)
            if data.get("google_search_results_summary"):
                 print("   DEBUG_SA_MODEL_SEARCH_V2: Triggering RAG ingestion for model-provided search results.")
                 await self._process_and_potentially_ingest_web_findings(data, query_to_execute)
            return data

        except Exception as e_outer_turn:
            # Top-level boundary for the turn: report the failure as an error
            # payload instead of letting it escape to the caller.
            error_msg = f"Critical error in SearchAnalyzer turn (Model-Executed Search V2): {e_outer_turn}"; print(error_msg); traceback.print_exc()
            error_payload = {"error": error_msg, "exception_obj": str(e_outer_turn), "query_executed": query_to_execute}
            if self.knowledge_base: self.knowledge_base.add_error("SearchAnalyzer_Outer", "Critical turn error", error_payload)
            self.research_log.append({"agent": "SearchAnalyzer_Outer_Critical", "query_executed": query_to_execute, "error_details": error_msg, "timestamp": time.time()})
            return error_payload


    async def _get_interim_consolidation(self) -> str:
        """Produce a short interim progress report for the current session.

        Builds a prompt from the original query and the knowledge-base
        summary (requested with ``max_len=15000``), sends it to the LLM as a
        non-streamed "Auxiliary_Lite_Task" call using the consolidator system
        prompt, and returns the parsed report text.

        Returns:
            The report text from the LLM. If the knowledge base is missing, a
            fixed notice string is returned. If the parsed report is empty or
            starts with "ERROR: LLM call failed", a fallback text assembled
            from knowledge-base counters is returned instead.
        """
        print("\n--- 🔄 Generating Interim Consolidated Report ---")
        if not self.knowledge_base:
            return "KnowledgeBase not initialized for interim report."

        summary_text = self.knowledge_base.get_summary_for_consolidator(max_len=15000)
        prompt_pieces = [
            f"Original User Query for this session: {self.knowledge_base.original_query}\n\n",
            f"Current Knowledge Base Summary (Session ID: {self.knowledge_base.session_id}):\n{summary_text}\n\n",
            "This is an INTERIM report. Please synthesize the current findings into a CONCISE summary ",
            "of the research progress so far for the Original User Query. Focus on key actionable insights, ",
            "what has been answered, and what major gaps remain. Keep it brief (2-4 paragraphs).",
        ]
        user_content = "".join(prompt_pieces)

        llm_reply, _ = await self._call_llm(
            CONSOLIDATOR_AGENT_SYSTEM_PROMPT,
            user_content,
            agent_type="Auxiliary_Lite_Task",
            stream_override=False,
        )
        # Only the final-answer part of the reply is used for the report.
        _, report_str = parse_llm_response(llm_reply)

        if report_str and not report_str.startswith("ERROR: LLM call failed"):
            return report_str

        # LLM synthesis failed: fall back to a plain statistics summary.
        print(f"Consolidator (interim) LLM call failed: {report_str}")
        fallback_lines = [
            "Interim Update (LLM synthesis failed for interim report - see logs for details):",
            f"Original Query: {self.knowledge_base.original_query}",
            f"Completed Queries: {len(self.knowledge_base.completed_sub_queries)}",
            f"Pending Queries: {len(self.knowledge_base.pending_sub_queries)}",
            f"Total Findings Logged: {len(self.knowledge_base.detailed_findings)}",
            f"Global RAG Documents: {self.knowledge_base.global_faiss_doc_count}",
            "Review detailed logs for more specific progress.",
        ]
        return "\n".join(fallback_lines)

    async def consolidator_agent_turn(self, stream_final_report_override: Optional[bool] = None) -> str:
        """Synthesize the knowledge base into the final, user-facing report.

        Args:
            stream_final_report_override: Forwarded to the LLM call as its
                ``stream_override``. ``None`` is treated as ``True``.

        Returns:
            The report text parsed from the LLM response, or a string starting
            with ``ERROR:`` when the Vertex client / knowledge base is missing
            or the LLM call produced no usable report.
        """
        print("\n--- ✍️ Consolidator Agent Turn (Final Report) ---")

        # Guard clause: both collaborators are required before any LLM work.
        if not (self.vertex_client and self.knowledge_base):
            error_msg = "Consolidator: Vertex AI Client or KnowledgeBase not initialized."
            self.research_log.append(
                {"agent": "Consolidator", "error_details": error_msg, "timestamp": time.time()}
            )
            if self.knowledge_base:
                self.knowledge_base.add_error("Consolidator", error_msg)
            return f"ERROR: {error_msg}"

        kb_digest = self.knowledge_base.get_summary_for_consolidator()

        # The prompt is assembled from fixed sections; the text itself is part of
        # the agent's behavior and must not be reworded.
        prompt_sections = [
            f"Original User Query (Session ID: {self.knowledge_base.session_id}): {self.knowledge_base.original_query}\n\n",
            f"Full Knowledge Base Summary (all findings from executed queries and created artifacts):\n{kb_digest}\n\n",
            "Your task is to synthesize ALL available information from the Knowledge Base Summary into a final, comprehensive, well-structured, and deeply insightful answer to the Original User Query. ",
            "If the primary goal was research, address all aspects of the original query, highlight key findings, different perspectives, and any limitations or gaps. ",
            "If the primary goal was creation of design artifacts (indicated by populated 'created_artifacts' in the KB summary), your main task is to present these created artifacts clearly, perhaps with a brief introduction and conclusion. ",
            "The output should be the final, polished answer intended for the end-user, within <final_answer> tags. Ensure the answer is directly readable and well-formatted (e.g., use Markdown for readability if appropriate for textual synthesis, but if presenting structured artifacts, ensure their structure is clear).",
        ]
        user_content = "".join(prompt_sections)

        # Streaming is the default; an explicit override (True or False) wins.
        stream_requested = True if stream_final_report_override is None else stream_final_report_override

        response_text, _ = await self._call_llm(
            CONSOLIDATOR_AGENT_SYSTEM_PROMPT,
            user_content,
            agent_type="Consolidator",
            stream_override=stream_requested,
        )
        thoughts, report_str = parse_llm_response(response_text)

        # When streaming, thoughts were already emitted live, so only a marker is logged.
        if stream_requested:
            thoughts_note = "Streamed thoughts if any."
        elif thoughts:
            thoughts_note = thoughts[:300] + "..."
        else:
            thoughts_note = "No thoughts."

        if report_str and not report_str.startswith("ERROR:"):
            report_preview = report_str[:500] + "..."
        else:
            report_preview = report_str

        log_entry = {
            "agent": "Consolidator",
            "thoughts_snippet": thoughts_note,
            "final_report_preview": report_preview,
            "input_summary_length": len(kb_digest),
            "timestamp": time.time(),
        }

        llm_failed = not report_str or report_str.startswith("ERROR: LLM call failed")
        if llm_failed:
            log_entry["error_details"] = f"Consolidator (final) LLM call failed: {report_str}"
            print(log_entry["error_details"])
            if self.knowledge_base:
                self.knowledge_base.add_error("Consolidator", "LLM call failed", {"raw_error": report_str})
            self.research_log.append(log_entry)
            return (f"ERROR: The final report could not be generated due to an LLM error.\n"
                    f"Details: {report_str}\n"
                    f"Please review the research log for {self.knowledge_base.session_id} for any partial findings.")

        self.research_log.append(log_entry)

        # A non-streamed report has not been shown yet, so print it in full here.
        if not stream_requested:
            print(f"🧠 Consolidator Thoughts (snippet): {thoughts[:200]}...")
            print("\n--- 📜 Final Report (Non-Streamed) ---")
            print(report_str)
            print("--- End of Report ---")
        return report_str

    # --- Creator Agent: plans the creation sub-tasks once, then executes one sub-task per call ---

    async def creator_agent_turn(self,
                                 elaboration_task: str,
                                 blueprint_summary: str,
                                 current_creation_cycle: int, # This is the overall cycle for the task
                                 max_creation_cycles: int # This is the budget of cycles for the entire creation process
                                 ) -> Tuple[Optional[Dict[str, Any]], bool, str]:
        """Run one Creator Agent turn: plan on first use, then execute one sub-task.

        When the knowledge base holds no plan and no sub-task has been attempted
        yet, the LLM is first asked for a ``creation_plan`` (this also resets
        ``created_artifacts``). The call then falls through and executes the
        first sub-task of the new plan. Every later call executes the sub-task
        at index ``creator_completed_subtasks_count``.

        Args:
            elaboration_task: The overall task text given to both LLM prompts.
            blueprint_summary: Knowledge-base summary; only its first 15,000
                characters are embedded in the planning prompt.
            current_creation_cycle: Cycle number, used for progress messages only.
            max_creation_cycles: Cycle budget; ``max_creation_cycles - 1``
                (floored at 0) is reported to the planner as available cycles.

        Returns:
            ``(artifact_piece, creation_is_complete, status_message)``.
            ``artifact_piece`` is ``None`` whenever planning or the current
            sub-task failed. A failed sub-task is recorded on the knowledge
            base, counted as attempted, and never raises to the caller.
        """
        if not self.knowledge_base:
            return None, True, "CreatorAgent Error: KnowledgeBase not initialized."

        agent_name_plan = "CreatorAgent_Planner"
        agent_name_exec = "CreatorAgent_Executor"
        status_message = ""

        def fail_current_subtask(message: str, details: Dict[str, Any]) -> Tuple[None, bool, str]:
            """Record an executor failure, skip the sub-task, and build the return tuple."""
            print(f"   ERROR: {message}")
            self.knowledge_base.add_error(agent_name_exec, message, details)
            # Advancing the counter guarantees progress: a broken sub-task is
            # skipped rather than retried forever.
            self.knowledge_base.creator_completed_subtasks_count += 1
            plan_exhausted = (self.knowledge_base.creator_completed_subtasks_count
                              >= len(self.knowledge_base.creator_plan))
            return None, plan_exhausted, message

        # --- Planning Phase ---
        if not self.knowledge_base.creator_plan and self.knowledge_base.creator_completed_subtasks_count == 0:
            print(f"\n--- 💡 Creator Agent - Planning Phase (Cycle {current_creation_cycle} of task budget {max_creation_cycles}) ---")
            self.knowledge_base.created_artifacts = {} # Reset artifacts when new plan is made

            # One cycle of the budget is considered spent on planning itself.
            remaining_cycles_for_execution = max(max_creation_cycles - 1, 0)

            planning_user_content = (
                f"ELABORATION TASK (Original User Query):\n{elaboration_task}\n\n"
                f"EXISTING BLUEPRINT SUMMARY (Primary Resource - First 15k chars of KB summary):\n{blueprint_summary[:15000]}...\n\n"
                f"AVAILABLE CREATION CYCLES (for all sub-tasks to be planned): {remaining_cycles_for_execution}\n\n"
                "Your task is to deconstruct the ELABORATION TASK into a sequence of manageable sub-tasks. "
                "For each sub-task, identify 2-3 keywords/phrases for RAG against the full blueprint_summary. "
                "Output a JSON object with a 'creation_plan' list as per your system prompt. "
                "Ensure the number of sub-tasks is reasonable for the available creation cycles."
            )
            response_text, _ = await self._call_llm(
                CREATOR_PLANNER_SYSTEM_PROMPT, planning_user_content, agent_type=agent_name_plan,
            )
            thoughts, plan_json_str = parse_llm_response(response_text)
            # `or ""` keeps the previews safe if the parser hands back None.
            plan_preview = (plan_json_str or "")[:500]
            self.research_log.append({
                "agent": agent_name_plan, "thoughts_snippet": (thoughts or "")[:300]+"...",
                "output_plan_str": plan_preview+"...", "timestamp": time.time()
            })

            try:
                if not plan_json_str or plan_json_str.startswith("ERROR:"):
                    raise ValueError(f"Creator Planner LLM call failed or returned error: {plan_json_str}")

                plan_data = json.loads(plan_json_str)

                candidate_plan: List[Any] = []
                if isinstance(plan_data, dict) and isinstance(plan_data.get("creation_plan"), list) and plan_data["creation_plan"]:
                    candidate_plan = plan_data["creation_plan"]
                elif isinstance(plan_data, list): # LLM might return the list directly
                    print("   Creator Planner: LLM returned a list directly, attempting to wrap in {'creation_plan': ...}")
                    candidate_plan = plan_data
                elif isinstance(plan_data, dict) and "sub_task_id" in plan_data: # LLM might return a single sub-task object
                    print("   Creator Planner: LLM returned a single sub-task object, attempting to wrap in a list.")
                    candidate_plan = [plan_data]

                # The execution phase calls .get() on every sub-task, so anything
                # that is not a JSON object cannot be executed and is dropped here.
                usable_plan = [entry for entry in candidate_plan if isinstance(entry, dict)]
                if len(usable_plan) != len(candidate_plan):
                    print(f"   Creator Planner: Ignored {len(candidate_plan) - len(usable_plan)} plan entries that were not JSON objects.")

                if usable_plan:
                    self.knowledge_base.creator_plan = usable_plan
                    status_message = f"Creator Agent: Generated creation plan with {len(self.knowledge_base.creator_plan)} sub-tasks."
                    print(f"   {status_message}")
                else:
                    status_message = "Creator Agent: Planning phase resulted in an empty or invalid plan structure after attempting recovery."
                    print(f"   ERROR: {status_message} Raw output was: {plan_preview}...")
                    self.knowledge_base.add_error(agent_name_plan, status_message, {"raw_output": plan_json_str})
                    return None, True, status_message # Halt creation if planning fails definitively
            except Exception as e:
                # Deliberate catch-all boundary: any planning failure halts creation
                # with a status tuple instead of propagating to the caller.
                status_message = f"CreatorAgent Planner failed to parse plan: {e}. Output: {plan_preview}..."
                print(f"   ERROR: {status_message}"); self.knowledge_base.add_error(agent_name_plan, status_message, {"raw_output": plan_json_str})
                return None, True, status_message # Halt creation if planning fails

        # --- Execution Phase ---
        if not self.knowledge_base.creator_plan: # Check again, in case planning failed above
            return None, True, "CreatorAgent Error: No creation plan available to execute."

        if self.knowledge_base.creator_completed_subtasks_count >= len(self.knowledge_base.creator_plan):
            status_message = "Creator Agent: All planned sub-tasks previously marked complete."
            return {"artifact_id": "finalization", "generated_content": "All creation sub-tasks complete."}, True, status_message

        current_sub_task_index = self.knowledge_base.creator_completed_subtasks_count
        sub_task = self.knowledge_base.creator_plan[current_sub_task_index]
        sub_task_id = sub_task.get("sub_task_id", f"subtask_{current_sub_task_index}")
        # str() keeps the slicing below safe if the LLM supplied a non-string description.
        sub_task_desc = str(sub_task.get("sub_task_description", "No description for sub-task."))

        print(f"\n--- 💡 Creator Agent - Execution Phase (Overall Cycle {current_creation_cycle} of budget {max_creation_cycles} - Sub-task {current_sub_task_index + 1}/{len(self.knowledge_base.creator_plan)}: {sub_task_desc[:100]}...) ---")

        rag_context_for_subtask = "No specific RAG context retrieved for this sub-task (FAISS manager/index unavailable or no keywords/results)."
        if FAISS_AVAILABLE and self.faiss_manager and hasattr(self.faiss_manager, 'search_global_index_async') and self.faiss_manager.index and self.faiss_manager.index.ntotal > 0:
            keywords = sub_task.get("required_blueprint_info_keywords", [])
            if keywords and isinstance(keywords, list) and all(isinstance(k, str) for k in keywords):
                rag_query = f"Regarding blueprint: details on {', '.join(keywords)} for task '{sub_task_desc[:70]}...'"
                print(f"     Creator RAG Query for blueprint: {rag_query[:150]}...")
                rag_results = await self.faiss_manager.search_global_index_async(rag_query, top_k=CREATOR_AGENT_RAG_TOP_K)
                if rag_results:
                    rag_context_parts = [f"Blueprint Snippet {i+1} (Doc ID: {r.get('internal_doc_id', 'N/A')}, Similarity: {r.get('similarity_score',0.0):.2f}):\n{r.get('summary', 'N/A')}" for i, r in enumerate(rag_results)]
                    rag_context_for_subtask = "\n\n".join(rag_context_parts)
                    print(f"     Retrieved {len(rag_results)} blueprint snippets for sub-task (max {CREATOR_AGENT_RAG_TOP_K}).")
                else:
                    print(f"     No RAG results found for keywords: {keywords}")
            else:
                print("     No valid keywords found for RAG query in sub-task.")

        execution_user_content = (
            f"OVERALL ELABORATION TASK (For Context):\n{elaboration_task}\n\n"
            f"CURRENT SUB-TASK ID: {sub_task_id}\nCURRENT SUB-TASK DESCRIPTION:\n{sub_task_desc}\n\n"
            f"RELEVANT BLUEPRINT CONTEXT (Retrieved via RAG from full blueprint_summary - Use this as primary info):\n{rag_context_for_subtask}\n\n"
            "YOUR TASK: Generate the specific design artifact piece for the CURRENT SUB-TASK. Output a JSON object as per your system prompt."
        )
        response_text_exec, _ = await self._call_llm(
            CREATOR_EXECUTOR_SYSTEM_PROMPT, execution_user_content, agent_type=agent_name_exec
        )
        thoughts_exec, artifact_piece_json_str = parse_llm_response(response_text_exec)
        raw_artifact_text = artifact_piece_json_str or ""

        self.research_log.append({
            "agent": agent_name_exec, "sub_task_id": sub_task_id,
            "thoughts_snippet": (thoughts_exec or "")[:300]+"...",
            "output_artifact_piece_str": raw_artifact_text[:500]+"...",
            "rag_query_keywords_used": sub_task.get("required_blueprint_info_keywords", []), "timestamp": time.time()
        })

        # A failed/empty LLM result is not JSON at all: report it as a failed
        # sub-task here. (Raising ValueError for it would bypass an
        # `except json.JSONDecodeError` handler and escape this method.)
        if not raw_artifact_text or raw_artifact_text.startswith("ERROR:"):
            return fail_current_subtask(
                f"Creator Executor LLM call failed or returned error for '{sub_task_id}': {artifact_piece_json_str}",
                {"raw_output": artifact_piece_json_str, "sub_task": sub_task},
            )

        try:
            artifact_piece_data = json.loads(raw_artifact_text)
        except json.JSONDecodeError as e:
            print(f"INFO: Initial JSON parsing failed for Creator Executor. Error: {e}. Attempting self-correction...")
            corrected_json_string = await self._attempt_json_self_correction_with_llm(artifact_piece_json_str, sub_task_desc, expected_type="object")
            try:
                artifact_piece_data = json.loads(corrected_json_string)
                print("   Self-correction successful for Creator output.")
            except (TypeError, ValueError) as e_corr_parse:
                # TypeError covers a non-string correction result; ValueError
                # covers json.JSONDecodeError for still-invalid JSON.
                return fail_current_subtask(
                    f"CreatorAgent Executor failed to parse artifact after self-correction for '{sub_task_id}': {e_corr_parse}. Original: {raw_artifact_text[:100]}... Corrected: {str(corrected_json_string)[:100]}...",
                    {"raw_output": artifact_piece_json_str, "corrected_attempt": corrected_json_string, "sub_task": sub_task},
                )

        if artifact_piece_data is None: # e.g. the LLM answered with a literal JSON null
            return fail_current_subtask(
                f"CreatorAgent Executor failed to parse artifact for '{sub_task_id}' even after correction attempt. Original Output: {raw_artifact_text[:200]}...",
                {"raw_output": artifact_piece_json_str, "sub_task": sub_task},
            )

        if not (isinstance(artifact_piece_data, dict) and "generated_content" in artifact_piece_data):
            return fail_current_subtask(
                f"Creator Agent: Executor output for '{sub_task_id}' was valid JSON but lacked 'generated_content' or wasn't a dict. Output: {str(artifact_piece_data)[:200]}...",
                {"parsed_output": artifact_piece_data, "sub_task": sub_task},
            )

        actual_artifact_id = artifact_piece_data.get("artifact_id", sub_task_id) # Prefer ID from LLM if provided
        self.knowledge_base.created_artifacts[actual_artifact_id] = artifact_piece_data
        status_message = f"Creator Agent: Successfully generated artifact piece for sub-task ID '{actual_artifact_id}'. Confidence: {artifact_piece_data.get('confidence_score', 'N/A')}"
        print(f"   {status_message}")
        if artifact_piece_data.get("notes_or_questions_for_next_step"):
            print(f"     LLM Notes/Questions: {artifact_piece_data['notes_or_questions_for_next_step']}")
        self.knowledge_base.creator_completed_subtasks_count += 1

        creation_is_complete = self.knowledge_base.creator_completed_subtasks_count >= len(self.knowledge_base.creator_plan)
        if creation_is_complete:
            status_message += " All creation sub-tasks are now marked complete."
            print(f"   {status_message}")

        return artifact_piece_data, creation_is_complete, status_message



    def _format_created_artifacts_to_markdown(self) -> str:
        """Render every created artifact of the session as one Markdown document.

        String content is fenced as ``mermaid`` only when its first meaningful
        line (blank lines and ``%%`` comment lines are skipped) is a Mermaid
        diagram declaration, and as ``plantuml`` when it contains ``@startuml``.
        Other multi-line or long (> 80 chars) strings are fenced as ``text``;
        short single-line strings are emitted as-is. Dict/list content is
        pretty-printed as JSON.

        Returns:
            The Markdown text, or a fixed notice when there is no knowledge
            base or no artifact.
        """
        if not self.knowledge_base or not self.knowledge_base.created_artifacts:
            return "No artifacts were created in this session."

        # Matched against a stripped line. A plain substring test is not used
        # because words such as "piece" or "journey" in ordinary prose would
        # otherwise be mistaken for the `pie` / `journey` diagram types.
        mermaid_header = re.compile(
            r"(?:graph|flowchart)\s+(?:TD|TB|BT|LR|RL)\b"
            r"|(?:sequenceDiagram|classDiagram|stateDiagram|erDiagram)(?:-v2)?\s*$"
            r"|(?:journey|gantt)\s*$"
            r"|pie(?:\s+showData)?(?:\s+title\b.*)?\s*$"
        )

        def starts_with_mermaid_declaration(text: str) -> bool:
            """Return True when the first meaningful line declares a Mermaid diagram."""
            for raw_line in text.splitlines():
                candidate = raw_line.strip()
                if not candidate or candidate.startswith("%%"):
                    continue
                return mermaid_header.match(candidate) is not None
            return False

        markdown_output = ["# CognitoSynth - Created Design Artifacts\n"]
        if self.knowledge_base.session_id:
            markdown_output.append(f"**Session ID:** {self.knowledge_base.session_id}")
        if self.knowledge_base.original_query:
            markdown_output.append(f"**Original Elaboration Task:** {self.knowledge_base.original_query}\n")

        if self.knowledge_base.creator_plan:
            markdown_output.append(f"**Creation Plan Summary:** {len(self.knowledge_base.creator_plan)} sub-tasks planned, {self.knowledge_base.creator_completed_subtasks_count} attempted/completed.\n")

        for artifact_id, artifact_data_obj in self.knowledge_base.created_artifacts.items():
            markdown_output.append(f"\n---\n## Artifact: `{artifact_id}`\n")
            if not isinstance(artifact_data_obj, dict):
                # Unexpected shape: show whatever was stored rather than dropping it.
                markdown_output.append(f"```text\n{str(artifact_data_obj)}\n```")
                continue

            generated_content = artifact_data_obj.get("generated_content", "No content generated for this artifact.")
            confidence = artifact_data_obj.get("confidence_score", "N/A")
            notes = artifact_data_obj.get("notes_or_questions_for_next_step", "")

            markdown_output.append(f"**Confidence Score:** {confidence}\n")
            if notes:
                markdown_output.append(f"**LLM Notes/Questions:** {notes}\n")

            markdown_output.append("\n**Generated Content:**\n")
            if isinstance(generated_content, str):
                if starts_with_mermaid_declaration(generated_content):
                    markdown_output.append(f"```mermaid\n{generated_content}\n```")
                elif "@startuml" in generated_content.lower():
                    markdown_output.append(f"```plantuml\n{generated_content}\n```")
                elif '\n' in generated_content or len(generated_content) > 80:
                    markdown_output.append(f"```text\n{generated_content}\n```")
                else:
                    markdown_output.append(generated_content)
            elif isinstance(generated_content, (dict, list)):
                markdown_output.append(f"```json\n{json.dumps(generated_content, indent=2, ensure_ascii=False)}\n```")
            else:
                markdown_output.append(f"```text\n{str(generated_content)}\n```")

        return "\n".join(markdown_output)

    def _summarize_research_log_for_display(self, max_entries=25, max_len_per_entry=400) -> str:
        """Build a human-readable digest of the most recent research-log entries.

        Entries are listed newest first, one line each, with an agent-specific
        preview. When the knowledge base has recorded errors, the last three
        are appended in a separate section.

        Args:
            max_entries: How many of the most recent log entries to list.
                Zero or a negative value lists none.
            max_len_per_entry: Maximum length of one entry line; longer lines
                are cut and suffixed with ``...``.

        Returns:
            The digest text, or a fixed notice when the log is empty.
        """
        if not self.research_log:
            return "Research log is empty for this session."

        effective_max_entries = max_entries

        session_label = self.knowledge_base.session_id if self.knowledge_base else 'N/A'
        parts = [f"Research Log Highlights for Session '{session_label}' (most recent {effective_max_entries} entries):\n"]

        # `log[-0:]` is the whole list, so a non-positive limit needs its own branch.
        entries_to_show = self.research_log[-effective_max_entries:] if effective_max_entries > 0 else []

        for entry in reversed(entries_to_show):
            ts_raw = entry.get("timestamp", 0)
            ts = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts_raw)) if ts_raw else "N/A"
            agent = str(entry.get("agent", "UnknownAgent"))
            error_info = entry.get("error_details") or entry.get("error")

            # Every previewed value goes through str() before slicing: log
            # fields may hold None or non-string objects.
            if agent == "Planner_Phase1":
                output_preview = f"Candidates: {str(entry.get('output', 'N/A'))[:150]}"
            elif agent == "Planner_Phase2_BatchRefine":
                output_preview = f"Refined: {str(entry.get('refined_output_raw', 'N/A'))[:150]}"
            elif agent.startswith("SearchAnalyzer"):
                query_exec = str(entry.get('query_executed', 'N/A'))
                rag_count = entry.get("rag_results_provided_to_llm_count", 0)
                output_preview = f"Query: '{query_exec[:50]}...', RAG:{rag_count}"
                synth_preview = entry.get("llm_final_output_preview", "N/A")
                if synth_preview != "N/A":
                    output_preview += f" -> Synth: {str(synth_preview)[:70]}..."
            elif agent.startswith("CreatorAgent_Planner"):
                output_preview = f"Plan Output: {str(entry.get('output_plan_str', 'N/A'))[:100]}..."
            elif agent.startswith("CreatorAgent_Executor") or agent == "CreatorAgent_CycleExecution":
                sub_task_id = entry.get('sub_task_id_attempted', entry.get('sub_task_id', 'N/A'))
                content_preview = entry.get('output_preview', entry.get('output_artifact_piece_str', 'N/A'))
                output_preview = f"Sub-task: '{sub_task_id}', Output: {str(content_preview)[:100]}..."
            elif agent == "Consolidator":
                output_preview = f"Report Preview: {str(entry.get('final_report_preview', 'N/A'))[:100]}..."
            elif agent == "LLM_CALLER_VERTEX":
                output_preview = str(error_info or "LLM Call details in log")
            else:
                # default=str is deliberate: this is a display-only preview, so an
                # unserialisable value should be shown, not crash the summary.
                generic_fields = {k: v for k, v in entry.items() if k != 'thoughts_snippet'}
                output_preview = json.dumps(generic_fields, default=str)[:max(max_len_per_entry - 50, 0)]

            if error_info and agent not in ["LLM_CALLER_VERTEX"]:
                output_preview += f" | ERROR: {str(error_info)[:100]}..."

            summary_line = f"- [{ts}] {agent}: {output_preview}"
            parts.append(summary_line[:max_len_per_entry])
            parts.append("...\n" if len(summary_line) > max_len_per_entry else "\n")

        if self.knowledge_base and self.knowledge_base.errors:
            parts.append(f"\n--- KnowledgeBase Errors Logged This Session ({len(self.knowledge_base.errors)}) ---\n")
            for err_idx, err in enumerate(self.knowledge_base.errors[-3:]): # Show last 3 KB errors
                parts.append(f"  KB Error {err_idx+1}: Agent '{err.get('agent', 'N/A')}' - {err.get('message', 'N/A')}\n")
                details = err.get('details')
                if details and isinstance(details, dict) and details.get('raw_output'):
                    parts.append(f"    LLM Raw Output causing error (preview): {str(details['raw_output'])[:150]}...\n")
                elif details:
                    parts.append(f"    Details: {str(details)[:150]}...\n")
        return "".join(parts)

    async def run_deep_research(self, user_query: str,
                                current_session_id: Optional[str] = None,
                                stream_final_report_override: Optional[bool] = None,
                                max_cycles_override: Optional[int] = None) -> str:
        if not self.vertex_client:
            return "System Error: Vertex AI Client failed to initialize earlier. Cannot run research."

        if not self.knowledge_base or (current_session_id and self.knowledge_base.session_id != current_session_id):
            if current_session_id:
                if not self._load_session_state(current_session_id):
                    print(f"Could not load session '{current_session_id}'. Starting new KB for this session ID with query: '{user_query}'.")
                    self.knowledge_base = KnowledgeBase(user_query, current_session_id)
                    self.research_log = []
                elif self.knowledge_base and self.knowledge_base.original_query != user_query:
                     print(f"Warning: Loaded session '{current_session_id}' (orig_query: '{self.knowledge_base.original_query}') being run with new query focus: '{user_query}'. Updating session's original query and resetting creator state.")
                     self.knowledge_base.original_query = user_query
                     self.knowledge_base.pending_sub_queries = [user_query]
                     self.knowledge_base.creator_plan = []
                     self.knowledge_base.created_artifacts = {}
                     self.knowledge_base.creator_completed_subtasks_count = 0
            else:
                session_id_new = sanitize_filename(user_query) + "_" + str(int(time.time()))
                print(f"No specific session ID. Initializing new KB for session: {session_id_new} with query: '{user_query}'")
                self.knowledge_base = KnowledgeBase(user_query, session_id_new)
                self.research_log = []
        elif self.knowledge_base and self.knowledge_base.original_query != user_query :
            print(f"Warning: Existing KB for session '{self.knowledge_base.session_id}' (orig_query: '{self.knowledge_base.original_query}') being run with new query focus: '{user_query}'. Updating KB original query and resetting creator state.")
            self.knowledge_base.original_query = user_query
            self.knowledge_base.pending_sub_queries = [user_query]
            self.knowledge_base.creator_plan = []
            self.knowledge_base.created_artifacts = {}
            self.knowledge_base.creator_completed_subtasks_count = 0

        if not self.knowledge_base:
            error_msg = "System Error: KnowledgeBase could not be initialized for the research run."
            print(error_msg); self.research_log.append({"agent": "SystemRun", "error_details": error_msg, "timestamp": time.time()})
            return error_msg

        print(f"\n🚀 Starting/Continuing Research/Creation for Query: \"{self.knowledge_base.original_query}\" (Session: {self.knowledge_base.session_id})")

        if FAISS_AVAILABLE and (not self.faiss_manager or not self.faiss_manager.index):
            print("Faiss Manager/index not ready. Initializing...")
            self._initialize_faiss_manager_globally(force_new_index=False)

        if self.faiss_manager and self.faiss_manager.index:
             self.knowledge_base.global_faiss_doc_count = self.faiss_manager.index.ntotal
        elif self.knowledge_base:
             self.knowledge_base.global_faiss_doc_count = 0

        current_overall_cycle = 0
        max_total_cycles_for_this_task = max_cycles_override if max_cycles_override is not None else INITIAL_MAX_CYCLES_IF_NO_OTHER_STOP

        is_elaboration_task = "elaborate on this architecture" in user_query.lower() or \
                              ("leveraging the established" in user_query.lower() and "blueprint" in user_query.lower()) or \
                              ("design the internal workings" in user_query.lower() and "CIAL" in user_query) or \
                              ("illustrative learning pathway" in user_query.lower())

        max_planner_cycles = max_total_cycles_for_this_task
        if is_elaboration_task and "not to conduct new web searches" in user_query.lower():
            max_planner_cycles = 0
            print(f"Task identified as blueprint elaboration with NO NEW WEB SEARCHES. Max Planner cycles: {max_planner_cycles}. Total cycles for task: {max_total_cycles_for_this_task}.")
        elif is_elaboration_task:
            max_planner_cycles = min(1, max_total_cycles_for_this_task)
            print(f"Task identified as blueprint elaboration. Max Planner cycles: {max_planner_cycles}. Total cycles for task: {max_total_cycles_for_this_task}.")


        if IPYTHON_AVAILABLE and current_overall_cycle == 0:
            clear_output(wait=True)
            print(f"🚀 Initializing for: \"{self.knowledge_base.original_query}\" (Session: {self.knowledge_base.session_id})")
            print(f"Global Faiss Index: {self.knowledge_base.global_faiss_doc_count} documents. Max total cycles for task: {max_total_cycles_for_this_task}")
            if not hasattr(self, '_initial_state_displayed_this_run') or not self._initial_state_displayed_this_run:
                interim_report_display = await self._get_interim_consolidation()
                display(HTML(f"<pre style='white-space: pre-wrap; font-family: monospace; background-color: #f0f0f0; padding: 10px; border-radius: 5px;'><b>INITIAL STATE - Session: {self.knowledge_base.session_id}</b><br/>{interim_report_display}</pre>"))
                print(f"\n{self._summarize_research_log_for_display(max_entries=5, max_len_per_entry=200)}\n")
                self._initial_state_displayed_this_run = True

        search_queries_from_planner = ["initial_dummy_query_to_start_loop"]
        research_phase_active = True if max_planner_cycles > 0 else False
        research_phase_cycles_run_this_turn = 0

        if max_planner_cycles == 0 and is_elaboration_task:
            print("Skipping Research Phase as max_planner_cycles is 0 for this elaboration task.")
            research_phase_active = False
            search_queries_from_planner = []

        while research_phase_cycles_run_this_turn < max_planner_cycles and search_queries_from_planner and research_phase_active:
            current_overall_cycle += 1
            research_phase_cycles_run_this_turn +=1
            print(f"\n===== 🔄 RESEARCH CYCLE {current_overall_cycle}/{max_total_cycles_for_this_task} (Planner Phase {research_phase_cycles_run_this_turn}/{max_planner_cycles}) =====")
            start_cycle_time = time.time()
            search_queries_from_planner = await self.planner_agent_turn(current_overall_cycle, max_total_cycles_for_this_task)

            if not search_queries_from_planner:
                planner_error_logged = any(
                    "Planner" in entry.get("agent","") and
                    (entry.get("error_details") or (isinstance(entry.get("output"), str) and entry.get("output","").startswith("ERROR:")))
                    for entry in reversed(self.research_log[-3:])
                )
                if planner_error_logged: print("❌ Planner Agent encountered an error. Halting research phase.")
                else: print("✅ Planner: No more web search queries suggested. Ending research phase.")
                research_phase_active = False; break

            print(f"📋 Planner suggested {len(search_queries_from_planner)} web search queries: {search_queries_from_planner}")
            for i, query in enumerate(list(search_queries_from_planner)):
                if not query or not query.strip(): continue
                print(f"\n   >>> Processing Web/RAG Query {i+1}/{len(search_queries_from_planner)}: '{query}'")
                if query not in self.knowledge_base.completed_sub_queries and query not in self.knowledge_base.pending_sub_queries:
                    self.knowledge_base.add_dependent_query(query, [], "From Planner for current cycle.")
                analysis_result = await self.search_analyzer_agent_turn(query)
                if isinstance(analysis_result, dict) and "error" not in analysis_result:
                    if analysis_result.get("google_search_needed_and_performed") and analysis_result.get("google_search_results_summary"):
                        await self._process_and_potentially_ingest_web_findings(analysis_result, query)
                await asyncio.sleep(0.5)

            cycle_duration = time.time() - start_cycle_time
            self._save_current_session_state()
            if IPYTHON_AVAILABLE:
                clear_output(wait=True); print(f"🚀 Research for: \"{self.knowledge_base.original_query}\" - Cycle {current_overall_cycle} ENDED")
                interim_report = await self._get_interim_consolidation(); display(HTML(f"<pre style='white-space: pre-wrap;'><b>INTERIM (After Cycle {current_overall_cycle})</b><br/>{interim_report}</pre>"))
                print(f"\n{self._summarize_research_log_for_display(max_entries=5, max_len_per_entry=200)}\n")
            print(f"===== ✅ End of RESEARCH CYCLE {current_overall_cycle} (Duration: {cycle_duration:.2f}s) =====")
            if not self.knowledge_base.pending_sub_queries and search_queries_from_planner:
                 print("All planner queries processed, no other pending. Ending research phase early.");
                 research_phase_active = False; break
            await asyncio.sleep(1)

        final_report_str = ""
        creator_phase_was_active = False

        if is_elaboration_task and (not research_phase_active or research_phase_cycles_run_this_turn >= max_planner_cycles):
            print("\n✨ Research phase concluded or skipped. Task identified as blueprint elaboration. Triggering Creator Agent... ✨")
            creator_phase_was_active = True
            creation_complete_flag = False

            if not self.knowledge_base.creator_plan and self.knowledge_base.creator_completed_subtasks_count == 0:
                print("   Creator Agent: Performing initial planning sub-task...")
                if current_overall_cycle < max_total_cycles_for_this_task:
                    current_overall_cycle += 1
                    print(f"   (This planning step counts as overall cycle {current_overall_cycle}/{max_total_cycles_for_this_task})")
                    _, _, plan_status_msg = await self.creator_agent_turn(
                        elaboration_task=self.knowledge_base.original_query,
                        blueprint_summary=self.knowledge_base.get_summary_for_consolidator(max_len=75000),
                        current_creation_cycle=0,
                        max_creation_cycles=(max_total_cycles_for_this_task - research_phase_cycles_run_this_turn)
                    )
                    print(f"   Creator Planner Status: {plan_status_msg}")
                    self._save_current_session_state()
                    if not self.knowledge_base.creator_plan:
                        final_report_str = json.dumps({"creator_agent_error": "Failed to generate a creation plan. " + plan_status_msg})
                        creation_complete_flag = True
                else:
                    final_report_str = json.dumps({"creator_agent_error": "Not enough cycles remaining for Creator Agent planning."})
                    creation_complete_flag = True


            if self.knowledge_base.creator_plan and not creation_complete_flag:
                print(f"   Creator Agent: Plan ready with {len(self.knowledge_base.creator_plan)} sub-tasks. Starting execution phase...")

                while current_overall_cycle < max_total_cycles_for_this_task and not creation_complete_flag:
                    current_overall_cycle += 1
                    print(f"\n===== ✨ CREATION CYCLE {current_overall_cycle}/{max_total_cycles_for_this_task} (Creator Execution Sub-Task) =====")
                    start_cycle_time = time.time()

                    generated_artifact_piece, creation_is_complete_now, status_msg = await self.creator_agent_turn(
                        elaboration_task=self.knowledge_base.original_query,
                        blueprint_summary=self.knowledge_base.get_summary_for_consolidator(max_len=75000),
                        current_creation_cycle=current_overall_cycle,
                        max_creation_cycles=max_total_cycles_for_this_task
                    )
                    creation_complete_flag = creation_is_complete_now
                    print(f"   Creator Execution Status: {status_msg}")

                    self.research_log.append({
                        "agent": "CreatorAgent_CycleExecution", "cycle": current_overall_cycle,
                        "sub_task_id_attempted": generated_artifact_piece.get("artifact_id", "N/A") if isinstance(generated_artifact_piece, dict) else "N/A",
                        "output_preview": str(generated_artifact_piece)[:500]+"...",
                        "creation_complete_flag_returned": creation_is_complete_now,
                        "status_message_from_creator": status_msg, "timestamp": time.time()
                    })
                    cycle_duration = time.time() - start_cycle_time
                    self._save_current_session_state()
                    if IPYTHON_AVAILABLE:
                        clear_output(wait=True); print(f"🚀 Creation for: \"{self.knowledge_base.original_query}\" - Cycle {current_overall_cycle} ENDED")
                        interim_report = await self._get_interim_consolidation(); display(HTML(f"<pre style='white-space: pre-wrap;'><b>INTERIM (After Creation Cycle {current_overall_cycle})</b><br/>{interim_report}</pre>"))
                        print(f"\n{self._summarize_research_log_for_display(max_entries=5, max_len_per_entry=200)}\n")
                    print(f"===== ✅ End of CREATION CYCLE {current_overall_cycle} (Duration: {cycle_duration:.2f}s) =====")
                    if creation_is_complete_now: break
                    await asyncio.sleep(1)

            if self.knowledge_base.created_artifacts:
                final_report_str = self._format_created_artifacts_to_markdown()

                for artifact_id, artifact_data_obj in self.knowledge_base.created_artifacts.items():
                    if isinstance(artifact_data_obj, dict) and "generated_content" in artifact_data_obj:
                        content_to_save = artifact_data_obj["generated_content"]
                        if isinstance(content_to_save, str) and ("diagram" in artifact_id.lower() or "mermaid" in content_to_save.lower() or "graph TD" in content_to_save or "@startuml" in content_to_save.lower()):
                            ext = ".mmd" if ("mermaid" in content_to_save.lower() or "graph TD" in content_to_save) else ".puml" if "@startuml" in content_to_save.lower() else ".txt"
                            filename = os.path.join(SESSIONS_DIR, self.knowledge_base.session_id, f"{sanitize_filename(artifact_id)}{ext}")
                            try:
                                with open(filename, "w", encoding='utf-8') as f_out: f_out.write(content_to_save)
                                print(f"   Artifact '{artifact_id}' saved to: {filename}")
                            except Exception as e_save_art: print(f"Error saving artifact {artifact_id}: {e_save_art}")

            elif creator_phase_was_active and not final_report_str:
                 final_report_str = json.dumps({"creator_agent_error": "Creator agent was active but produced no compiled artifacts."})

        if not creator_phase_was_active or not final_report_str:
            print(f"\n===== 📜 Generating Final Consolidated Report via Consolidator for Session: {self.knowledge_base.session_id} =====")
            final_report_str = await self.consolidator_agent_turn(stream_final_report_override=stream_final_report_override)

        self._save_current_session_state()
        if hasattr(self, '_initial_state_displayed_this_run'):
            del self._initial_state_displayed_this_run
        return final_report_str



def _prompt_for_unique_doc_id(suggested_id, id_is_taken):
    """Interactively settle on a document ID that is not already in use.

    Args:
        suggested_id: ID proposed for the document (LLM- or content-derived).
        id_is_taken: Callable taking an ID string and returning True when that
            ID must not be reused.

    Returns:
        An ID for which ``id_is_taken`` returned False at the time it was
        chosen, except when the user accepts a free suggestion unchanged.
    """
    if id_is_taken(suggested_id):
        print(f"   Suggested ID '{suggested_id}' already exists.")
        # Keep prompting until a free ID is found. The loop only exits through
        # a `return`, so a colliding ID can never leak out of it.
        while True:
            user_doc_id_input = input("   Please enter a unique ID (or press Enter to slightly modify suggestion): ").strip()
            if not user_doc_id_input:  # User pressed Enter: derive a variant of the suggestion
                candidate_id = f"{suggested_id}_{int(time.time()*1000)%1000}"  # Append ms to make it unique
                if not id_is_taken(candidate_id):
                    print(f"   Using modified unique ID: {candidate_id}")
                    return candidate_id
                # Should be very rare; fall through and ask again.
                print(f"   Modified ID '{candidate_id}' also exists. Please enter manually.")
            elif id_is_taken(user_doc_id_input):
                print(f"   ID '{user_doc_id_input}' already exists. Please choose another.")
            else:
                return user_doc_id_input

    # Suggested ID is free: let the user accept it or type their own.
    user_override_id = input(f"   Use suggested ID '{suggested_id}'? (Y/n for manual override): ").strip().lower()
    if user_override_id != 'n':
        print(f"   Using ID: {suggested_id}")
        return suggested_id

    manual_id = ""
    while not manual_id or id_is_taken(manual_id):
        manual_id = input("   Enter your unique ID: ").strip()
        if not manual_id:
            print("   ID cannot be empty.")
        elif id_is_taken(manual_id):
            print(f"   ID '{manual_id}' already exists.")
    return manual_id


async def main_colab_runner_async():
    """Interactive entry point: pick a session, optionally ingest documents, run research.

    Steps, in order:
      1. Ask whether to start a new session, load a previous one, or do a
         "fresh start" (which deletes every session directory under
         SESSIONS_DIR except ``global_rag`` and asks DeepResearchSystem to
         rebuild the global stores).
      2. Build a DeepResearchSystem and stop if its Vertex client is missing.
      3. Determine the query for this run (new input, default, or the loaded
         session's query with an optional override).
      4. If Faiss is available, optionally collect pasted documents, settle on
         a unique ID and a summary for each, and ingest them.
      5. Call ``run_deep_research`` and print a final summary of the session.

    Returns:
        None. All results are reported via ``print``.
    """
    if not VERTEX_PROJECT_ID or not VERTEX_LOCATION:
        print("Error: VERTEX_PROJECT_ID and VERTEX_LOCATION must be set.")
        return

    print("Initializing Enhanced Deep Research System with FAISS RAG...")

    print("\n--- Research Session Management ---")

    sessions = list_previous_sessions() # This will now print extensive debug info
    print(f"DEBUG (main_colab_runner_async): list_previous_sessions() returned: {sessions}") # Log what list_previous_sessions returned

    load_session_id = None

    force_fresh_global_rag = False

    action = input("Choose action: [N]ew session, [L]oad session, [F]resh start (clears ALL global RAG & sessions): ").strip().upper()

    if action == 'L':
        if not sessions:
            print("No previous sessions found to load. Starting a new session.")
            action = 'N'
        else:
            print("Available sessions:")
            for i, (sid, q, dt) in enumerate(sessions):
                print(f"  {i+1}. {sid} (Query: '{q}' - Last modified: {dt})")
            try:
                choice_str = input(f"Enter number of session to load (1-{len(sessions)}), or 0 for new: ").strip()
                if not choice_str:
                    action = 'N'
                    print("No choice, starting new session.")
                else:
                    choice = int(choice_str) - 1  # Menu is 1-based; 0 therefore maps to -1 ("new")
                    if choice == -1:
                        action = 'N'
                        print("Starting new session as per choice 0.")
                    elif 0 <= choice < len(sessions):
                        load_session_id = sessions[choice][0]
                        print(f"Attempting to load session: {load_session_id}")
                    else:
                        print("Invalid choice. Starting a new session.")
                        action = 'N'
            except ValueError:
                print("Invalid input. Starting a new session.")
                action = 'N'
    elif action == 'F':
        confirm_fresh = input("WARNING: This will DELETE ALL global RAG data (Faiss index, document store) AND ALL past session logs. Are you absolutely sure? (yes/no): ").strip().lower()
        if confirm_fresh == 'yes':
            force_fresh_global_rag = True
            print("Proceeding with a FRESH START. All global RAG data and past sessions will be cleared.")
            if os.path.exists(SESSIONS_DIR):
                import shutil
                # Only per-session directories are removed here; the global_rag
                # directory is left to DeepResearchSystem via the force flag.
                for item_name in os.listdir(SESSIONS_DIR):
                    item_path = os.path.join(SESSIONS_DIR, item_name)
                    if os.path.isdir(item_path) and item_name != "global_rag":
                        try:
                            shutil.rmtree(item_path)
                            print(f"  Deleted session directory: {item_path}")
                        except Exception as e_shutil:
                            print(f"  Error deleting session dir {item_path}: {e_shutil}")
            action = 'N'
        else:
            print("Fresh start aborted. Defaulting to new session.")
            action = 'N'

    # Initialize DeepResearchSystem AFTER potentially setting force_fresh_global_rag
    # so the constructor sees both the session to load and the fresh-start flag.
    research_system = DeepResearchSystem(
        project_id=VERTEX_PROJECT_ID,
        location=VERTEX_LOCATION,
        load_previous_session_id=load_session_id if action == 'L' else None,
        force_new_faiss_and_global_stores=force_fresh_global_rag
    )

    # Crucial check: if vertex_client is None after __init__, stop.
    if not research_system.vertex_client:
        print("STOPPING: DeepResearchSystem Vertex client failed to initialize (likely due to the ValueError during __init__). Cannot proceed.")
        return

    user_query_for_session = ""
    current_session_id_for_run = load_session_id if action == 'L' and load_session_id else None

    if action == 'N' or not current_session_id_for_run:
        try:
            user_query_for_session = input("Please enter your research query for the new session: ").strip()
            if not user_query_for_session:
                print("No query entered. Using default query.")
                user_query_for_session = "What are the latest advancements in quantum computing and their potential impact on cryptography?"
            current_session_id_for_run = None
        except EOFError:
            # No stdin available (e.g. non-interactive run): fall back to a default query.
            print("No input received (EOFError). Using default query for new session.")
            user_query_for_session = "Latest AI impact on renewable energy"

    elif current_session_id_for_run and research_system.knowledge_base:
        user_query_for_session = research_system.knowledge_base.original_query
        print(f"Continuing research for loaded session '{current_session_id_for_run}' with query: \"{user_query_for_session}\"")
        override_q = input(f"Current query is '{user_query_for_session}'. Enter a new query to refine or override, or press Enter to continue with current: ").strip()
        if override_q:
            user_query_for_session = override_q
            print(f"Query for this run overridden to: '{user_query_for_session}'")
    else:
        print("Error in session setup or KB not loaded. Exiting.")
        return

    if FAISS_AVAILABLE and research_system.faiss_manager:
        ingest_choice = input("Do you want to ingest new local documents into the Global RAG store before starting research? (y/N): ").strip().lower()
        if ingest_choice == 'y':
            num_docs_to_ingest = 0
            try:
                num_docs_to_ingest_str = input("How many sample documents to create and ingest? (e.g., 1-3, or 0 to skip): ").strip()
                if not num_docs_to_ingest_str:
                    num_docs_to_ingest = 0
                else:
                    num_docs_to_ingest = int(num_docs_to_ingest_str)
            except ValueError:
                print("Invalid number. Skipping ingestion.")

            if num_docs_to_ingest > 0:
                sample_docs_for_rag = []
                # IDs already assigned in this batch; they are not in the master
                # store until ingestion runs, so they must be tracked separately.
                pending_doc_ids = set()

                def _doc_id_is_taken(candidate_id):
                    return candidate_id in research_system.master_document_store or candidate_id in pending_doc_ids

                print("\n--- Preparing for Manual Document Ingestion ---")
                llm_suggestions_available = bool(research_system.vertex_client)
                if not llm_suggestions_available:
                    print("WARNING: Vertex client not available for automatic ID/summary generation. Manual input will be required for ID and summary.")

                for i in range(num_docs_to_ingest):
                    print(f"\n--- Document {i+1}/{num_docs_to_ingest} ---")
                    doc_content = ""
                    while not doc_content:
                        doc_content = input(f"Paste content for sample document {i+1} (min 50 chars): ").strip()
                        if not doc_content:
                            print("Content cannot be empty.")
                        elif len(doc_content) < 50:
                            print("Content too short, please provide more substantial text (min 50 chars).")
                            doc_content = "" # Reset to loop

                    # Default suggestions (if LLM fails or not available)
                    default_suggested_id = f"manual_{sanitize_filename(doc_content[:25], 25)}_{int(time.time() * 1000) % 100000}" # Added ms
                    default_suggested_summary = doc_content[:120] + "..."

                    final_id_suggestion = default_suggested_id
                    final_summary_suggestion = default_suggested_summary

                    if llm_suggestions_available:
                        print("   Generating suggestions for ID and summary using LLM...")
                        suggestion_prompt = (
                            f"Given the following document content, suggest a concise, descriptive ID (max 40 chars, "
                            f"alphanumeric and hyphens only, e.g., 'key-concept-short-title') and a brief summary (max 150 chars).\n\n"
                            f"DOCUMENT CONTENT (first 1000 chars):\n{doc_content[:1000]}\n\n"
                            f"Output ONLY in this JSON format: {{\"suggested_id\": \"your-id\", \"suggested_summary\": \"Your summary.\"}}"
                        )
                        try:
                            response_text, _ = await research_system._call_llm(
                                system_prompt_text="You are an assistant that generates document IDs and summaries.",
                                user_content_text=suggestion_prompt,
                                agent_type="Auxiliary_Lite_Task"
                            )
                            _, parsed_suggestions_str = parse_llm_response(response_text)
                            if parsed_suggestions_str:
                                suggestions = json.loads(parsed_suggestions_str)
                                if isinstance(suggestions, dict):
                                    llm_id_sugg = suggestions.get("suggested_id")
                                    llm_sum_sugg = suggestions.get("suggested_summary")
                                    if llm_id_sugg:
                                        final_id_suggestion = sanitize_filename(llm_id_sugg, 40)
                                    if llm_sum_sugg:
                                        final_summary_suggestion = llm_sum_sugg[:150]
                                print(f"   LLM Suggestion - ID: {final_id_suggestion}, Summary: {final_summary_suggestion}")
                        except Exception as e_suggest:
                            # Best-effort: the content-derived defaults are still in place.
                            print(f"   Could not get LLM suggestions: {e_suggest}. Using default suggestions based on content.")

                    # ID handling: guaranteed unique against the store and this batch.
                    doc_id = _prompt_for_unique_doc_id(final_id_suggestion, _doc_id_is_taken)

                    # Summary handling
                    doc_summary_to_use = final_summary_suggestion
                    user_override_summary = input(f"   Use suggested summary '{doc_summary_to_use[:70]}...'? (Y/n for manual override): ").strip().lower()
                    if user_override_summary == 'n':
                        doc_summary_to_use = input("   Enter your summary (max 300 chars): ")[:300].strip() or doc_summary_to_use # Fallback to suggestion if empty
                    else:
                        print(f"   Using summary: {doc_summary_to_use[:70]}...")

                    doc_summary = doc_summary_to_use

                    if doc_content and doc_id: # doc_id should always be set now
                        sample_docs_for_rag.append({"id": doc_id, "content": doc_content, "summary": doc_summary})
                        pending_doc_ids.add(doc_id)
                    else:
                        print("Critical error: Content or ID is missing after processing. Skipping this document.")

                if sample_docs_for_rag:
                    await research_system.ingest_documents_into_rag(sample_docs_for_rag)
                    print(f"Completed ingestion of {len(sample_docs_for_rag)} documents.")
                else:
                    print("No valid documents provided for ingestion.")

    else:
        print("Faiss not available or Faiss manager not initialized. Skipping RAG document ingestion option.")

    print(f"\nTarget User Query for this run: \"{user_query_for_session}\"")
    overall_start_time = time.time()
    final_report_str = "Research did not complete successfully."

    try:
        final_report_str = await research_system.run_deep_research(
            user_query_for_session,
            current_session_id=current_session_id_for_run,
            stream_final_report_override=True
        )
    except Exception as e_run:
        print(f"\nAn UNEXPECTED error occurred during the research run: {e_run}")
        traceback.print_exc()
    finally:
        # Always print the closing summary, whether or not the run raised.
        overall_end_time = time.time()
        active_session_id_final = "N/A"
        if research_system.knowledge_base:
            active_session_id_final = research_system.knowledge_base.session_id

        print(f"\n🏁 Deep research process for session '{active_session_id_final}' completed in {overall_end_time - overall_start_time:.2f} seconds.")

        if research_system.knowledge_base:
            print(f"\n--- Final State for Session: {active_session_id_final} ---")
            print(f"Original Query (this session): {research_system.knowledge_base.original_query}")
            print(f"Query Focus (this specific run): {user_query_for_session}")
            print(f"Completed {len(research_system.knowledge_base.completed_sub_queries)} sub-queries.")
            print(f"Pending {len(research_system.knowledge_base.pending_sub_queries)} sub-queries: {research_system.knowledge_base.pending_sub_queries[:3]}...")

            print("\n--- Research Log Summary (Final) ---")
            print(research_system._summarize_research_log_for_display(max_entries=100, max_len_per_entry=500))

            if research_system.knowledge_base.errors:
                print("\n--- KnowledgeBase Errors Logged During Session ---")
                for err_idx, err_entry in enumerate(research_system.knowledge_base.errors):
                    print(f"  KB Error {err_idx+1}: Agent: {err_entry['agent']}, Msg: {err_entry['message']}")
                    if err_entry.get('details'):
                        print(f"    Details: {str(err_entry['details'])[:200]}...")

            print(f"\nAll output files for session '{active_session_id_final}' are in: {os.path.join(SESSIONS_DIR, active_session_id_final)}")
            print(f"Global RAG data (shared across sessions) is in: {GLOBAL_RAG_DIR}")
        else:
            print("KnowledgeBase was not available at the end of the run for final summary.")


def list_previous_sessions() -> List[Tuple[str, str, str]]:
    """List saved research sessions found under SESSIONS_DIR, newest first.

    A session is any sub-directory of SESSIONS_DIR (other than ``global_rag``)
    that contains a ``knowledge_base.json`` file. Progress is reported through
    DEBUG ``print`` lines.

    Returns:
        A list of ``(session_id, original_query, last_modified)`` tuples sorted
        by the knowledge-base file's modification time, most recent first.
        Sessions whose knowledge base cannot be read are still listed once,
        with ``"Error reading query"`` and ``"N/A"`` as placeholders, and sort
        last. An empty list is returned when the directory is missing, is not a
        directory, cannot be listed, or holds no sessions.
    """
    print(f"DEBUG (list_previous_sessions): Checking for sessions in directory: '{os.path.abspath(SESSIONS_DIR)}'") # Log absolute path
    if not os.path.exists(SESSIONS_DIR):
        print(f"DEBUG (list_previous_sessions): Sessions directory '{SESSIONS_DIR}' not found at all.")
        return []
    if not os.path.isdir(SESSIONS_DIR):
        print(f"DEBUG (list_previous_sessions): Path '{SESSIONS_DIR}' exists but is not a directory.")
        return []

    try:
        items_in_sessions_dir = os.listdir(SESSIONS_DIR)
        print(f"DEBUG (list_previous_sessions): Found {len(items_in_sessions_dir)} items in '{SESSIONS_DIR}': {items_in_sessions_dir}")
    except Exception as e_listdir:
        print(f"DEBUG (list_previous_sessions): Error listing directory '{SESSIONS_DIR}': {e_listdir}")
        return []

    # Each entry: (session_id, query, readable mtime, epoch mtime used for sorting)
    previous_sessions = []
    for item_name in items_in_sessions_dir:
        session_dir_path = os.path.join(SESSIONS_DIR, item_name)
        print(f"DEBUG (list_previous_sessions): Checking item '{item_name}' at path '{session_dir_path}'")
        if not os.path.isdir(session_dir_path) or item_name == "global_rag":
            print(f"DEBUG (list_previous_sessions):  Item '{item_name}' is NOT a session directory (or is global_rag).")
            continue

        kb_path = os.path.join(session_dir_path, "knowledge_base.json")
        print(f"DEBUG (list_previous_sessions):  Item '{item_name}' is a directory (not global_rag). Checking for KB: '{kb_path}'")
        if not os.path.exists(kb_path):
            print(f"DEBUG (list_previous_sessions):   knowledge_base.json NOT FOUND for session '{item_name}'.")
            continue

        print(f"DEBUG (list_previous_sessions):   Found knowledge_base.json for session '{item_name}'.")
        try:
            with open(kb_path, 'r', encoding='utf-8') as f:
                kb_data = json.load(f)
            raw_query = kb_data.get("original_query", "Unknown Query")
            # JSON null / non-string values must not reach the slicing below or
            # the caller's display code; normalise to str up front.
            if raw_query is None:
                query = "Unknown Query"
            elif isinstance(raw_query, str):
                query = raw_query
            else:
                query = str(raw_query)
            mod_time_epoch = os.path.getmtime(kb_path)
            mod_time_readable = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(mod_time_epoch))
            session_entry = (item_name, query, mod_time_readable, mod_time_epoch)
            print(f"DEBUG (list_previous_sessions):    Successfully loaded KB for '{item_name}'. Query: '{query[:50]}...', ModTime: {mod_time_readable}")
        except Exception as e:
            print(f"DEBUG (list_previous_sessions):    Could not read/parse KB for session {item_name}: {e}")
            session_entry = (item_name, "Error reading query", "N/A", 0)
        # Append exactly once per session, after the try/except, so a failure
        # late in the try block cannot leave a duplicate entry behind.
        previous_sessions.append(session_entry)

    if not previous_sessions:
        print(f"DEBUG (list_previous_sessions): No valid previous sessions found after checking all items.")
        return []

    previous_sessions.sort(key=lambda entry: entry[3], reverse=True) # Sort by epoch timestamp
    # Drop the epoch column; callers unpack three-element tuples.
    previous_sessions_final = [(sid, q, dt_readable) for sid, q, dt_readable, _ in previous_sessions]
    print(f"DEBUG (list_previous_sessions): Returning {len(previous_sessions_final)} sessions.")
    return previous_sessions_final



if __name__ == '__main__':
    # Step 1: try to make asyncio re-entrant so asyncio.run() can be used from
    # inside an already-running loop (the usual situation in Jupyter/Colab).
    try:
        import nest_asyncio
        nest_asyncio.apply()
        print("nest_asyncio applied (if in a compatible environment like Jupyter/Colab).")
    except ImportError:
        print("nest_asyncio not found. If running in Jupyter/Colab, install for optimal asyncio behavior.")
    except RuntimeError as nest_err:
        print(f"nest_asyncio runtime issue: {nest_err}. Proceeding, but be aware of potential asyncio conflicts if already in an event loop.")

    # Step 2: drive the interactive runner and report each failure mode.
    try:
        asyncio.run(main_colab_runner_async())
    except KeyboardInterrupt:
        print("\nResearch process interrupted by user (KeyboardInterrupt).")
    except RuntimeError as loop_err:
        # The longer "asyncio.run() cannot be called ..." wording contains this
        # phrase, so a single substring test covers both variants.
        loop_already_running = "cannot be called from a running event loop" in str(loop_err).lower()
        if not loop_already_running:
            print(f"\nA main asyncio RuntimeError occurred: {loop_err}")
            traceback.print_exc()
        else:
            print("\nERROR: asyncio.run() failed. This often happens in environments like Jupyter notebooks that manage their own event loop.")
            print("Consider running the main_colab_runner_async() function directly if you are in such an environment and nest_asyncio is not fully effective, e.g., by calling `await main_colab_runner_async()` in a cell.")
    except Exception as unexpected_err:
        print(f"\nAn unexpected error occurred in the main execution block: {unexpected_err}")
        traceback.print_exc()