# Phase 1: Initialization and Understanding

This notebook handles:
1. Loading user inputs from file
2. Performing initial file analysis on all relevant files
3. Building the prompt for a draft high-level request objective (the LLM call that generates the objective is currently commented out in Step 1.4)
4. Saving results for next phase

In [1]:
# Import shared utilities
import sys
sys.path.append('.')

In [2]:
from google.genai import types
from mirascope.core import google
from mirascope import Messages
from pydantic import BaseModel, Field
from typing import List, Optional, Dict
import asyncio

from utils.interim_data_management import save_interim_data
from utils.format_files_dict_to_xml import format_files_dict_to_xml
from utils.config import update_config, get_config

## Configuration

In [3]:
# Phase 1 settings. INPUT_NAME selects which bundle is read from ../inputs/<INPUT_NAME>.txt
INPUT_NAME = "context_manager_update_frontend"  # Change this to process different inputs

# One-paragraph description of the codebase; substituted into the LLM prompts below.
# Written as adjacent literals purely for line length -- the resulting string is a single line.
GLOBAL_CODEBASE_PURPOSE = (
    "AI-powered content creation and optimization platform for social media thought leadership. "
    "Transform raw ideas and long-form content into high-performing social media posts "
    "through intelligent content processing, hook optimization, and multi-format post generation"
)

# Push DEBUG=False into the shared config, then read the config back for this notebook
update_config(DEBUG=False)
config = get_config()

## Step 1.1: Receive User Inputs

In [4]:
from utils.extract_xml_tags import extract_xml_content
from utils.build_files_dict import build_files_dict

# Location of the input bundle for this run
filename = f"../inputs/{INPUT_NAME}.txt"

# Sections to pull out of the bundle, in the same order they are unpacked below
tags_to_extract = ["file_map", "file_contents", "user_instructions", "Referenced APIs"]

extracted_data = extract_xml_content(filename, tags_to_extract)

# One value per tag; a tag that is absent from the extraction result becomes ""
file_tree, file_contents, user_instructions, referenced_apis = (
    extracted_data.get(tag, "") for tag in tags_to_extract
)

# Parse the raw file_contents section into files_dict (iterated later as file path -> file content)
files_dict = build_files_dict(file_contents)

print(f"Loaded {len(files_dict)} files from {INPUT_NAME}")
print(f"User instructions length: {len(user_instructions)} characters")

Loaded 55 files from context_manager_update_frontend
User instructions length: 8679 characters


In [5]:
# Save raw inputs for reference
# Snapshot of what was loaded, persisted for reference by later phases
interim_dir = config.get("INTERIM_DATA_DIR")

raw_inputs = dict(
    input_name=INPUT_NAME,
    file_tree=file_tree,
    user_instructions=user_instructions,
    referenced_apis=referenced_apis,
    global_codebase_purpose=GLOBAL_CODEBASE_PURPOSE,
    files_count=len(files_dict),
)

save_interim_data(interim_dir, raw_inputs, "raw_inputs", "phase1")
# Left as the cell's final expression so its return value is displayed as the cell output
save_interim_data(interim_dir, files_dict, "files_dict", "phase1")

Saved raw_inputs to interim_data/phase1/raw_inputs.json
Saved files_dict to interim_data/phase1/files_dict.json


'interim_data/phase1/files_dict.json'

## Step 1.2: Define Models and Prompts for File Analysis

In [6]:
# Structured result of analysing a single file; passed as `response_model` to the
# per-file analysis LLM call, so the model fills in every field below.
# NOTE(review): documented with `#` comments on purpose -- a class docstring or
# Field(description=...) would presumably end up in the schema sent to the model;
# confirm before adding either.
class InitialFileAnalysisItem(BaseModel):
    file_path: str  # path of the analysed file; later used as the lookup key into files_dict
    overall_relevance_to_request_objective: str  # how the file relates to the request's goal
    initial_content_summary_purpose: str  # what the file currently does

# Analysis item extended with the text of the analysed file (attached after the LLM call).
class InitialFileAnalysisItemWithContent(InitialFileAnalysisItem):
    file_contents: str  # file text looked up from files_dict; "" when the path is not found

In [7]:
# System-prompt template for the per-file analysis call.
# Placeholders (filled with str.replace, not str.format):
#   {overall_codebase_purpose_placeholder}  - description of the whole codebase
#   {user_request_placeholder}              - the user's instructions
#   {file_tree_placeholder}                 - the file tree text
#   {summary_of_other_files_placeholder}    - XML rendering of every other file
# The <file_tree> section is closed with </file_tree>; it was previously "closed"
# with a second opening tag, which left the prompt markup malformed.
FILE_ANALYSIS_SYSTEM_PROMPT = """<system_context>
You are an expert code analyst. 
Your task is to understand a specific code file within the context of a larger request and a user's request. 
You need to provide a concise analysis focusing on two key aspects: 
- Its overall relevance to the request's goal
- A summary of its current content/purpose.
</system_context>

<input_data>
    <overall_codebase_purpose>
    {overall_codebase_purpose_placeholder}
    </overall_codebase_purpose>

    <user_request>
    {user_request_placeholder}
    </user_request>

    <file_tree>
    {file_tree_placeholder}
    </file_tree>

    <all_other_relevant_files_summary>
    {summary_of_other_files_placeholder}
    </all_other_relevant_files_summary>
</input_data>

<output_instructions>
Based on all the provided input data, generate the following for the <file_to_analyze>:

1.  **Overall Relevance to Request Objective:**
    *   Directly state how this file relates to the request's goal. Be brief and specific (e.g., "specifies Supabase request being used").
    *   Note if it is a core component, utility, config, UI, data model, etc., in relation to the request.
    *   If relevant, mention any transitional or legacy role (e.g., "still holds request ID useful for migration").
    *   Keep this concise (2-4 sentences).

2.  **Initial Content Summary/Purpose:**
    *   Succinctly state the file's main purpose or function as it currently exists (e.g., "configures the Supabase request ID").
    *   Focus on what it does now, not future changes.
    *   Be as direct and concise as possible.
    *   Keep this concise (2-4 sentences).
</output_instructions>
"""

# User-message template for the per-file analysis call. "{file_content_placeholder}"
# is filled (via str.replace) with "File Path: <path>", a blank line, then the file's text.
# NOTE(review): the wrapper tag <user_request> is the same tag name the system prompt
# uses for the user's instructions -- confirm the overlap is intended.
FILE_ANALYSIS_USER_PROMPT = """<user_request>
    <input_text>Please analyze the following file and provide a concise summary of its purpose and functionality.</input_text>
    <file_to_analyze>
        {file_content_placeholder}
    </file_to_analyze>
</user_request>
"""

In [8]:
# Per-file analysis call: sends the already-filled system and user prompts to
# "gemini-2.0-flash" (temperature 0.2) with InitialFileAnalysisItem as the response model.
# Despite the name this is a plain synchronous `def`; the notebook gets concurrency
# by running it through asyncio.to_thread.
# NOTE(review): documented with comments rather than a docstring -- mirascope can be
# configured to treat docstrings as prompt templates; confirm before converting.
@google.call(
    "gemini-2.0-flash",
    response_model=InitialFileAnalysisItem,
    call_params={
        "config": types.GenerateContentConfig(temperature=0.2)
    },
)
def async_file_analysis(system_prompt: str, user_prompt: str) -> Messages.Type:
    # The undecorated body only builds the message list: one system message, one user message.
    return [Messages.System(system_prompt), Messages.User(user_prompt)]

## Step 1.3: Perform Initial File Analysis

In [9]:
async def generate_all_file_analyses(
    files_dict: Dict[str, str],
    overall_codebase_purpose: str,
    user_request_text: str,
    file_tree_text: str,
    file_analysis_system_template: str,
    file_analysis_user_template: str
) -> List['InitialFileAnalysisItem']:
    """
    Generates file analysis items for all files in the files_dict concurrently.

    For every file, the system prompt is filled with the codebase purpose, the
    user request, the file tree and an XML rendering of all *other* files; the
    user prompt is filled with the file's own path and content. Each call to
    ``async_file_analysis`` runs in a worker thread via ``asyncio.to_thread``.

    Args:
        files_dict: Mapping of file path -> file content.
        overall_codebase_purpose: Text for ``{overall_codebase_purpose_placeholder}``.
        user_request_text: Text for ``{user_request_placeholder}``.
        file_tree_text: Text for ``{file_tree_placeholder}``.
        file_analysis_system_template: System prompt template containing the
            placeholders above plus ``{summary_of_other_files_placeholder}``.
        file_analysis_user_template: User prompt template containing
            ``{file_content_placeholder}``.

    Returns:
        One analysis item per entry of ``files_dict``, in the dict's iteration
        order. Each item's ``file_path`` is set to the real dict key, so that
        looking the item up in ``files_dict`` afterwards cannot miss.

    Raises:
        Whatever the first failing analysis call raises (``asyncio.gather`` is
        used without ``return_exceptions``).
    """
    # These three substitutions are the same for every file, so do them once.
    # The order matches the per-file chain, so the final prompt text is unchanged.
    shared_system_prompt = file_analysis_system_template.replace(
        "{overall_codebase_purpose_placeholder}", overall_codebase_purpose
    ).replace(
        "{user_request_placeholder}", user_request_text
    ).replace(
        "{file_tree_placeholder}", file_tree_text
    )

    file_paths = list(files_dict)
    tasks = []

    for file_path in file_paths:
        current_file_content = files_dict[file_path]

        # 1. Every file except the one being analysed, rendered as XML context
        other_files_dict = {
            fp: content for fp, content in files_dict.items() if fp != file_path
        }
        other_files_summary_xml = format_files_dict_to_xml(other_files_dict)

        # 2. Finish the system prompt with the per-file context
        system_prompt = shared_system_prompt.replace(
            "{summary_of_other_files_placeholder}", other_files_summary_xml
        )

        # 3. Populate the user prompt with the file under analysis
        prompt_file_content_with_path = f"File Path: {file_path}\n\n{current_file_content}"
        user_prompt = file_analysis_user_template.replace(
            "{file_content_placeholder}", prompt_file_content_with_path
        )

        # async_file_analysis is synchronous, so run it in a worker thread
        tasks.append(asyncio.to_thread(async_file_analysis, system_prompt, user_prompt))

    # Run all analysis tasks concurrently; results come back in task order
    results = await asyncio.gather(*tasks)

    # file_path on each result was generated by the model and may not match the
    # real key exactly; overwrite it with the key the analysis was requested for.
    for file_path, analysis in zip(file_paths, results):
        analysis.file_path = file_path

    return results


print("Starting file analyses...")
all_analyses = await generate_all_file_analyses(
    files_dict=files_dict,
    overall_codebase_purpose=GLOBAL_CODEBASE_PURPOSE,
    user_request_text=user_instructions,
    file_tree_text=file_tree,
    file_analysis_system_template=FILE_ANALYSIS_SYSTEM_PROMPT,
    file_analysis_user_template=FILE_ANALYSIS_USER_PROMPT
)
print(f"Completed {len(all_analyses)} file analyses.")

Starting file analyses...
Completed 55 file analyses.


In [10]:
# Add file contents to analysis items.
# - model_dump() copies exactly the declared fields (instead of reaching into __dict__).
# - The comprehension keeps its loop variable local, so the raw `file_contents`
#   string loaded in Step 1.1 is no longer overwritten by this cell.
all_analyses_with_content = [
    InitialFileAnalysisItemWithContent(
        **analysis_item.model_dump(),
        # Falls back to "" when the analysed path is not a key of files_dict
        file_contents=files_dict.get(analysis_item.file_path, ""),
    )
    for analysis_item in all_analyses
]

# Save file analyses (left as the final expression so the saved path is displayed)
save_interim_data(config.get("INTERIM_DATA_DIR"), all_analyses_with_content, "file_analyses", "phase1")

Saved file_analyses to interim_data/phase1/file_analyses.json


'interim_data/phase1/file_analyses.json'

## Step 1.4: Generate Draft High-Level Request Objective

In [11]:
# System-prompt template for generating the high-level request objective.
# Placeholders (filled with str.replace by format_overall_summary_system_prompt):
#   {user_original_request_placeholder}
#   {overall_codebase_purpose_placeholder}
#   {file_tree_placeholder}
#   {initial_file_analysis_summary_placeholder}
UNDERSTAND_USER_REQUEST_SYSTEM_PROMPT = """<system_context>
You are an expert technical writer and request planner. 
Your primary task is to synthesize a user's request and an initial analysis of relevant code files into a clear and comprehensive high-level request objective. 
This objective is CRITICAL as it ensures both the system and the user are perfectly aligned on the overall goal before proceeding with detailed planning. 
It will inform the entire generation pipeline.
</system_context>

<input_data>
    <user_original_request>
    {user_original_request_placeholder}
    </user_original_request>

    <overall_codebase_purpose>
    {overall_codebase_purpose_placeholder}
    </overall_codebase_purpose>

    <file_tree>
    {file_tree_placeholder}
    </file_tree>

    <initial_file_analysis_summary>
    {initial_file_analysis_summary_placeholder}
    </initial_file_analysis_summary>
</input_data>

<output_instructions>
Based on the <user_original_request>, the <overall_codebase_purpose>, and the <initial_file_analysis_summary>, please formulate a high-level request objective.

This objective MUST:
1.  Clearly and unambiguously state what will be achieved, built, or changed upon completion of the entire request/task.
2.  Be comprehensive enough to capture the full scope of the user's intent, typically within 2-4 sentences. The aim is complete understanding, not just brevity.
3.  Accurately reflect the core intent of the user's request, grounded in the context of the existing codebase and relevant files.
4.  Focus on the "what" (the end state or primary deliverable) and avoid detailing specific "how-to" implementation steps or sub-tasks at this stage.

The goal is to create a statement that, if confirmed by the user, signifies complete agreement on the request's ultimate aim.

Output only the high-level request objective.
</output_instructions>
"""

In [12]:
# Response model for the objective-generation LLM call (passed as `response_model` below).
# NOTE(review): kept as `#` comments -- a class docstring would presumably be
# included in the schema sent to the model; confirm before adding one.
class OverallRequestObjectiveResponse(BaseModel):
    overall_request_objective: str  # the generated high-level request objective text

# Objective-generation call: sends the filled system prompt plus a fixed user
# message to "gemini-2.5-pro-preview-05-06" (thinking budget 2048, temperature 0.5)
# with OverallRequestObjectiveResponse as the response model.
# NOTE(review): documented with comments rather than a docstring -- mirascope can be
# configured to treat docstrings as prompt templates; confirm before converting.
@google.call(
    "gemini-2.5-pro-preview-05-06",
    response_model=OverallRequestObjectiveResponse,
    call_params={
        "config": types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=2048), 
            temperature=0.5
        )
    },
)
def generate_overall_request_objective(system_prompt: str) -> Messages.Type:
    # The undecorated body only builds the message list; the user turn is a constant instruction.
    return [Messages.System(system_prompt), Messages.User("Please generate the overall request objective.")]

In [13]:
# Format and generate objective
def format_overall_summary_system_prompt(
    user_original_request: str,
    overall_codebase_purpose: str,
    file_tree: str,
    initial_file_analysis_summary: str
) -> str:
    """
    Fill UNDERSTAND_USER_REQUEST_SYSTEM_PROMPT with the given texts.

    Args:
        user_original_request: The user's instructions (whitespace-stripped before use).
        overall_codebase_purpose: Codebase description (whitespace-stripped before use).
        file_tree: File tree text, inserted exactly as given.
        initial_file_analysis_summary: Serialized file analyses (whitespace-stripped before use).

    Returns:
        The system prompt with all four placeholders substituted.
    """
    # Applied strictly in this order, one str.replace per placeholder
    substitutions = (
        ("{user_original_request_placeholder}", user_original_request.strip()),
        ("{overall_codebase_purpose_placeholder}", overall_codebase_purpose.strip()),
        ("{file_tree_placeholder}", file_tree),
        ("{initial_file_analysis_summary_placeholder}", initial_file_analysis_summary.strip()),
    )

    filled_prompt = UNDERSTAND_USER_REQUEST_SYSTEM_PROMPT
    for placeholder, replacement in substitutions:
        filled_prompt = filled_prompt.replace(placeholder, replacement)
    return filled_prompt

# One indented JSON document per analysed file, newline-separated.
# These are the "with content" items, so each document also carries the file's full text.
analysis_summary_json = '\n'.join(
    analysis.model_dump_json(indent=2) for analysis in all_analyses_with_content
)

overall_summary_system_prompt = format_overall_summary_system_prompt(
    user_original_request=user_instructions,
    overall_codebase_purpose=GLOBAL_CODEBASE_PURPOSE,
    file_tree=file_tree,
    initial_file_analysis_summary=analysis_summary_json,
)

# Uncomment to generate the objective
# overall_request_objective = generate_overall_request_objective(overall_summary_system_prompt).overall_request_objective
# print(f"Generated objective: {overall_request_objective}")

## Save Phase 1 Results

In [14]:
# Save all Phase 1 results
# Roll-up of this Phase 1 run, persisted alongside the other interim data
phase1_summary = dict(
    input_name=INPUT_NAME,
    files_analyzed=len(all_analyses_with_content),
    overall_codebase_purpose=GLOBAL_CODEBASE_PURPOSE,
    user_instructions_length=len(user_instructions),
    # overall_request_objective=overall_request_objective,  # Uncomment when generated
)

save_interim_data(config.get("INTERIM_DATA_DIR"), phase1_summary, "phase1_summary", "phase1")
print("\nPhase 1 completed successfully!")

Saved phase1_summary to interim_data/phase1/phase1_summary.json

Phase 1 completed successfully!
