In [3]:
%load_ext autoreload
%autoreload 2

from rtfs.chunk_resolution.chunk_graph import ChunkGraph
from rtfs.summarize.summarize import Summarizer
from src.config import GRAPH_ROOT
from src.chunk.chunk import chunk_repo, ChunkStrat

import json
from pathlib import Path

# Repository selection: exactly one of the AIDER / MOATLESS sections below
# should be active. `repo_path`, `graph_path` and `chunks` are read by later cells.

# AIDER
# repo_path = r"C:\Users\jpeng\Documents\projects\codesearch-backend\src\cluster\repos\aider"
# graph_path = GRAPH_ROOT / "Aider-AI_aider.git"

# graph_json = json.loads(open(graph_path, "r").read())
# cg = ChunkGraph.from_json(graph_path, graph_json)

# MOATLESS
# NOTE(review): absolute, machine-specific Windows path; this cell will not run elsewhere as-is.
repo_path = Path(r"C:\Users\jpeng\Documents\projects\codesearch-backend\src\cluster\repos\moatless-tools")
graph_path = GRAPH_ROOT / "aorwall_moatless-tools"

# Chunk the repository with the VANILLA strategy.
chunks = chunk_repo(repo_path, ChunkStrat.VANILLA)
# chunks_summarized = chunk_repo(repo_path, ChunkStrat.VANILLA, summarize=True)

# graph_json = json.loads(open(graph_path, "r").read())

Saving chunks to file:  C:\Users\jpeng\AppData\Local\Temp\index\moatless-tools
[Chunker]: 340 chunks used


In [4]:
# 2024-11-09: Building the guided path
# TODO: remove overlaps in clusters

# Path.read_text opens, reads and closes the file in one call, so no file
# handle is left open in the kernel (a bare open(...).read() never closes it).
graph_json = json.loads(Path("full_code.json").read_text())
# Rebuild the chunk graph from the saved JSON; `cg` is used by later cells.
cg = ChunkGraph.from_json(graph_path, graph_json)
for cluster in cg.get_clusters():
    print(cluster.to_str())


Cluster 10: Automated Evaluation and Transition Management for Code Search and Identification
  Chunk: benchmark/claude_evaluation.py::1
  Chunk: benchmark/claude_evaluation.py::3
  Chunk: benchmark/claude_evaluation.py::4
  Chunk: benchmark/claude_evaluation.py::5
  Chunk: benchmark/claude_evaluation.py::6
  Chunk: benchmark/claude_evaluation.py::7
  Chunk: benchmark/claude_evaluation.py::8
  Chunk: benchmark/evaluation.py::2
  Chunk: benchmark/evaluation.py::6
  Chunk: benchmark/evaluation.py::7
  Chunk: benchmark/evaluation.py::9
  Chunk: benchmark/report_v2.py::1

Cluster 5: Automated Evaluation and Analysis of Code Predictions
  Chunk: benchmark/create_dataset.py::2
  Chunk: benchmark/report_v1.py::4
  Chunk: benchmark/report_v2.py::4
  Chunk: swebench/utils.py::4
  Chunk: benchmark/utils.py::4
  Chunk: moatless/file_context.py::14
  Chunk: index/code_index.py::4
  Chunk: index/code_index.py::5
  Chunk: repository/file.py::5
  Chunk: repository/git.py::1
  Chunk: utils/repo.py::2


In [5]:
# Hand-picked chunk IDs in "<file path>::<chunk number>" form; they are passed
# to get_cluster_from_chunk below to select the clusters for the walkthrough.
chunk_paths = [
    "index/code_index.py::13",
    "index/code_index.py::7",
    "index/code_index.py::6", 
    "find/search.py::6",
    "moatless/loop.py::2",
    "moatless/loop.py::3",
    "moatless/loop.py::4",
    "moatless/transition_rules.py::1",
    "moatless/transition_rules.py::2", 
    "moatless/transition_rules.py::3",
    "moatless/transition_rules.py::4",
    "moatless/transition_rules.py::5",
    "benchmark/claude_evaluation.py::4",
    "benchmark/evaluation.py::2",
    "benchmark/evaluation.py::9",
    "benchmark/evaluation.py::5"
]

def get_cluster_from_chunk(chunks, cg):
    """Find, for each chunk ID, the largest cluster that contains it.

    Args:
        chunks: Iterable of chunk IDs to search for. Each ID is compared
            against the ``id`` attribute of the chunks inside every cluster.
        cg: ChunkGraph (or any object exposing
            ``get_clusters(return_content=True)``) whose clusters are
            hashable and carry a ``chunks`` sequence.

    Returns:
        Set of clusters. For every input chunk that appears in at least one
        cluster, the cluster with the most chunks among those containing it
        is included. Chunks found in no cluster are reported with ``print``
        and otherwise ignored.
    """
    # Query the graph once and index each cluster's chunk IDs up front;
    # doing this inside the loop repeats identical work for every chunk.
    indexed_clusters = [
        (cluster, {c.id for c in cluster.chunks})
        for cluster in cg.get_clusters(return_content=True)
    ]

    clusters = set()
    for chunk in chunks:
        # All clusters containing this chunk, in graph order
        matching_clusters = [
            cluster for cluster, chunk_ids in indexed_clusters
            if chunk in chunk_ids
        ]
        if matching_clusters:
            # Keep only the largest cluster containing this chunk
            # (max returns the first one on ties).
            clusters.add(max(matching_clusters, key=lambda x: len(x.chunks)))
        else:
            print(f"Chunk {chunk} not found in any cluster")

    return clusters
        
# `matched_clusters` is a set, so the print order below is not guaranteed.
# Later cells pass this set to the walkthrough-generation helpers.
matched_clusters = get_cluster_from_chunk(chunk_paths, cg)
for cluster in matched_clusters:
    print("Cluster: ", cluster.to_str())


Cluster:  Cluster 9: Automated Code Evaluation and Workflow Management
  Chunk: benchmark/evaluation.py::5
  Chunk: benchmark/report_v2.py::3
  Chunk: benchmark/utils.py::8
  Chunk: moatless/loop.py::2
  Chunk: moatless/trajectory.py::2

Cluster:  Cluster 10: Automated Evaluation and Transition Management for Code Search and Identification
  Chunk: benchmark/claude_evaluation.py::1
  Chunk: benchmark/claude_evaluation.py::3
  Chunk: benchmark/claude_evaluation.py::4
  Chunk: benchmark/claude_evaluation.py::5
  Chunk: benchmark/claude_evaluation.py::6
  Chunk: benchmark/claude_evaluation.py::7
  Chunk: benchmark/claude_evaluation.py::8
  Chunk: benchmark/evaluation.py::2
  Chunk: benchmark/evaluation.py::6
  Chunk: benchmark/evaluation.py::7
  Chunk: benchmark/evaluation.py::9
  Chunk: benchmark/report_v2.py::1

Cluster:  Cluster 77036: Automated Code Context Expansion and Decision Making System
  Chunk: find/decide.py::3
  Chunk: find/identify.py::2
  Chunk: find/search.py::6

Cluster:

In [6]:
from src.chat.lmp.walkthrough import identify_transitions
from llm import LLMModel

# Hand-written description of the search/evaluation flow; passed verbatim as
# prompt context to the walkthrough-generation helpers in later cells.
# Identifier names in it should match the code base exactly.
WALKTHROUGH = """
1. Entry Point - `benchmark/claude_evaluation.py`:
- The process starts in the Claude evaluation module where functions like `evaluate_search()` or `evaluate_search_and_identify()` initialize the evaluation
- These functions set up the transition rules and evaluation parameters

2. Evaluation Setup - `benchmark/evaluation.py`:
- The `Evaluation` class creates an evaluation instance with configured parameters
- It handles setting up directories, managing trajectories, and initializing the workspace

3. State Management - `AgenticLoop`:
- The `AgenticLoop` class manages the state transitions and execution flow
- It starts with the initial state (SearchCode) and handles transitions between states

4. Search Execution - `SearchCode` State:
- Initial state that handles the search request
- Interacts with the `CodeIndex` class to perform the actual search
- Uses the file context and workspace to manage code access

5. Search Engine - `CodeIndex`:
- The `semantic_search()` method orchestrates the search process
- Calls `_vector_search()` to perform the actual vector-based search
- Filters and processes results based on parameters like:
  - file patterns
  - class/function names
  - exact matches
  - token limits

6. Vector Search - `_vector_search()`:
- Performs the low-level vector search operation
- Creates query embeddings
- Applies filters and retrieves results from the vector store
- Processes and filters the results based on:
  - File patterns
  - Test file exclusions
  - Exact matches
  - Token counts

7. Result Processing:
- Results are returned as `SearchCodeResponse` objects
- Contains hits with file paths and relevant code spans
- Includes metadata about the search results

8. State Transitions:
- Search results trigger transitions to subsequent states:
  - `IdentifyCode`: Processes and identifies relevant code spans
  - `DecideRelevance`: Makes decisions about the relevance of identified code
  - Finally transitions to either `Finished` or `Rejected` states
"""

# LLM client shared by every generation call in the cells below.
model = LLMModel(provider="openai")
# start_cluster=10: presumably the id of "Cluster 10" printed earlier (the
# cluster holding the benchmark/claude_evaluation.py chunks) -- TODO confirm.
# Later cells iterate `transitions.transitions`.
transitions = identify_transitions(model, matched_clusters, WALKTHROUGH, start_cluster=10)

In [6]:
# create wikis
from src.chat.lmp.walkthrough import cluster_wiki

# One cluster_wiki call per matched cluster, keyed by cluster title; clusters
# sharing a title would overwrite each other in the dict.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so
# `wiki` may never have been populated in the recorded session.
wiki = {cluster.title: cluster_wiki(model, cluster).content for cluster in matched_clusters}
print(len(wiki))

KeyboardInterrupt: 

In [10]:
# generate chat transition and write to disk
from src.chat.lmp.walkthrough import generate_chat_transition
from src.chat.models import WalkthroughChat, WalkthroughData
from src.config import WALKTHROUGH_ROOT
import uuid
import json

def find_cluster_by_id(id, cg):
    """Return the first cluster from ``cg.get_clusters()`` whose ``id`` matches.

    Args:
        id: Cluster id to look for (the parameter name shadows the builtin
            only inside this function; it is kept for caller compatibility).
        cg: Object exposing ``get_clusters()`` returning clusters with an
            ``id`` attribute.

    Returns:
        The first matching cluster.

    Raises:
        LookupError: if no cluster has the requested id. An explicit error is
            raised instead of letting ``next()`` leak a bare StopIteration,
            which carries no message and can silently terminate an enclosing
            iteration.
    """
    for cluster in cg.get_clusters():
        if cluster.id == id:
            return cluster
    raise LookupError(f"No cluster with id {id!r} found in chunk graph")

walkthrough_path = WALKTHROUGH_ROOT / "aorwall_moatless-tools"

# One (content, metadata, chat id) triple per transition, in transition order.
all_chats = []
for t in transitions.transitions:
    # The result is not used below; the lookup raises if no cluster has this id.
    cluster = find_cluster_by_id(t.dst_cluster, cg)
    content, metadata = generate_chat_transition(model, t, matched_clusters, WALKTHROUGH)
    all_chats.append((content, metadata, str(uuid.uuid4())))

# Chain the chats: each one records the id of the chat that follows it
# (None for the last one).
walkthroughs = []
for i, chat in enumerate(all_chats):
    # Unpack into `chat_id`, not `id`: at notebook top level, assigning to `id`
    # would replace the builtin id() for every later cell in this kernel.
    content, metadata, chat_id = chat
    next_chat = all_chats[i+1][2] if i < len(all_chats) - 1 else None
    data = WalkthroughData(next_chat=next_chat, metadata=metadata)
    walkthrough = WalkthroughChat(content=content,
                                   metadata=data,
                                   id=chat_id)
    walkthroughs.append(walkthrough)


In [24]:
from src.chat.lmp.walkthrough import generate_walkthroughs

# Rebinds `walkthroughs`, replacing whatever an earlier cell stored under that name.
# NOTE(review): the printed value in the next cell looks like a list of dicts
# with 'id' and 'content' keys -- confirm against generate_walkthroughs.
walkthroughs = generate_walkthroughs(model, WALKTHROUGH, transitions.transitions, matched_clusters)

In [15]:
# Debug dump of the full `walkthroughs` value (long output).
print(walkthroughs)

[{'id': '80a6a493-4bf5-4b4e-9a13-29536817baa5', 'content': '**Evaluation Transition Orchestrator**\n\nThe transition from the initialization of an evaluation in the `benchmark/claude_evaluation.py` to the workflow management in `benchmark/evaluation.py` involves setting up and running an evaluation loop:\n\n- The `evaluate_search` function in `benchmark/claude_evaluation.py` sets up transition rules using the `TransitionRules` class. These rules define the states and transitions for the evaluation process.\n- An `Evaluation` instance is created with transition rules, directories for storing evaluation data, and other parameters.\n- The `Evaluation` class handles the setup and execution of the evaluation loop using the `AgenticLoop` class. It manages the workspace, tracks the evaluation trajectory, and logs the evaluation process.\n- The `AgenticLoop` class orchestrates state transitions based on the defined transition rules, running actions until completion or termination, as specified

In [22]:
from src.chat.lmp.walkthrough import construct_walkthroughs

# The result is only displayed, not stored; the saved output is a tuple whose
# first element is a list of WalkthroughChat objects.
construct_walkthroughs(walkthroughs)

([WalkthroughChat(content='**Evaluation Transition Orchestrator**\n\nThe transition from the initialization of an evaluation in the `benchmark/claude_evaluation.py` to the workflow management in `benchmark/evaluation.py` involves setting up and running an evaluation loop:\n\n- The `evaluate_search` function in `benchmark/claude_evaluation.py` sets up transition rules using the `TransitionRules` class. These rules define the states and transitions for the evaluation process.\n- An `Evaluation` instance is created with transition rules, directories for storing evaluation data, and other parameters.\n- The `Evaluation` class handles the setup and execution of the evaluation loop using the `AgenticLoop` class. It manages the workspace, tracks the evaluation trajectory, and logs the evaluation process.\n- The `AgenticLoop` class orchestrates state transitions based on the defined transition rules, running actions until completion or termination, as specified in the evaluation setup.\n\nTran

In [8]:
from src.chat.lmp.walkthrough import post_process_smooth

# Only the element at index 1 of `walkthroughs` is post-processed here.
# `chat_list` is printed by the next cell (each item exposes .name and .content).
chat_list = post_process_smooth(model, WALKTHROUGH, walkthroughs[1]).chats


In [10]:
# Print each smoothed chat's name followed by its content.
# Note: the loop variable `chat` stays bound in the kernel after this cell.
for chat in chat_list:
    print("Chat Name: ", chat.name)
    print(chat.content)

Chat Name:  Introduction
Welcome to the Code Search and Evaluation Framework! This feature is designed to automate the process of evaluating code through a series of state transitions. It integrates advanced search capabilities with a dynamic state management system to streamline coding tasks and evaluations. Let's dive into how this system is structured and operates across its various components.
Chat Name:  CodeSearchEvaluationFlow
The [_evaluate_search_ function][[benchmark/claude_evaluation.py::4]] in `benchmark/claude_evaluation.py` sets up an `Evaluation` instance by configuring [_TransitionRules_][[moatless/transition_rules.py::2]] with global and state-specific parameters for the state transitions during a code search evaluation. This function specifies the initial state as [_SearchCode_][[find/search.py::6]] and sets transition rules that dictate the flow from [_SearchCode_][[find/search.py::6]] to [_Finished_][[moatless/state.py::5]], based on certain triggers. The [_Evaluati

In [8]:
from src.config import WALKTHROUGH_ROOT


# JSON file that write_new (defined below in this cell) reads and rewrites.
walkthrough_path = WALKTHROUGH_ROOT / "aorwall_moatless-tools"

def write_new(name, walkthroughs, path=None):
    """Append a named group of walkthroughs to a JSON file and rewrite it.

    The file holds a JSON list of ``{"name": ..., "walkthroughs": [...]}``
    entries. Existing entries are kept and the new one is appended.

    Args:
        name: Label stored under the ``"name"`` key of the new entry.
        walkthroughs: Iterable of objects exposing ``.dict()``; each result
            is stored under the ``"walkthroughs"`` key.
        path: File to update. Defaults to the notebook-level
            ``walkthrough_path`` when omitted, so existing two-argument
            calls behave as before.

    Side effects:
        Overwrites the target file and prints a short status line. A missing
        or non-JSON file is reported and treated as empty; a file whose
        top-level value is not a list is silently treated as empty.
    """
    target = walkthrough_path if path is None else path

    # Read existing content (JSON is UTF-8 by specification)
    try:
        with open(target, "r", encoding="utf-8") as f:
            old = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Starting fresh file due to: {e}")
        old = []
    else:
        if not isinstance(old, list):
            old = []

    # Add new walkthrough
    old.append({
        "name": name,
        "walkthroughs": [c.dict() for c in walkthroughs]
    })

    # Serialise before opening for write: if an entry is not JSON-serialisable
    # the error surfaces here, before the existing file has been truncated.
    payload = json.dumps(old, indent=2)

    # Write back entire file
    with open(target, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"Wrote {len(old)} walkthroughs: {[w['name'] for w in old]}")
# Persist the current `walkthroughs` under the label "Unsmooth".
# Each element must expose .dict(), because write_new calls it on every item.
write_new("Unsmooth", walkthroughs)

Starting fresh file due to: Expecting value: line 1 column 1 (char 0)
Wrote 1 walkthroughs: ['Unsmooth']


In [8]:
# generate chat transition and write to disk
from src.chat.lmp.walkthrough import generate_chat_transition
from src.config import WALKTHROUGH_ROOT
import json

def find_cluster_by_id(id, cg):
    """Return the first cluster from ``cg.get_clusters()`` whose ``id`` matches.

    This helper is also defined in an earlier cell of this notebook; whichever
    cell ran last wins, so the two definitions should be kept identical.

    Args:
        id: Cluster id to look for (the parameter name shadows the builtin
            only inside this function; it is kept for caller compatibility).
        cg: Object exposing ``get_clusters()`` returning clusters with an
            ``id`` attribute.

    Returns:
        The first matching cluster.

    Raises:
        LookupError: if no cluster has the requested id. An explicit error is
            raised instead of letting ``next()`` leak a bare StopIteration,
            which carries no message and can silently terminate an enclosing
            iteration.
    """
    for cluster in cg.get_clusters():
        if cluster.id == id:
            return cluster
    raise LookupError(f"No cluster with id {id!r} found in chunk graph")

# Debug variant: generates the chat for the first transition only and prints it.
# Note that this cell resets `all_chats` to an empty list and rebinds `chat`.
walkthrough_path = WALKTHROUGH_ROOT / "aorwall_moatless-tools"
all_chats = []
for t in transitions.transitions[:1]:
    # `cluster` is only referenced by the commented-out append below.
    cluster = find_cluster_by_id(t.dst_cluster, cg)
    # Per the saved output, `chat` prints as a (text, metadata dict) tuple.
    chat = generate_chat_transition(model, t, matched_clusters, WALKTHROUGH)

    print(chat)

    # with open(walkthrough_path, "w") as f:
    #     f.write(json.dumps(chat.dict()))

    # all_chats.append({"chat": chat, "next_cluster": cluster.title})
    

('The transition occurs when the `Evaluation` class is instantiated in `benchmark/claude_evaluation.py`, where transition rules and evaluation parameters are defined and passed to manage the evaluation process. These parameters are utilized in the [_Evaluation class_][[benchmark/evaluation.py::2]] to conduct evaluations, leveraging the [_AgenticLoop_][[moatless/loop.py::2]] to manage state transitions and execution flow. The loop uses the transition rules to navigate through different states, as defined in the [_TransitionRules_][[moatless/transition_rules.py::2]]. The entire process is logged and managed through specific directories for trajectories and logs, ensuring that all actions and transitions are recorded and retrievable.', {'benchmark/evaluation.py::2': SrcMetadata(filepath='moatless\\benchmark\\evaluation.py', start_line=73, end_line=138), 'moatless/loop.py::2': SrcMetadata(filepath='moatless\\loop.py', start_line=45, end_line=157), 'moatless/transition_rules.py::2': SrcMeta

In [14]:
from src.chat.models import WalkthroughChat 
import json

# Round-trip check: parse the saved file and feed every top-level entry to
# WalkthroughChat.from_json; the results are discarded.
# NOTE(review): write_new stores entries shaped {"name", "walkthroughs"} --
# confirm that from_json accepts that shape rather than a single chat.
with open(walkthrough_path) as f:
    c = json.loads(f.read())

    for chat in c:
        WalkthroughChat.from_json(chat)


In [10]:
# Expects `all_chats` to hold dicts with "chat" and "next_cluster" keys (the
# shape produced by the commented-out append in an earlier cell).
# NOTE(review): not idempotent -- `all_chats` is rebound to dicts keyed
# "content"/"nextWiki", so re-running this cell raises KeyError on "chat".
all_chats = [{"content": c["chat"].content, "nextWiki": c["next_cluster"]} for c in all_chats]
print(all_chats)

[{'content': 'The transition between Cluster 10 and Cluster 9 is centered around the initiation and execution of an evaluation workflow, which involves setting up evaluation parameters, managing the flow through different states, and generating reports based on the results. Here\'s a detailed breakdown of how these clusters interact:\n\n### Cluster 10: Evaluation Initialization\n\n1. **Entry Point Functions**: Cluster 10 begins with several functions defined in `benchmark/claude_evaluation.py`, such as `evaluate_search()`, `evaluate_search_and_identify()`, and `evaluate_search_and_code()`. These functions serve as entry points for different types of evaluations. Each function configures a specific set of transition rules and parameters. For example, `evaluate_search()` sets up transitions for a search-focused evaluation.\n\n2. **Transition Rules Setup**: Within these functions, `TransitionRules` objects are created with specific `global_params` and `state_params`. These parameters defi

: 

In [26]:
# Requires elements of `all_chats` to expose a `.content` attribute; this does
# not hold for the tuple or dict shapes other cells store under the same name,
# so the cell depends on which cell last assigned `all_chats`.
print([c.content for c in all_chats])
print(len(all_chats))

['The transition between Cluster 10 and Cluster 9 revolves around orchestrating an automated evaluation process for code search and identification. This process begins in Cluster 10, specifically within the `benchmark/claude_evaluation.py` file, where various evaluation functions such as `evaluate_search()`, `evaluate_search_and_identify()`, and `evaluate_search_and_code()` are defined. These functions are responsible for setting up and triggering evaluations based on different scenarios.\n\n1. **Initiation of Evaluation**: \n   - Each evaluation function in `benchmark/claude_evaluation.py` begins by defining a set of transition rules using the `TransitionRules` class. For example, in the `evaluate_search()` function, transition rules are established with `SearchCode` as the initial state, transitioning to `Finished` upon triggering specific events like `"did_search"` or `"finish"`.\n   - The `global_params` and `state_params` dictionaries are used to configure the model and state-spec

In [21]:
# `chat` is not assigned in this cell: it is whatever an earlier cell left
# bound (loop variable or assignment), so the result depends on execution order.
print(chat.content)

The transition between Cluster 10 and Cluster 9 revolves around the orchestration and execution of automated code evaluations. This transition initiates in the `benchmark/claude_evaluation.py` file of Cluster 10, where several functions are defined to facilitate different types of evaluations, such as `evaluate_search()`, `evaluate_search_and_identify()`, `evaluate_search_and_code()`, and others.

### Initialization and Setup (Cluster 10)

1. **Function Calls**: Each function in `benchmark/claude_evaluation.py` corresponds to a specific type of evaluation. These functions create instances of the `TransitionRules` class, which define the rules and states for the evaluation process. For example, the `evaluate_search()` function sets up transitions specifically for search-related tasks, utilizing the `TransitionRules` class to establish the initial state as `SearchCode` and define possible state transitions.

2. **Creation of Evaluation Instances**: Each evaluation function instantiates t

In [20]:
# Dump every cluster in the current chunk graph (same loop as the cell that builds `cg`).
for cluster in cg.get_clusters():
    print(cluster.to_str())

Cluster 10: Automated Evaluation and Transition Management for Code Search and Identification
  Chunk: benchmark/claude_evaluation.py::5
  Chunk: benchmark/claude_evaluation.py::6
  Chunk: benchmark/claude_evaluation.py::7
  Chunk: benchmark/evaluation.py::2
  Chunk: benchmark/evaluation.py::6
  Chunk: benchmark/evaluation.py::7
  Chunk: benchmark/evaluation.py::9
  Chunk: benchmark/report_v2.py::1
  Chunk: benchmark/claude_evaluation.py::1
  Chunk: benchmark/claude_evaluation.py::3
  Chunk: benchmark/claude_evaluation.py::4
  Chunk: benchmark/claude_evaluation.py::8

Cluster 5: Automated Evaluation and Analysis of Code Predictions
  Chunk: benchmark/create_dataset.py::2
  Chunk: benchmark/report_v1.py::4
  Chunk: benchmark/report_v2.py::4
  Chunk: swebench/utils.py::4
  Chunk: benchmark/utils.py::4
  Chunk: moatless/file_context.py::14
  Chunk: index/code_index.py::4
  Chunk: index/code_index.py::5
  Chunk: repository/file.py::5
  Chunk: repository/git.py::1
  Chunk: utils/repo.py::2


In [19]:
# Count "ClusterRef" edges, then "ClusterNode" nodes, in the graph held by `cg`.
# Reaches into the private `_graph` attribute; every edge/node is expected to
# carry a "kind" entry in its data dict.
print(sum(1 for _src, _dst, attrs in cg._graph.edges(data=True) if attrs["kind"] == "ClusterRef"))
print(sum(1 for _node, attrs in cg._graph.nodes(data=True) if attrs["kind"] == "ClusterNode"))

274
31


In [21]:
# NOTE(review): `paths` is not assigned in any cell visible in this part of the
# notebook; it must come from a cell that was removed or lies outside this view.
print(len(paths))

104


In [32]:
# 2024-11-07 TODO: compare diff between full code and summarized code
from rtfs.summarize.summarize import Summarizer

# NOTE(review): `cg_fullcode` is not assigned in any cell visible in this part
# of the notebook -- confirm where it is built before re-running.
summarizer_fullcode = Summarizer(cg_fullcode)
# Per the saved output, this logs an "updating node: ..." line per summarized node.
summarizer_fullcode.summarize()


updating node:  1 with summary:  title='Agentic State Transition and Evaluation Framework' summary='This code defines a framework for handling agentic state transitions and evaluations in a software system. It includes classes and functions for defining transition rules, managing agentic states, and running evaluations based on specific parameters and conditions.' key_variables='AgenticState, TransitionRules, Evaluation, AgenticLoop'
updating node:  10 with summary:  title='Automated Evaluation and Reporting System for Code Search and Identification' summary='These code snippets define an automated evaluation system that processes and evaluates code search and identification tasks. It includes functionalities for running evaluations, processing results, generating reports, and managing transition states.' key_variables='evaluate_search_and_identify, evaluate_search_and_code, Evaluation, create_evaluation_name'
updating node:  5 with summary:  title='Automated Experiment Evaluation and 

In [77]:
# Print every path in compact form with references shown, separated by a rule.
# `paths` comes from a cell not visible in this part of the notebook.
for p in paths:
    print(p.to_str(verbose=False, show_refs=True))
    print("--------------------------------------------------------")

Agentic State Management and Transitions Framework
-[Content, AssistantMessage, Content, VerificationError]-> Agentic State Transition and Evaluation Framework
-[CodeBlockTypeGroup, CodeBlockType]-> Code Block Parsing and Management System
-[ReferenceScope]-> Code Relationship Management and Reference Parsing
-[ActionResponse, ActionResponse, ActionResponse, ActionResponse]-> Agentic State Management and Transitions Framework
-[Pending]-> Agentic State Transition and Evaluation Framework
-[ActionRequest]-> Agentic State Transition and Evaluation Framework
--------------------------------------------------------
Agentic State Management and Transitions Framework
-[Content, AssistantMessage, Content, VerificationError]-> Agentic State Transition and Evaluation Framework
-[CodeBlockTypeGroup, CodeBlockType]-> Code Block Parsing and Management System
-[ActionResponse, ActionResponse, ActionResponse, ActionResponse]-> Agentic State Management and Transitions Framework
-[ReferenceScope]-> Co

In [71]:
# look for vector indexing
# Keep only the paths for which find_cluster returns a truthy value for the
# cluster titled "Code Search and Index Feature".
search_paths = [p for p in paths if p.find_cluster("Code Search and Index Feature")]
print(len(search_paths))
for p in search_paths:
    print(p.to_str())
    print("___________________________________________________________________")

2
Workspace and Code Index Management for SWE-bench
[index/code_index.py::10 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::6 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::8 ] --SearchCodeHit--> [index/types.py::2]
[index/code_index.py::9 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::7 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::10 ] --SearchCodeHit--> [index/types.py::2]
[index/code_index.py::11 ] --SearchCodeResponse--> [index/types.py::2]
-> Code Search and Index Feature
[index/code_index.py::10 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::6 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::8 ] --SearchCodeHit--> [index/types.py::2]
[index/code_index.py::9 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::7 ] --SearchCodeResponse--> [index/types.py::2]
[index/code_index.py::10 ] --SearchCodeHit--> [index/types.py::2]
[index/code_index.py