Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ venv/
.ruff_cache/
.vscode/
.coverage
config.json
__pycache__/
Empty file.
12 changes: 12 additions & 0 deletions prometheus/configuration/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import json
from pathlib import Path

# Location of the runtime configuration: a file named "config.json" in the same
# directory as this module.
CONFIG_FILE = Path(__file__).parent / "config.json"


def load_config():
    """Read and parse the JSON configuration stored at ``CONFIG_FILE``.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for the
        file's content).

    Raises:
        FileNotFoundError: If ``CONFIG_FILE`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; reading it with the platform's locale encoding would
    # corrupt non-ASCII values on systems whose default is not UTF-8.
    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        return json.load(f)


# Loaded eagerly at import time: importing this module raises if CONFIG_FILE
# is missing or does not contain valid JSON.
config = load_config()
12 changes: 12 additions & 0 deletions prometheus/configuration/example_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"neo4j": {
"uri": "CHANGE_ME",
"username": "CHANGE_ME",
"password": "CHANGE_ME",
"database": "neo4j",
"batch_size": 1000
},
"knowledge_graph": {
"max_ast_depth": 5
}
}
23 changes: 15 additions & 8 deletions prometheus/graph/graph_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class ASTNode:

Attributes:
type: The tree-sitter node type.
start_line: The starting line number.
end_line: The ending line number.
start_line: The starting line number. 0-indexed and inclusive.
end_line: The ending line number. 0-indexed and inclusive.
text: The source code corresponding to the node.
"""

Expand Down Expand Up @@ -79,7 +79,9 @@ def to_neo4j_node(self) -> Union["Neo4jFileNode", "Neo4jASTNode", "Neo4jTextNode
)
case TextNode():
return Neo4jTextNode(
node_id=self.node_id, text=self.node.text, metadata=self.node.metadata
node_id=self.node_id,
text=self.node.text,
metadata=self.node.metadata,
)
case _:
raise ValueError("Unknown KnowledgeGraphNode.node type")
Expand Down Expand Up @@ -122,23 +124,28 @@ def to_neo4j_edge(
match self.type:
case KnowledgeGraphEdgeType.has_file:
return Neo4jHasFileEdge(
source=self.source.to_neo4j_node(), target=self.target.to_neo4j_node()
source=self.source.to_neo4j_node(),
target=self.target.to_neo4j_node(),
)
case KnowledgeGraphEdgeType.has_ast:
return Neo4jHasASTEdge(
source=self.source.to_neo4j_node(), target=self.target.to_neo4j_node()
source=self.source.to_neo4j_node(),
target=self.target.to_neo4j_node(),
)
case KnowledgeGraphEdgeType.parent_of:
return Neo4jParentOfEdge(
source=self.source.to_neo4j_node(), target=self.target.to_neo4j_node()
source=self.source.to_neo4j_node(),
target=self.target.to_neo4j_node(),
)
case KnowledgeGraphEdgeType.has_text:
return Neo4jHasTextEdge(
source=self.source.to_neo4j_node(), target=self.target.to_neo4j_node()
source=self.source.to_neo4j_node(),
target=self.target.to_neo4j_node(),
)
case KnowledgeGraphEdgeType.next_chunk:
return Neo4jNextChunkEdge(
source=self.source.to_neo4j_node(), target=self.target.to_neo4j_node()
source=self.source.to_neo4j_node(),
target=self.target.to_neo4j_node(),
)
case _:
raise ValueError(f"Unknown edge type: {self.type}")
Expand Down
6 changes: 4 additions & 2 deletions prometheus/graph/knowledge_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,16 @@ def _build_graph(self, root_dir: Path):
for child_file in sorted(file.iterdir()):
child_file_node = FileNode(
basename=child_file.name,
relative_path=str(child_file.relative_to(root_dir)),
relative_path=str(child_file.relative_to(root_dir).as_posix()),
)
kg_child_file_node = KnowledgeGraphNode(self._next_node_id, child_file_node)
self._next_node_id += 1
self._knowledge_graph_nodes.append(kg_child_file_node)
self._knowledge_graph_edges.append(
KnowledgeGraphEdge(
kg_file_path_node, kg_child_file_node, KnowledgeGraphEdgeType.has_file
kg_file_path_node,
kg_child_file_node,
KnowledgeGraphEdgeType.has_file,
)
)

Expand Down
Empty file added prometheus/tools/__init__.py
Empty file.
240 changes: 240 additions & 0 deletions prometheus/tools/graph_traversal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
from pathlib import Path
from pydantic import BaseModel, Field

from neo4j import GraphDatabase

from prometheus.parser import tree_sitter_parser
from prometheus.utils import neo4j_util

# Maximum number of rows returned by the search queries in this module; it is
# interpolated into their LIMIT clauses.
MAX_RESULT = 20

###############################################################################
# FileNode retrieval #
###############################################################################


class FindFileNodeWithBasenameInput(BaseModel):
    """Argument schema for ``find_file_node_with_basename``."""

    # The help text must be passed as ``description``: the first positional
    # argument of ``Field`` is the default value, which would silently make
    # the field optional with the help text as its value.
    basename: str = Field(description="The basename of FileNode to search for")


def find_file_node_with_basename(basename: str, driver: GraphDatabase.driver) -> str:
    """Find FileNodes whose ``basename`` property equals the given basename.

    Args:
        basename: The file name to match exactly.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query, at most ``MAX_RESULT`` rows
        ordered by ``node_id``.
    """
    # The value ends up inside a single-quoted Cypher string literal, so escape
    # backslashes and single quotes to keep it from terminating the literal.
    escaped_basename = basename.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""
    MATCH (f:FileNode {{ basename: '{escaped_basename}' }})
    RETURN f AS FileNode
    ORDER BY f.node_id
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class FindFileNodeWithRelativePathInput(BaseModel):
    """Argument schema for ``find_file_node_with_relative_path``."""

    # ``description=`` is required here: a positional string would be taken as
    # the field's default value instead of its help text.
    relative_path: str = Field(
        description="The relative_path of FileNode to search for"
    )


def find_file_node_with_relative_path(
    relative_path: str, driver: GraphDatabase.driver
) -> str:
    """Find FileNodes whose ``relative_path`` property equals the given path.

    Args:
        relative_path: The path to match exactly.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query, at most ``MAX_RESULT`` rows
        ordered by ``node_id``.
    """
    # Escape backslashes and single quotes so the path cannot break out of the
    # single-quoted Cypher string literal it is embedded in.
    escaped_path = relative_path.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode {{ relative_path: '{escaped_path}' }})
    RETURN f AS FileNode
    ORDER BY f.node_id
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


###############################################################################
# ASTNode retrieval #
###############################################################################


class FindASTNodeWithTextInput(BaseModel):
    """Argument schema for ``find_ast_node_with_text``."""

    # ``description=`` is required here: a positional string would be taken as
    # the field's default value instead of its help text.
    text: str = Field(description="Search ASTNode that exactly contains this text.")


def find_ast_node_with_text(text: str, driver: GraphDatabase.driver) -> str:
    """Find ASTNodes whose text contains the given text.

    Only ASTNodes reachable from a file's HAS_AST node through one or more
    PARENT_OF relationships are matched.

    Args:
        text: The substring to look for (Cypher ``CONTAINS``).
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each ASTNode with its FileNode, at
        most ``MAX_RESULT`` rows, shortest node text first.
    """
    # Escape backslashes and single quotes so the text cannot break out of the
    # single-quoted Cypher string literal it is embedded in.
    escaped_text = text.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_AST]-> (:ASTNode) -[:PARENT_OF*]-> (a:ASTNode)
    WHERE a.text CONTAINS '{escaped_text}'
    RETURN f as FileNode, a AS ASTNode
    ORDER BY SIZE(a.text)
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class FindASTNodeWithTypeInput(BaseModel):
    """Argument schema for ``find_ast_node_with_type``."""

    # ``description=`` is required here: a positional string would be taken as
    # the field's default value instead of its help text.
    type: str = Field(
        description="Search ASTNode that has this tree-sitter node type."
    )


def find_ast_node_with_type(type: str, driver: GraphDatabase.driver) -> str:
    """Find ASTNodes whose ``type`` property equals the given node type.

    Only ASTNodes reachable from a file's HAS_AST node through one or more
    PARENT_OF relationships are matched.

    Args:
        type: The node type to match exactly.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each ASTNode with its FileNode, at
        most ``MAX_RESULT`` rows ordered by ``node_id``.
    """
    # Escape backslashes and single quotes so the value cannot break out of the
    # single-quoted Cypher string literal it is embedded in.
    escaped_type = type.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_AST]-> (:ASTNode) -[:PARENT_OF*]-> (a:ASTNode {{ type: '{escaped_type}' }})
    RETURN f as FileNode, a AS ASTNode
    ORDER BY a.node_id
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class FindASTNodeWithTextInFileInput(BaseModel):
    """Argument schema for ``find_ast_node_with_text_in_file``."""

    # ``description=`` is required here: a positional string would be taken as
    # each field's default value instead of its help text.
    text: str = Field(description="Search ASTNode that exactly contains this text.")
    basename: str = Field(description="The basename of FileNode to search ASTNode.")


def find_ast_node_with_text_in_file(
    text: str, basename: str, driver: GraphDatabase.driver
) -> str:
    """Find ASTNodes containing the given text inside files with a basename.

    Only ASTNodes reachable from a file's HAS_AST node through one or more
    PARENT_OF relationships are matched.

    Args:
        text: The substring to look for (Cypher ``CONTAINS``).
        basename: The exact basename of the FileNode to search in.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each ASTNode with its FileNode, at
        most ``MAX_RESULT`` rows, shortest node text first.
    """
    # Both values are embedded in single-quoted Cypher string literals; escape
    # backslashes and single quotes so neither can terminate its literal.
    escaped_text = text.replace("\\", "\\\\").replace("'", "\\'")
    escaped_basename = basename.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_AST]-> (:ASTNode) -[:PARENT_OF*]-> (a:ASTNode)
    WHERE f.basename = '{escaped_basename}' AND a.text CONTAINS '{escaped_text}'
    RETURN f as FileNode, a AS ASTNode
    ORDER BY SIZE(a.text)
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class FindASTNodeWithTypeInFileInput(BaseModel):
    """Argument schema for ``find_ast_node_with_type_in_file``."""

    # ``description=`` is required here: a positional string would be taken as
    # each field's default value instead of its help text.
    type: str = Field(description="Search ASTNode with this tree-sitter node type.")
    basename: str = Field(description="The basename of FileNode to search ASTNode.")


def find_ast_node_with_type_in_file(
    type: str, basename: str, driver: GraphDatabase.driver
) -> str:
    """Find ASTNodes of the given node type inside files with a basename.

    Only ASTNodes reachable from a file's HAS_AST node through one or more
    PARENT_OF relationships are matched.

    Args:
        type: The node type to match exactly.
        basename: The exact basename of the FileNode to search in.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each ASTNode with its FileNode, at
        most ``MAX_RESULT`` rows, shortest node text first.
    """
    # Both values are embedded in single-quoted Cypher string literals; escape
    # backslashes and single quotes so neither can terminate its literal.
    escaped_type = type.replace("\\", "\\\\").replace("'", "\\'")
    escaped_basename = basename.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_AST]-> (:ASTNode) -[:PARENT_OF*]-> (a:ASTNode)
    WHERE f.basename = '{escaped_basename}' AND a.type = '{escaped_type}'
    RETURN f as FileNode, a AS ASTNode
    ORDER BY SIZE(a.text)
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class FindASTNodeWithTypeAndTextInput(BaseModel):
    """Argument schema for ``find_ast_node_with_type_and_text``."""

    # ``description=`` is required here: a positional string would be taken as
    # each field's default value instead of its help text.
    type: str = Field(description="Search ASTNode with this tree-sitter node type.")
    text: str = Field(description="Search ASTNode that exactly contains this text.")


def find_ast_node_with_type_and_text(
    type: str, text: str, driver: GraphDatabase.driver
) -> str:
    """Find ASTNodes of the given node type whose text contains the given text.

    Only ASTNodes reachable from a file's HAS_AST node through one or more
    PARENT_OF relationships are matched.

    Args:
        type: The node type to match exactly.
        text: The substring to look for (Cypher ``CONTAINS``).
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each ASTNode with its FileNode, at
        most ``MAX_RESULT`` rows, shortest node text first.
    """
    # Both values are embedded in single-quoted Cypher string literals; escape
    # backslashes and single quotes so neither can terminate its literal.
    escaped_type = type.replace("\\", "\\\\").replace("'", "\\'")
    escaped_text = text.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_AST]-> (:ASTNode) -[:PARENT_OF*]-> (a:ASTNode)
    WHERE a.type = '{escaped_type}' AND a.text CONTAINS '{escaped_text}'
    RETURN f as FileNode, a AS ASTNode
    ORDER BY SIZE(a.text)
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


###############################################################################
# TextNode retrieval #
###############################################################################


class FindTextNodeWithTextInput(BaseModel):
    """Argument schema for ``find_text_node_with_text``."""

    # ``description=`` is required here: a positional string would be taken as
    # the field's default value instead of its help text.
    text: str = Field(description="Search TextNode that exactly contains this text.")


def find_text_node_with_text(text: str, driver: GraphDatabase.driver) -> str:
    """Find TextNodes whose text contains the given text.

    Args:
        text: The substring to look for (Cypher ``CONTAINS``).
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each TextNode with the FileNode it
        is attached to via HAS_TEXT, at most ``MAX_RESULT`` rows ordered by
        ``node_id``.
    """
    # Escape backslashes and single quotes so the text cannot break out of the
    # single-quoted Cypher string literal it is embedded in.
    escaped_text = text.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_TEXT]-> (t:TextNode)
    WHERE t.text CONTAINS '{escaped_text}'
    RETURN f as FileNode, t AS TextNode
    ORDER BY t.node_id
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class FindTextNodeWithTextInFileInput(BaseModel):
    """Argument schema for ``find_text_node_with_text_in_file``."""

    # ``description=`` is required here: a positional string would be taken as
    # each field's default value instead of its help text.
    text: str = Field(description="Search TextNode that exactly contains this text.")
    basename: str = Field(description="The basename of FileNode to search TextNode.")


def find_text_node_with_text_in_file(
    text: str, basename: str, driver: GraphDatabase.driver
) -> str:
    """Find TextNodes containing the given text inside files with a basename.

    Args:
        text: The substring to look for (Cypher ``CONTAINS``).
        basename: The exact basename of the FileNode to search in.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each TextNode with its FileNode, at
        most ``MAX_RESULT`` rows ordered by ``node_id``.
    """
    # Both values are embedded in single-quoted Cypher string literals; escape
    # backslashes and single quotes so neither can terminate its literal.
    escaped_text = text.replace("\\", "\\\\").replace("'", "\\'")
    escaped_basename = basename.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""\
    MATCH (f:FileNode) -[:HAS_TEXT]-> (t:TextNode)
    WHERE f.basename = '{escaped_basename}' AND t.text CONTAINS '{escaped_text}'
    RETURN f as FileNode, t AS TextNode
    ORDER BY t.node_id
    LIMIT {MAX_RESULT}
    """
    return neo4j_util.run_neo4j_query(query, driver)


class GetNextTextNodeWithNodeIdInput(BaseModel):
    """Argument schema for ``get_next_text_node_with_node_id``."""

    # ``description=`` is required here: a positional string would be taken as
    # the default value, giving this int field a string default.
    node_id: int = Field(description="Get the next TextNode of this given node_id.")


def get_next_text_node_with_node_id(node_id: int, driver: GraphDatabase.driver) -> str:
    """Return the TextNode that follows the given TextNode via NEXT_CHUNK.

    Args:
        node_id: The ``node_id`` of the TextNode whose successor is wanted.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query.

    Raises:
        ValueError: If ``node_id`` cannot be converted to an integer.
    """
    # node_id is interpolated unquoted, so coerce it to int first: only a
    # number can then reach the query text.
    query = f"""\
    MATCH (a:TextNode {{ node_id: {int(node_id)} }}) -[:NEXT_CHUNK]-> (b:TextNode)
    RETURN b as TextNode
    """
    return neo4j_util.run_neo4j_query(query, driver)


###############################################################################
# Other #
###############################################################################


class PreviewFileContentWithBasenameInput(BaseModel):
    """Argument schema for ``preview_file_content_with_basename``."""

    # ``description=`` is required here: a positional string would be taken as
    # the field's default value instead of its help text.
    basename: str = Field(description="The basename of FileNode to preview.")


def preview_file_content_with_basename(
    basename: str, driver: GraphDatabase.driver
) -> str:
    """Preview the beginning of every file that has the given basename.

    When ``tree_sitter_parser.supports_file`` accepts the basename, the preview
    is the first 300 lines of the text of the file's HAS_AST node. Otherwise it
    is the text of the file's TextNode that has no incoming NEXT_CHUNK edge,
    i.e. the first chunk.

    Args:
        basename: The exact basename of the FileNode to preview.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query, ordered by the file's ``node_id``.
    """
    # Escape backslashes and single quotes so the basename cannot break out of
    # the single-quoted Cypher string literal it is embedded in.
    escaped_basename = basename.replace("\\", "\\\\").replace("'", "\\'")

    # Only the query that will actually run is built.
    if tree_sitter_parser.supports_file(Path(basename)):
        query = f"""\
        MATCH (f:FileNode {{ basename: '{escaped_basename}' }}) -[:HAS_AST]-> (a:ASTNode)
        WITH f, apoc.text.split(a.text, '\\R') AS lines
        RETURN f as FileNode, apoc.text.join(lines[0..300], '\\n') AS preview
        ORDER BY f.node_id
        """
    else:
        query = f"""\
        MATCH (f:FileNode {{ basename: '{escaped_basename}' }}) -[:HAS_TEXT]-> (t:TextNode)
        WHERE NOT EXISTS((:TextNode) -[:NEXT_CHUNK]-> (t))
        RETURN f as FileNode, t.text AS preview
        ORDER BY f.node_id
        """
    return neo4j_util.run_neo4j_query(query, driver)


class GetParentNodeInput(BaseModel):
    """Argument schema for ``get_parent_node``."""

    # Declared as int to agree with ``get_parent_node``, which takes an int and
    # interpolates it unquoted into the query.
    node_id: int = Field(description="Get parent node of node with this node_id")


def get_parent_node(node_id: int, driver: GraphDatabase.driver) -> str:
    """Return the parent node(s) of the node with the given ``node_id``.

    A parent is any node with a HAS_FILE, HAS_TEXT, HAS_AST or PARENT_OF
    relationship pointing at the given node.

    Args:
        node_id: The ``node_id`` of the node whose parent is wanted.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each parent with its first label,
        ordered by the parent's ``node_id``.

    Raises:
        ValueError: If ``node_id`` cannot be converted to an integer.
    """
    # node_id is interpolated unquoted, so coerce it to int first: only a
    # number can then reach the query text.
    query = f"""\
    MATCH (p) -[r]-> (c {{ node_id: {int(node_id)} }})
    WHERE type(r) IN ['HAS_FILE', 'HAS_TEXT', 'HAS_AST', 'PARENT_OF']
    RETURN p as ParentNode, head(labels(p)) as ParentNodeType
    ORDER BY p.node_id
    """
    return neo4j_util.run_neo4j_query(query, driver)


class GetChildrenNodeInput(BaseModel):
    """Argument schema for ``get_children_node``."""

    # Declared as int to agree with ``get_children_node``, which takes an int
    # and interpolates it unquoted into the query.
    node_id: int = Field(description="Get children nodes of node with this node_id")


def get_children_node(node_id: int, driver: GraphDatabase.driver) -> str:
    """Return the child nodes of the node with the given ``node_id``.

    A child is any node the given node points at through a HAS_FILE, HAS_TEXT,
    HAS_AST or PARENT_OF relationship.

    Args:
        node_id: The ``node_id`` of the node whose children are wanted.
        driver: The Neo4j driver used to run the query.

    Returns:
        The formatted result of the query: each child with its first label,
        ordered by the child's ``node_id``.

    Raises:
        ValueError: If ``node_id`` cannot be converted to an integer.
    """
    # node_id is interpolated unquoted, so coerce it to int first: only a
    # number can then reach the query text.
    # ChildNodeType is taken from the child's labels (c); reading the parent's
    # labels here would report the parent's type for every child.
    query = f"""\
    MATCH (p {{ node_id: {int(node_id)} }}) -[r]-> (c)
    WHERE type(r) IN ['HAS_FILE', 'HAS_TEXT', 'HAS_AST', 'PARENT_OF']
    RETURN c as ChildNode, head(labels(c)) as ChildNodeType
    ORDER BY c.node_id
    """
    return neo4j_util.run_neo4j_query(query, driver)
Empty file added prometheus/utils/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions prometheus/utils/neo4j_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import neo4j


def format_neo4j_result(result: neo4j.Result) -> str:
    """Render the rows of a Neo4j result as plain text.

    Each row becomes a ``Result <n>:`` header (numbered from 1) followed by one
    ``key: value`` line per column, with the keys in sorted order. Rows are
    separated by blank lines, and surrounding whitespace is stripped from the
    final text.

    Args:
        result: The result from a Neo4j query.

    Returns:
        A string representation of the result; empty when there are no rows.
    """
    pieces = []
    for number, record in enumerate(result.data(), start=1):
        pieces.append(f"Result {number}:\n")
        pieces.extend(f"{field}: {record[field]}\n" for field in sorted(record))
        pieces.append("\n\n")
    return "".join(pieces).strip()


def run_neo4j_query(
    query: str, driver: neo4j.GraphDatabase.driver, parameters=None
) -> str:
    """Run a read-only Neo4j query and format the result into a string.

    Args:
        query: The query to run.
        driver: The Neo4j driver to use.
        parameters: Optional mapping of Cypher query parameters (referenced as
            ``$name`` in the query). Prefer this over interpolating values into
            the query text. Defaults to None, meaning no parameters.

    Returns:
        A string representation of the result.
    """

    def query_transaction(tx):
        # The result is formatted inside the transaction function, i.e. while
        # the transaction that produced it is still open.
        result = tx.run(query, parameters)
        return format_neo4j_result(result)

    with driver.session() as session:
        return session.execute_read(query_transaction)
Loading
Loading