In [1]:
!nvidia-smi
!nvcc --version

Mon Apr 14 17:53:10 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...    Off | 00000000:01:00.0 Off |                  N/A |
| N/A   59C    P8               6W /  60W |     10MiB /  4096MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
Installing collected packages: neo4j
Successfully installed neo4j-5.28.1


In [7]:
# import networkx as nx
# import nx_arangodb as nxadb

# from arango import ArangoClient

import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from random import randint
import re
import threading


from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
# from langchain_openai import ChatOpenAI
# from langchain_community.graphs import ArangoGraph
# from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import os
import subprocess
from git import Repo
from tree_sitter import Language, Parser
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from neo4j import GraphDatabase
from langchain_ollama import ChatOllama
# from langchain_deepseek import ChatDeepSeek

In [15]:
#!/usr/bin/env python3
"""
GitHub Repository Graph RAG - Simple Implementation
Creates a Neo4j graph from Python code in a GitHub repository and provides query capabilities.
"""

import os
import sys
import ast
import git
import logging
from pathlib import Path
import shutil
import argparse
from typing import Dict, List, Any, Set, Tuple

from neo4j import GraphDatabase
import ollama

# Setup basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Neo4j connection parameters
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "travelmate" 

# ----- Repository Ingestion -----

def clone_repository(repo_url: str, workspace_dir: str = "./workspace") -> Path:
    """Clone a GitHub repository and return its path."""
    # Create workspace directory if it doesn't exist
    workspace_path = Path(workspace_dir)
    workspace_path.mkdir(parents=True, exist_ok=True)
    
    # Extract repo name from URL
    repo_name = repo_url.split("/")[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-4]
    
    # Create directory for repo
    repo_path = workspace_path / repo_name
    
    # Remove if exists
    if repo_path.exists():
        logger.info(f"Repository {repo_name} already exists, removing")
        shutil.rmtree(repo_path)
    
    # Clone the repository
    logger.info(f"Cloning repository {repo_url} to {repo_path}")
    try:
        git.Repo.clone_from(repo_url, repo_path)
        logger.info(f"Successfully cloned {repo_name}")
        return repo_path
    except git.GitCommandError as e:
        logger.error(f"Failed to clone repository: {e}")
        raise

# ----- Python Code Parsing -----

def find_python_files(repo_path: Path) -> List[Path]:
    """Find all Python files in the repository."""
    return list(repo_path.glob("**/*.py"))

class CodeVisitor(ast.NodeVisitor):
    """AST visitor to extract code elements."""
    
    def __init__(self, file_path: str):
        self.file_path = file_path
        self.functions = []
        self.classes = []
        self.variables = []
        self.imports = []
        self.function_calls = []
        self.current_context = None
    
    def visit_FunctionDef(self, node):
        func_data = {
            'name': node.name,
            'line_number': node.lineno,
            'end_line': node.end_lineno,
            'args': [arg.arg for arg in node.args.args],
            'source': ast.unparse(node),
            'context': self.current_context
        }
        
        prev_context = self.current_context
        self.current_context = node.name
        self.functions.append(func_data)
        
        # Visit children
        self.generic_visit(node)
        
        self.current_context = prev_context
    
    def visit_ClassDef(self, node):
        class_data = {
            'name': node.name,
            'line_number': node.lineno,
            'end_line': node.end_lineno,
            'bases': [ast.unparse(base) for base in node.bases],
            'source': ast.unparse(node),
            'context': self.current_context
        }
        
        prev_context = self.current_context
        self.current_context = node.name
        self.classes.append(class_data)
        
        # Visit children
        self.generic_visit(node)
        
        self.current_context = prev_context
    
    def visit_Assign(self, node):
        for target in node.targets:
            if isinstance(target, ast.Name):
                var_data = {
                    'name': target.id,
                    'line_number': node.lineno,
                    'value': ast.unparse(node.value),
                    'context': self.current_context
                }
                self.variables.append(var_data)
        
        self.generic_visit(node)
    
    def visit_Import(self, node):
        for name in node.names:
            import_data = {
                'name': name.name,
                'line_number': node.lineno,
                'alias': name.asname,
                'context': self.current_context
            }
            self.imports.append(import_data)
        
        self.generic_visit(node)
    
    def visit_ImportFrom(self, node):
        for name in node.names:
            import_data = {
                'name': f"{node.module}.{name.name}" if node.module else name.name,
                'line_number': node.lineno,
                'alias': name.asname,
                'context': self.current_context
            }
            self.imports.append(import_data)
        
        self.generic_visit(node)
    
    def visit_Call(self, node):
        if isinstance(node.func, ast.Name):
            call_data = {
                'name': node.func.id,
                'line_number': node.lineno,
                'args': [ast.unparse(arg) for arg in node.args],
                'context': self.current_context
            }
            self.function_calls.append(call_data)
        elif isinstance(node.func, ast.Attribute):
            call_data = {
                'name': f"{ast.unparse(node.func.value)}.{node.func.attr}",
                'line_number': node.lineno,
                'args': [ast.unparse(arg) for arg in node.args],
                'context': self.current_context
            }
            self.function_calls.append(call_data)
        
        self.generic_visit(node)

def parse_python_file(file_path: Path) -> Dict:
    """Parse a Python file and extract code elements."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            source_code = f.read()
        
        tree = ast.parse(source_code)
        visitor = CodeVisitor(str(file_path))
        visitor.visit(tree)
        
        return {
            'file_path': str(file_path),
            'functions': visitor.functions,
            'classes': visitor.classes,
            'variables': visitor.variables,
            'imports': visitor.imports,
            'function_calls': visitor.function_calls
        }
    except SyntaxError as e:
        logger.error(f"Syntax error in {file_path}: {e}")
        return {
            'file_path': str(file_path),
            'functions': [],
            'classes': [],
            'variables': [],
            'imports': [],
            'function_calls': [],
            'error': str(e)
        }
    except Exception as e:
        logger.error(f"Error parsing {file_path}: {e}")
        return {
            'file_path': str(file_path),
            'functions': [],
            'classes': [],
            'variables': [],
            'imports': [],
            'function_calls': [],
            'error': str(e)
        }

# ----- Neo4j Graph Construction -----

def create_neo4j_schema(driver):
    """Create constraints and indexes in Neo4j."""
    with driver.session() as session:
        # Create constraints
        session.run("CREATE CONSTRAINT repository_name IF NOT EXISTS FOR (r:Repository) REQUIRE r.name IS UNIQUE")
        session.run("CREATE CONSTRAINT file_path IF NOT EXISTS FOR (f:File) REQUIRE f.path IS UNIQUE")
        session.run("CREATE CONSTRAINT function_unique IF NOT EXISTS FOR (f:Function) REQUIRE (f.name, f.file_path) IS UNIQUE")
        session.run("CREATE CONSTRAINT class_unique IF NOT EXISTS FOR (c:Class) REQUIRE (c.name, c.file_path) IS UNIQUE")
        
        # Create indexes
        session.run("CREATE INDEX file_name IF NOT EXISTS FOR (f:File) ON (f.name)")
        session.run("CREATE INDEX function_name IF NOT EXISTS FOR (f:Function) ON (f.name)")
        session.run("CREATE INDEX class_name IF NOT EXISTS FOR (c:Class) ON (c.name)")
        session.run("CREATE INDEX variable_name IF NOT EXISTS FOR (v:Variable) ON (v.name)")
        session.run("CREATE INDEX module_name IF NOT EXISTS FOR (m:Module) ON (m.name)")

def add_repository_to_graph(driver, repo_path: Path, repo_url: str):
    """Add repository to Neo4j graph."""
    repo_name = repo_path.name
    
    with driver.session() as session:
        # Create repository node
        session.run("""
            MERGE (r:Repository {name: $name})
            SET r.url = $url,
                r.path = $path
        """, name=repo_name, url=repo_url, path=str(repo_path))
        
        logger.info(f"Added repository {repo_name} to graph")

def add_file_to_graph(driver, file_data: Dict, repo_name: str):
    """Add file and its elements to Neo4j graph."""
    file_path = file_data['file_path']
    file_name = Path(file_path).name
    relative_path = file_path.replace(str(Path().absolute()), "").lstrip("/\\")
    
    with driver.session() as session:
        # Create file node
        session.run("""
            MATCH (r:Repository {name: $repo_name})
            MERGE (f:File {path: $path})
            SET f.name = $name,
                f.relative_path = $relative_path
            MERGE (r)-[:CONTAINS]->(f)
        """, repo_name=repo_name, path=file_path, name=file_name, relative_path=relative_path)
        
        # Add functions
        for func in file_data['functions']:
            session.run("""
                MATCH (f:File {path: $file_path})
                MERGE (func:Function {name: $name, file_path: $file_path})
                SET func.line_number = $line_number,
                    func.end_line = $end_line,
                    func.args = $args,
                    func.source = $source,
                    func.context = $context
                MERGE (f)-[:CONTAINS]->(func)
                MERGE (func)-[:DEFINED_IN]->(f)
            """, file_path=file_path, name=func['name'], line_number=func['line_number'],
                end_line=func['end_line'], args=func['args'], source=func['source'],
                context=func['context'])
        
        # Add classes
        for cls in file_data['classes']:
            session.run("""
                MATCH (f:File {path: $file_path})
                MERGE (c:Class {name: $name, file_path: $file_path})
                SET c.line_number = $line_number,
                    c.end_line = $end_line,
                    c.bases = $bases,
                    c.source = $source,
                    c.context = $context
                MERGE (f)-[:CONTAINS]->(c)
            """, file_path=file_path, name=cls['name'], line_number=cls['line_number'],
                end_line=cls['end_line'], bases=cls['bases'], source=cls['source'],
                context=cls['context'])
            
            # Add class inheritance relationships
            for base in cls['bases']:
                if '.' not in base:  # Local class
                    session.run("""
                        MATCH (c:Class {name: $class_name, file_path: $file_path})
                        MATCH (base:Class {name: $base_name})
                        MERGE (c)-[:INHERITS_FROM]->(base)
                    """, class_name=cls['name'], file_path=file_path, base_name=base)
        
        # Add variables
        for var in file_data['variables']:
            session.run("""
                MATCH (f:File {path: $file_path})
                MERGE (v:Variable {name: $name, file_path: $file_path, line_number: $line_number})
                SET v.value = $value,
                    v.context = $context
                MERGE (f)-[:CONTAINS]->(v)
            """, file_path=file_path, name=var['name'], line_number=var['line_number'],
                value=var['value'], context=var['context'])
            
            # Add context relationships (functions or classes that contain this variable)
            if var['context']:
                if any(func['name'] == var['context'] for func in file_data['functions']):
                    session.run("""
                        MATCH (v:Variable {name: $var_name, file_path: $file_path, line_number: $line_number})
                        MATCH (func:Function {name: $context_name, file_path: $file_path})
                        MERGE (func)-[:CONTAINS]->(v)
                    """, var_name=var['name'], file_path=file_path, line_number=var['line_number'],
                        context_name=var['context'])
                elif any(cls['name'] == var['context'] for cls in file_data['classes']):
                    session.run("""
                        MATCH (v:Variable {name: $var_name, file_path: $file_path, line_number: $line_number})
                        MATCH (c:Class {name: $context_name, file_path: $file_path})
                        MERGE (c)-[:CONTAINS]->(v)
                    """, var_name=var['name'], file_path=file_path, line_number=var['line_number'],
                        context_name=var['context'])
        
        # Add imports
        for imp in file_data['imports']:
            session.run("""
                MATCH (f:File {path: $file_path})
                MERGE (m:Module {name: $name})
                SET m.alias = $alias
                MERGE (f)-[:IMPORTS]->(m)
            """, file_path=file_path, name=imp['name'], alias=imp['alias'])
        
        # Add function calls
        for call in file_data['function_calls']:
            session.run("""
                MATCH (f:File {path: $file_path})
                MATCH (caller:Function {context: $context, file_path: $file_path})
                MERGE (callee:Function {name: $callee_name})
                MERGE (caller)-[:CALLS {line_number: $line_number, args: $args}]->(callee)
            """, file_path=file_path, context=call['context'], callee_name=call['name'].split('.')[-1],
                line_number=call['line_number'], args=call['args'])
    
    logger.info(f"Added file {file_name} to graph")

def build_neo4j_graph(repo_path: Path, repo_url: str, neo4j_uri: str, neo4j_user: str, neo4j_password: str):
    """Build a Neo4j graph from repository code."""
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
    
    try:
        # Create schema
        create_neo4j_schema(driver)
        
        # Add repository
        add_repository_to_graph(driver, repo_path, repo_url)
        
        # Find and parse Python files
        python_files = find_python_files(repo_path)
        logger.info(f"Found {len(python_files)} Python files")
        
        # Process each file
        for file_path in python_files:
            file_data = parse_python_file(file_path)
            add_file_to_graph(driver, file_data, repo_path.name)
        
        logger.info(f"Successfully built graph for repository {repo_path.name}")
    finally:
        driver.close()

# ----- Query Interface -----

def query_graph(query_text: str, neo4j_uri: str, neo4j_user: str, neo4j_password: str):
    """Query the graph using natural language via Ollama."""
    
    # Template for prompting Ollama to convert natural language to Cypher
    prompt_template = """
    You are a Neo4j Cypher query generator. Convert the following natural language question with no extra constraints from your side 
    into a valid Cypher query for a code repository graph with the following schema:
    
    Node Types:
    - Repository (properties: name, url, path)
    - File (properties: name, path, relative_path)
    - Function (properties: name, line_number, end_line, args, source, context, file_path)
    - Class (properties: name, line_number, end_line, bases, source, context, file_path)
    - Variable (properties: name, line_number, value, context, file_path)
    - Module (properties: name, alias)
    
    Relationship Types (DONT CONFUSE):
    - CONTAINS (Repository→File, File→Function, Class→Function, etc.)
    - CALLS (Function→Function)
    - IMPORTS (File→Module)
    - INHERITS_FROM (Class→Class)
    
    Question: {query}
    
    Return only the Cypher query without any explanation.
    """
    
    # Generate Cypher query using Ollama
    response = ollama.chat(
        model="codellama:latest",
        messages=[
            {
                "role": "user",
                "content": prompt_template.format(query=query_text)
            }
        ]
    )
    
    cypher_query = response['message']['content'].strip()
    logger.info(f"Generated Cypher query: {cypher_query}")
    
    # Execute query against Neo4j
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
    try:
        with driver.session() as session:
            result = session.run(cypher_query)
            records = [record.data() for record in result]
            return {
                "query": query_text,
                "cypher": cypher_query,
                "results": records
            }
    finally:
        driver.close()

In [16]:

# # ----- Main Function -----

# def main():
#     parser = argparse.ArgumentParser(description="GitHub Repository Graph RAG")
#     parser.add_argument("--repo", "-r", required=True, help="GitHub repository URL")
#     parser.add_argument("--query", "-q", help="Natural language query to run")
#     parser.add_argument("--workspace", "-w", default="./workspace", help="Workspace directory")
#     parser.add_argument("--neo4j-uri", default=NEO4J_URI, help="Neo4j URI")
#     parser.add_argument("--neo4j-user", default=NEO4J_USER, help="Neo4j username")
#     parser.add_argument("--neo4j-password", default=NEO4J_PASSWORD, help="Neo4j password")
    
#     args = parser.parse_args()
    
#     # Clone repository
#     repo_path = clone_repository(args.repo, args.workspace)
    
#     # Build graph
#     build_neo4j_graph(repo_path, args.repo, args.neo4j_uri, args.neo4j_user, args.neo4j_password)
    
#     # Run query if provided
#     if args.query:
#         result = query_graph(args.query, args.neo4j_uri, args.neo4j_user, args.neo4j_password)
#         print(f"Query: {result['query']}")
#         print(f"Cypher: {result['cypher']}")
#         print("Results:")
#         for i, record in enumerate(result['results']):
#             print(f"  {i+1}. {record}")

# if __name__ == "__main__":
#     main()

In [21]:
GITHUB_URL = "https://github.com/karask/python-bitcoin-utils"
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "travelmate"  

In [25]:
repo_path = clone_repository(GITHUB_URL, "./workspace")

build_neo4j_graph(repo_path, GITHUB_URL, NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

2025-04-16 15:30:20,204 - INFO - Repository python-bitcoin-utils already exists, removing
2025-04-16 15:30:20,208 - INFO - Cloning repository https://github.com/karask/python-bitcoin-utils to workspace/python-bitcoin-utils
2025-04-16 15:30:25,493 - INFO - Successfully cloned python-bitcoin-utils
2025-04-16 15:30:25,964 - INFO - Added repository python-bitcoin-utils to graph
2025-04-16 15:30:25,974 - INFO - Found 63 Python files
2025-04-16 15:30:26,425 - INFO - Added file setup.py to graph
2025-04-16 15:30:26,829 - INFO - Added file constants.py to graph
2025-04-16 15:30:30,721 - INFO - Added file keys.py to graph
2025-04-16 15:30:30,952 - INFO - Added file ripemd160.py to graph
2025-04-16 15:30:30,957 - INFO - Added file __init__.py to graph
2025-04-16 15:30:31,727 - INFO - Added file utils.py to graph
2025-04-16 15:30:32,037 - INFO - Added file script.py to graph
2025-04-16 15:30:32,265 - INFO - Added file bech32.py to graph
2025-04-16 15:30:32,591 - INFO - Added file block.py to grap

In [27]:
query_graph("Which all file changes do I need to bring if i change the function '_generate_merkle_path'", NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

2025-04-16 15:31:16,230 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-04-16 15:31:16,231 - INFO - Generated Cypher query: MATCH (f:Function)-[:CONTAINS]->(r:Repository)
WHERE f.name = '_generate_merkle_path' AND r.url = 'https://github.com/neo4j-examples/movies/blob/master/movies-import.csv'
WITH f, r
MATCH (f)-[:CALLS]->(g:Function)-[:CONTAINS]->(r)
WHERE NOT g.name = '_generate_merkle_path'
RETURN DISTINCT g;


{'query': "Which all file changes do I need to bring if i change the function '_generate_merkle_path'",
 'cypher': "MATCH (f:Function)-[:CONTAINS]->(r:Repository)\nWHERE f.name = '_generate_merkle_path' AND r.url = 'https://github.com/neo4j-examples/movies/blob/master/movies-import.csv'\nWITH f, r\nMATCH (f)-[:CALLS]->(g:Function)-[:CONTAINS]->(r)\nWHERE NOT g.name = '_generate_merkle_path'\nRETURN DISTINCT g;",
 'results': []}

## Agent

In [28]:
def query_graph_hosted(query_text: str, neo4j_uri: str, neo4j_user: str, neo4j_password: str):
    """Query the graph using natural language via Ollama."""
    
    # Template for prompting Ollama to convert natural language to Cypher
    prompt_template = """
    You are a Neo4j Cypher query generator. Do not alter any spaces or _ in a name. Convert the following natural language question with no extra constraints from your side 
    into a valid Cypher query for a code repository graph with the following schema:
    
    Node Types:
    - Repository (properties: name, url, path)
    - File (properties: name, path, relative_path)
    - Function (properties: name, line_number, end_line, args, source, context, file_path)
    - Class (properties: name, line_number, end_line, bases, source, context, file_path)
    - Variable (properties: name, line_number, value, context, file_path)
    - Module (properties: name, alias)
    
    Relationship Types (DONT CONFUSE THIS):
    - CONTAINS (Repository→File, File→Function, Class→Function, etc.)
    - CALLS (Function→Function)
    - IMPORTS (File→Module)
    - INHERITS_FROM (Class→Class)
    
    Question: {query}
    
    Return only the Cypher query without any explanation.
    """

    llm = ChatOllama(model="mistral-nemo:latest", base_url="http://french.braidpool.net:11434")
    
    # Create the system prompt that includes instructions and tool usage
    system_template = """THE GENERATED QUERY MUST BE A VALID CYPHER QUERY. ALSO ENSURE THAT THE DIRECTION OF THE RELATIONSHIP MUST BE CORRECT."""
    
    # user_template = "{question}"
    
    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_template),
        ("human", prompt_template)
    ])
    
    # Create the RAG chain
    chain = (
        prompt
        | llm
        | StrOutputParser()
    )
    query = query_text
    print("\nUser Query:", query)
    
    response = chain.invoke(query)
    print(response)
    cypher_query = (" ").join(response.split("\n"))
    # cypher_query = response['message']['content'].strip()
    logger.info(f"Generated Cypher query: {cypher_query}")
    
    # Execute query against Neo4j
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
    try:
        with driver.session() as session:
            result = session.run(cypher_query)
            records = [record.data() for record in result]
            return {
                "query": query_text,
                "cypher": cypher_query,
                "results": records
            }
    finally:
        driver.close()

In [33]:
query_graph_hosted("Which all file changes do I need to bring if i change the function '_generate_merkle_path'", NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)


User Query: Which all file changes do I need to bring if i change the function '_generate_merkle_path'


2025-04-16 15:33:41,960 - INFO - HTTP Request: POST http://french.braidpool.net:11434/api/chat "HTTP/1.1 200 OK"
2025-04-16 15:33:42,356 - INFO - Generated Cypher query: MATCH (f:Function {name: '_generate_merkle_path'})<-[CONTAINS]-(file:File) RETURN file


MATCH (f:Function {name: '_generate_merkle_path'})<-[CONTAINS]-(file:File)
RETURN file


{'query': "Which all file changes do I need to bring if i change the function '_generate_merkle_path'",
 'cypher': "MATCH (f:Function {name: '_generate_merkle_path'})<-[CONTAINS]-(file:File) RETURN file",
 'results': [{'file': {'path': 'workspace/python-bitcoin-utils/bitcoinutils/utils.py',
    'name': 'utils.py',
    'relative_path': 'workspace/python-bitcoin-utils/bitcoinutils/utils.py'}}]}

In [31]:
def answer_question(question: str, neo4j_uri: str, neo4j_user: str, neo4j_password: str):
    """
    Answer natural language questions about a bitcoin code repository using the Neo4j graph.
    
    Args:
        question: The natural language question about the repository
        neo4j_uri: Neo4j database URI
        neo4j_user: Neo4j username
        neo4j_password: Neo4j password
        
    Returns:
        A dictionary containing the question, answer, and supporting details
    """
    # Import necessary libraries
    from langchain_community.chat_models import ChatOllama
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from neo4j import GraphDatabase
    import logging
    
    # Setup basic logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    
    def query_graph(query_text: str):
        """Query the graph using natural language via Ollama."""
        
        # Template for prompting Ollama to convert natural language to Cypher
        prompt_template = """
        You are a Neo4j Cypher query generator. Do not alter any spaces or _ in a name. Convert the following natural language question with no extra constraints from your side 
        into a valid Cypher query for a code repository graph with the following schema:
        
        Node Types:
        - Repository (properties: name, url, path)
        - File (properties: name, path, relative_path)
        - Function (properties: name, line_number, end_line, args, source, context, file_path)
        - Class (properties: name, line_number, end_line, bases, source, context, file_path)
        - Variable (properties: name, line_number, value, context, file_path)
        - Module (properties: name, alias)
        
        Relationship Types (DONT CONFUSE THIS):
        - CONTAINS (Repository→File, File→Function, Class→Function, etc.)
        - CALLS (Function→Function)
        - IMPORTS (File→Module)
        - INHERITS_FROM (Class→Class)
        
        Question: {query}
        
        Return only the Cypher query without any explanation.
        """

        # Initialize ChatOllama with the specified model and base URL
        llm = ChatOllama(model="mistral-nemo:latest", base_url="http://french.braidpool.net:11434")
        
        # Create the system prompt that includes instructions
        system_template = """THE GENERATED QUERY MUST BE A VALID CYPHER QUERY. ALSO ENSURE THAT THE DIRECTION OF THE RELATIONSHIP MUST BE CORRECT."""
        
        # Create the prompt template with system and human messages
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_template),
            ("human", prompt_template)
        ])
        
        # Create the chain to process the query
        chain = (
            prompt
            | llm
            | StrOutputParser()
        )
        
        print("\nUser Query:", query_text)
        
        # Execute the chain with the query
        response = chain.invoke({"query": query_text})
        
        # Clean up the response by joining multiple lines
        cypher_query = (" ").join(response.split("\n"))
        logger.info(f"Generated Cypher query: {cypher_query}")
        
        # Execute query against Neo4j
        driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
        try:
            with driver.session() as session:
                result = session.run(cypher_query)
                records = [record.data() for record in result]
                return {
                    "query": query_text,
                    "cypher": cypher_query,
                    "results": records
                }
        finally:
            driver.close()

    # First get raw query results from Neo4j
    try: 
        query_results = query_graph(question)
    except Exception as e:
        logger.error(f"Error querying Neo4j: {e}")
        # Retry once
        query_results = query_graph(question)
    
    # Initialize ChatOllama for answer generation
    llm_answer = ChatOllama(model="mistral-nemo:latest", base_url="http://french.braidpool.net:11434")
    
    # Define a template for Ollama to interpret the results and generate an answer
    interpretation_template = """
    You are an expert code analyst. Based on the following question and query results from a 
    Neo4j graph database of Python code, provide a clear and concise answer.
    
    Question: {question}
    
    Cypher Query Used: {cypher}
    
    Query Results: {results}
    
    Provide a human-readable answer that directly answers the question. Include specific 
    details from the results like names, counts, and relationships. If the results are empty,
    state that no matching information was found. Be concise but thorough.
    """
    
    # Format the results as a readable string
    results_str = ""
    if not query_results["results"]:
        results_str = "No results found."
    else:
        for i, record in enumerate(query_results["results"]):
            results_str += f"Result {i+1}: {record}\n"
    
    # Create prompt template for the answer generation
    answer_prompt = ChatPromptTemplate.from_messages([
        ("human", interpretation_template)
    ])
    
    # Create chain for answer generation
    answer_chain = (
        answer_prompt
        | llm_answer
        | StrOutputParser()
    )
    
    # Generate the answer using the chain
    answer = answer_chain.invoke({
        "question": question,
        "cypher": query_results["cypher"],
        "results": results_str
    })
    
    return {
        "question": question,
        "answer": answer,
        "cypher_query": query_results["cypher"],
        "raw_results": query_results["results"]
    }

In [32]:
answer_question("Which all file changes do I need to bring if i change the function '_generate_merkle_path'", NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

  llm = ChatOllama(model="mistral-nemo:latest", base_url="http://french.braidpool.net:11434")



User Query: Which all file changes do I need to bring if i change the function '_generate_merkle_path'


2025-04-16 15:32:26,889 - INFO - Generated Cypher query: MATCH (f:Function {name: '_generate_merkle_path'})<-[:CONTAINS]-(file:File) RETURN file


{'question': "Which all file changes do I need to bring if i change the function '_generate_merkle_path'",
 'answer': 'You only need to bring changes for one file:\n\n- **File**: `utils.py`\n- **Path**: `workspace/python-bitcoin-utils/bitcoinutils/utils.py`',
 'cypher_query': "MATCH (f:Function {name: '_generate_merkle_path'})<-[:CONTAINS]-(file:File) RETURN file",
 'raw_results': [{'file': {'path': 'workspace/python-bitcoin-utils/bitcoinutils/utils.py',
    'name': 'utils.py',
    'relative_path': 'workspace/python-bitcoin-utils/bitcoinutils/utils.py'}}]}