In [1]:
from langchain_neo4j import Neo4jGraph
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
import re

## Setup Local Model

In [2]:

llm = LlamaCpp(
    model_path="../models/mistral-7b-v0.1.Q5_K_M.gguf",
    #lib_path="../llama-cpp/libllama.so",
    temperature=0.0,
    max_tokens=80, # Keep this relatively low for Cypher generation
    verbose=False,
)

# Setup a potentially different LLM for final answer (optional, can be same)
# For final answers, you might want a slightly higher max_tokens
llm_final_answer = LlamaCpp(
    model_path="../models/mistral-7b-v0.1.Q5_K_M.gguf", # Using the same model for simplicity
    #lib_path="../llama-cpp/libllama.so",
    temperature=0.2, 
    max_tokens=200, 
    verbose=False,
)

llama_context: n_batch is less than GGML_KQ_MASK_PAD - increasing to 64
llama_context: n_ctx_per_seq (512) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
llama_context: n_batch is less than GGML_KQ_MASK_PAD - increasing to 64
llama_context: n_ctx_per_seq (512) < n_ctx_train (32768) -- the full capacity of the model will not be utilized


In [3]:
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="neo4jneo4j",
)

## Output Parser

In [4]:

class CypherOutputParser:
    def parse(self, text: str) -> str:
        text = text.strip()
        if "```" in text:
            match = re.search(r'```(?:cypher)?\n?(.*?)\n?```', text, re.DOTALL)
            if match:
                text = match.group(1).strip()
            else:
                text = text.replace('```', '').strip()
        prefixes = [
            "the answer is:", "here is the cypher query:", "cypher query:",
            "cypher:", "query:", "answer:", "question:"
        ]
        for prefix in prefixes:
            if text.lower().startswith(prefix):
                text = text[len(prefix):].strip()
        lines = text.split('\n')
        cypher_lines = []
        found = False
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if any(p in line.lower() for p in [
                'explanation', 'note:', 'this query', 'the query',
                'question:', 'schema:', 'generate only'
            ]):
                continue
            if (found or any(line.upper().startswith(k) for k in
                ['MATCH', 'CREATE', 'RETURN', 'WITH', 'MERGE', 'DELETE',
                 'DETACH', 'SET', 'REMOVE', 'UNWIND', 'CALL', 'SHOW'])):
                found = True
                cypher_lines.append(line)
        result = '\n'.join(cypher_lines).strip()
        result = re.sub(r'\n+(This|The|Note:|Explanation:).*', '', result, flags=re.IGNORECASE | re.DOTALL)
        return result

def add_limit_to_cypher(cypher: str, limit: int = 10) -> str:
    cypher_upper = cypher.upper()
    if 'LIMIT' not in cypher_upper:
        if 'ORDER BY' in cypher_upper:
            parts = cypher.split('ORDER BY')
            return f"{parts[0].strip()} LIMIT {limit} ORDER BY {parts[1]}"
        else:
            return f"{cypher.strip()} LIMIT {limit}"
    return cypher


## LangChain Pipeline

In [5]:
# 1. Get the graph schema
schema = graph.get_schema

# 2. Define the prompt template for LLM to generate Cypher
cypher_generation_prompt = PromptTemplate.from_template(
    """Generate ONLY a Cypher query. No explanation, no markdown, no code blocks.

Schema: {schema}
Question: {question}

Cypher:"""
)

# 3. Instantiate the parser
cypher_parser = CypherOutputParser()

## Define a function to execute the query and format results

In [6]:
def execute_and_format_cypher_query(input_dict: dict) -> dict:
    cypher_query = input_dict["cypher_query"]
    original_question = input_dict["question"] # Get original question
    print(f"\nExecuting Cypher: {cypher_query}")
    try:
        results = graph.query(cypher_query)
        if results:
            # Convert results to a string, handling potential list of dicts
            formatted_results = "\n".join([str(item) for item in results])
            print(f"Query results: {formatted_results}")
            return {"question": original_question, "results": formatted_results}
        else:
            return {"question": original_question, "results": "No results found."}
    except Exception as e:
        return {"question": original_question, "results": f"Error executing Cypher query: {str(e)}"}

## Define the prompt template for the final answer generation

In [7]:
final_answer_prompt = PromptTemplate.from_template(
    """Based on the following question and query results, provide a concise and clear answer.
If no results were found, state that.

Question: {question}
Query Results: {results}

Answer:"""
)


## Build the LangChain pipeline
The initial input to the chain is {"question": "..."}

In [8]:

cypher_generation_chain = (
    RunnablePassthrough.assign(schema=lambda x: schema) # Inject schema
    | cypher_generation_prompt
    | llm
    | RunnableLambda(cypher_parser.parse) # Parse raw LLM output
    | RunnableLambda(lambda cypher: add_limit_to_cypher(cypher, limit=6)) # Add limit
)

# Combine the Cypher generation with query execution and final answer generation
# We need to preserve the original question throughout the chain for the final answer prompt.
full_pipeline = (
    # Step 1: Pass the initial question through and generate Cypher
    {"question": RunnablePassthrough()}
    | RunnablePassthrough.assign(
        cypher_query=cypher_generation_chain.with_config(run_name="Generate Cypher") # Run the first part of the chain
    )
    # Step 2: Execute the Cypher query. This step receives {"question": "...", "cypher_query": "..."}
    | RunnableLambda(execute_and_format_cypher_query).with_config(run_name="Execute Query") # Output is {"question": "...", "results": "..."}
    # Step 3: Generate the final answer using the question and results
    | final_answer_prompt
    | llm_final_answer # Use the potentially different LLM for final answer
)

# USE

In [9]:
# Test the new full pipeline
test_question = "Show me titles of papers written in Spanish?"
print(f"Original Question: {test_question}")
final_result = full_pipeline.invoke({"question": test_question})
print("\nFinal Answer from LLM:", final_result)
print('end')

Original Question: Show me titles of papers written in Spanish?

Executing Cypher: MATCH (p:Paper)-[:WRITTEN_IN]->(l:Language) WHERE l.code = 'es' RETURN p.title LIMIT 6
Query results: {'p.title': "Ein Schluss für Rudolfs 'Alexander'? Überlegungen zum Cgm 203"}
{'p.title': 'Crítica, dialéctica y utopía'}
{'p.title': 'Das Überraschende: Wittgenstein sobre o surpreendente em Matemática'}
{'p.title': 'La versión aramaica del profeta Nahum'}
{'p.title': 'Einstein, Gödel, Heidegger. Algunas consideraciones sobre el concepto del tiempo'}
{'p.title': 'La patata: producción, comercialización e industria'}

Final Answer from LLM: 
The following papers were written in Spanish:
- Ein Schluss für Rudolfs 'Alexander'? Überlegungen zum Cgm 203
- Crítica, dialéctica y utopía
- Das Überraschende: Wittgenstein sobre o surpreendente em Matemática
- La versión aramaica del profeta Nahum
- Einstein, Gödel, Heidegger. Algunas consideraciones sobre el concepto del tiempo
- La patata: producción, comercializ

In [10]:
# Test the new full pipeline
test_question = "Show me titles of papers written in English?"
print(f"Original Question: {test_question}")
final_result = full_pipeline.invoke({"question": test_question})
print("\nFinal Answer from LLM:", final_result)
print('end')

Original Question: Show me titles of papers written in English?

Executing Cypher: MATCH (p:Paper)-[:WRITTEN_IN]->(l:Language) WHERE l.code = 'en' RETURN p.title LIMIT 6
Query results: {'p.title': 'Power play online: : Exploring master suppression techniques on Facebook'}
{'p.title': 'Soil groups of Western Australia : a simple guide to the main soils of Western Australia (2nd rev ed - superseded by 4th ed)'}
{'p.title': 'Precursor solution for polyimide/silica composite material, its manufacture method, and polymide/silica composite material having low volume shrinkage'}
{'p.title': 'Vertical Track Stiffness as a New Parameter Involved in Designing High-Speed Railway Infrastructure'}
{'p.title': 'Lowering of brachial pulse pressure in 9379 hypertensives with type 2 diabetes and reduction of cardiovascular events'}
{'p.title': 'Supporting students with disabilities'}

Final Answer from LLM: 
The titles of papers written in English are: 'Power play online: : Exploring master suppression