In [1]:
import os
import time
from fastapi import FastAPI, HTTPException
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.vectorstores import Neo4jVector
from langchain_core.documents import Document
from langchain_ollama import OllamaEmbeddings
from langchain_experimental.llms.ollama_functions import OllamaFunctions

In [2]:
# 1️⃣ CONNECT TO NEO4J DATABASE
# Module-level Neo4j handle; the ingest and query helpers in this notebook
# read it as a global.
# NOTE(review): URL and credentials are hard-coded here; consider loading them
# from the environment before sharing this notebook.
graph = Neo4jGraph(
    refresh_schema=False,  # presumably skips the schema refresh on connect (confirm in Neo4jGraph docs)
    url="bolt://localhost:7689",
    username="neo4j",
    password="password",
)

  graph = Neo4jGraph(


In [5]:
# 2️⃣ FUNCTION TO INGEST TEXT INTO THE GRAPH
def ingest_text(text):
    """Turn raw text into graph documents, persist them, and build a retriever.

    Steps, in order:
      1. Wrap ``text`` in a single ``Document``.
      2. Ask the ``llama3`` chat model (through ``LLMGraphTransformer``) to
         convert it into graph documents.
      3. Write those graph documents to Neo4j through the global ``graph``.
      4. Build a hybrid ``Neo4jVector`` index over ``Document`` nodes, embedding
         their ``text`` property into the ``embedding`` property.

    Args:
        text: The passage to ingest.

    Returns:
        None. The resulting retriever is published through the module-level
        global ``vector_retriever``.
    """
    global vector_retriever

    source_docs = [Document(page_content=text)]

    # LLM-driven extraction of nodes and relationships from the passage.
    extractor = LLMGraphTransformer(llm=ChatOllama(model="llama3", temperature=0))
    extracted = extractor.convert_to_graph_documents(source_docs)

    # Persist the extracted graph; include_source=True also stores the source document.
    graph.add_graph_documents(extracted, baseEntityLabel=True, include_source=True)

    # Settings for the vector index built on top of the nodes already in the graph.
    index_settings = {
        "embedding": OllamaEmbeddings(model="mxbai-embed-large"),
        "url": "bolt://localhost:7689",
        "username": "neo4j",
        "password": "password",
        "search_type": "hybrid",
        "node_label": "Document",
        "text_node_properties": ["text"],
        "embedding_node_property": "embedding",
    }
    vector_retriever = Neo4jVector.from_existing_graph(**index_settings).as_retriever()

# 3️⃣ FUNCTION TO QUERY NEO4J FOR ENTITY RELATIONSHIPS
def query_neo4j(question):
    """Extract entities from a question and fetch their relationships from Neo4j.

    The question is sent to the ``llama3`` model with a structured-output
    schema to obtain a list of entity names. For each name, the outgoing
    relationships of the ``Person`` node whose ``id`` equals that name are
    read from the global ``graph`` (at most 50 per entity).

    Args:
        question: Natural-language question to extract entity names from.

    Returns:
        A string with one ``source - RELATIONSHIP -> target`` line per
        relationship found, lines separated by ``"\\n"``. Empty string when no
        entity was extracted or no relationship matched.
    """

    # Schema the model is asked to fill in.
    class Entities(BaseModel):
        names: list[str] = Field(..., description="Extracted entities from text")

    # Prompt for entity extraction.
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Extract organization and person entities from the text."),
        ("human", "Extract entities from: {question}")
    ])

    # include_raw=True makes the chain return a dict that carries the raw
    # model message alongside the parsed result.
    llm = OllamaFunctions(model="llama3", format="json", temperature=0)
    entity_chain = prompt | llm.with_structured_output(Entities, include_raw=True)

    response = entity_chain.invoke({"question": question})

    # Prefer the validated result when parsing succeeded; otherwise dig the
    # names out of the raw tool call without assuming its exact shape.
    parsed = response.get("parsed")
    names = getattr(parsed, "names", None)
    if names is None:
        tool_calls = getattr(response.get("raw"), "tool_calls", None) or []
        args = (tool_calls[0].get("args") or {}) if tool_calls else {}
        # The model may nest the arguments under a "properties" key (the shape
        # this notebook originally relied on) or return them flat.
        if isinstance(args.get("properties"), dict):
            args = args["properties"]
        names = args.get("names") or []
    # Guard against a bare string so we never iterate it character by character.
    entities = [names] if isinstance(names, str) else list(names)

    print("Retrieved Entities:", entities)

    # Collect every relationship line first and join once, so the output of
    # consecutive entities is newline-separated instead of fused together.
    lines = []
    for entity in entities:
        # NOTE(review): only :Person nodes are matched although the prompt also
        # asks for organizations — confirm whether other labels should be queried.
        query_response = graph.query(
            """MATCH (p:Person {id: $entity})-[r]->(e)
               RETURN p.id AS source_id, type(r) AS relationship, e.id AS target_id
               LIMIT 50""",
            {"entity": entity}
        )
        lines.extend(
            f"{el['source_id']} - {el['relationship']} -> {el['target_id']}"
            for el in query_response
        )

    return "\n".join(lines)

In [11]:
# 4️⃣ FUNCTION TO PERFORM HYBRID SEARCH
def query_ollama(question):
    """Answer a question from combined graph and vector-search context.

    The context handed to the ``llama3`` model is the relationship text
    returned by ``query_neo4j`` plus the page contents returned by the global
    ``vector_retriever`` for the same question.

    Args:
        question: The question to answer.

    Returns:
        The model's answer as a plain string.
    """
    # Prompt template; the model is told to rely on the supplied context only.
    template = """Answer the question based only on the following context:
    {context}
    Question: {question}
    Answer:"""

    # prompt -> chat model -> plain-string output
    answer_chain = (
        ChatPromptTemplate.from_template(template)
        | ChatOllama(model="llama3", temperature=0)
        | StrOutputParser()
    )

    # Gather both context sources: graph relationships and retrieved passages.
    graph_context = query_neo4j(question)
    passages = [doc.page_content for doc in vector_retriever.invoke(question)]
    joined_passages = "#Document ".join(passages)
    context = f"Graph data: {graph_context}\nVector data: {joined_passages}"

    # The prompt has two variables, so the chain takes a dict rather than a bare string.
    return answer_chain.invoke({"context": context, "question": question})

# The APOC plugin is needed on the Neo4j server!

In [7]:
# Sample passage used to populate the knowledge graph for this demo.
text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""

# Ingest the passage: ingest_text() writes the extracted graph to Neo4j and
# sets the global `vector_retriever` that query_ollama() reads later.
print("Ingesting text into Neo4j...")
ingest_text(text)

Ingesting text into Neo4j...


In [12]:
# Ask a question
print("\nQuerying the system...\n")
response = query_ollama("Who are Marie Curie and Pierre Curie?")

print("Final Answer:\n", response)


Querying the system...

Retrieved Entities: ['Marie Curie', 'Pierre Curie']




Final Answer:
 Marie Curie is a Polish and naturalised-French physicist and chemist. Pierre Curie is her husband.
