In [1]:
# Standard library
import os
import warnings
from typing import List

# Third-party
from langchain_community.graphs import Neo4jGraph
from dotenv import load_dotenv
from openai import AzureOpenAI

# Read the local .env file so its variables are available through os.environ
load_dotenv()

# Warning control: ignore all warnings raised from this point on
warnings.filterwarnings("ignore")

**Load the environment variables from your `.env` file**

In [2]:
# Azure OpenAI connection settings.
# os.environ[...] (rather than os.getenv) is used on purpose: a missing variable
# raises KeyError right here instead of surfacing later as an obscure API error.
azure_openai_api_key: str = os.environ["OPENAI_API_KEY"]
azure_openai_endpoint: str = os.environ["OPENAI_API_BASE"]

**Create the Azure OpenAI client instance**

In [3]:
# Single Azure OpenAI client used for both the embedding and the chat-completion calls.
client = AzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_api_key,
    api_version="2023-07-01-preview",
)

def embed_text(text: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Embeds the given text using the specified model.

    Parameters:
        text (str): The text to be embedded.
        model (str): Name passed as `model` to the embeddings endpoint. Defaults
            to "text-embedding-ada-002", the value that used to be hard-coded,
            so existing calls behave exactly as before.

    Returns:
        List[float]: The embedding vector returned for the text.
    """
    response = client.embeddings.create(input=text, model=model)
    # A single input string is sent, so its embedding is the first entry of `data`.
    return response.data[0].embedding

**Add Neo4j credentials (this information needs to be kept secret)**

In [5]:
# Neo4j connection settings are taken from the environment (for example the .env
# file loaded at the top of the notebook) so real credentials never have to be
# written into the notebook itself. The fallbacks are the previous literal values
# and are only suitable for a throw-away local instance.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "12345678")  # set NEO4J_PASSWORD for anything non-local
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")

In [6]:
# Handle to the Neo4j database; every Cypher query below goes through it.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

**Sample question for RAG:**

In [11]:
# Natural-language question that is embedded and answered in the cells below.
question: str = "What movies are about crime?"

**Get the question's embedding:**

In [12]:
# Turn the question into a vector with the embedding helper defined above.
question_embedding = embed_text(text=question)
# Preview only the first 10 components instead of dumping the whole vector.
question_embedding[:10]

[0.023391522467136383,
 -0.032135896384716034,
 0.001850268105044961,
 -0.03603663668036461,
 -0.030766762793064117,
 0.019232455641031265,
 0.007162119727581739,
 -0.01681709662079811,
 -0.006209538783878088,
 -0.017695408314466476]

**Perform Similarity Search using the question's embedding on the vector index of the graph database and get the results**

In [14]:
# Nearest-neighbour lookup on the tagline vector index. The Cypher text (with its
# `//` comments) is sent unchanged; the question embedding and the number of
# matches to return are bound as query parameters.
result = graph.query(
    """
    with $question_embedding as question_embedding      // Use the provided question embedding as 'question_embedding'
    CALL db.index.vector.queryNodes(                    // Call the vector index query function
        'movie_tagline_embeddings',                     // Name of the vector index to query against
        $top_k,                                         // Number of top results to retrieve
        question_embedding                              // The question embedding to compare against
        ) YIELD node AS movie, score                    // Yield each matched node and its similarity score
    RETURN movie.title, movie.tagline, score            // Return the title, tagline, and similarity score of each movie
    """,
    params=dict(
        question_embedding=question_embedding,  # vector compared against the index
        top_k=3,                                # how many matches to retrieve
    ),
)
result

[{'movie.title': 'Heat',
  'movie.tagline': 'A Los Angeles crime saga',
  'score': 0.9320709109306335},
 {'movie.title': 'Tom and Huck',
  'movie.tagline': 'The Original Bad Boys.',
  'score': 0.9016362428665161},
 {'movie.title': 'Balto',
  'movie.tagline': 'Part Dog. Part Wolf. All Hero.',
  'score': 0.8778379559516907}]

**Pass the results to an LLM for the final answer**

In [16]:
# The search rows are interpolated as-is (the Python repr of the result list), so the
# model sees every returned title, tagline and similarity score next to the question.
prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"
messages = [
    {
        "role": "system",
        "content": "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer.",
    },
    {"role": "user", "content": prompt},
]

# os.environ[...] instead of os.getenv(...): if the deployment name is not configured,
# fail here with a KeyError naming the variable rather than sending model=None to the
# API. This matches how the other OpenAI settings are read earlier in the notebook.
response = client.chat.completions.create(
    model=os.environ["gpt_deployment_name"],
    messages=messages,
)

print(response.choices[0].message.content)

The movies about crime are "Heat" and "Tom and Huck".


-----------------------

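**Note: In this use case, there is a higher chance of hallucination: the search results give the LLM little evidence, so it falls back on its own judgment (for example, the answer above lists "Tom and Huck" as a crime movie although its tagline does not mention crime). The contents of the vector DB and the system role message can address this issue to some extent.**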

-----------------------

**Second example (in one go):**

In [18]:
def search_similar_movies(question_embedding: List[float], top_k: int = 5) -> List[dict]:
    """
    Run a vector similarity search on the 'movie_tagline_embeddings' index.

    Parameters:
        question_embedding (List[float]): Embedding of the user question.
        top_k (int): Number of closest matches to retrieve.

    Returns:
        List[dict]: One row per match with the keys 'movie.title',
        'movie.tagline' and 'score'.
    """
    return graph.query("""
    with $question_embedding as question_embedding
    CALL db.index.vector.queryNodes(
        'movie_tagline_embeddings', 
        $top_k, 
        question_embedding
        ) YIELD node AS movie, score
    RETURN movie.title, movie.tagline, score
    """,
        params={
            "question_embedding": question_embedding,
            "top_k": top_k,
        },
    )


def build_rag_messages(question: str, result: List[dict]) -> List[dict]:
    """
    Build the chat messages for the answering step.

    Parameters:
        question (str): The user question.
        result (List[dict]): Rows returned by the graph similarity search.

    Returns:
        List[dict]: A system message with the fixed instruction followed by a
        user message containing the question and the search rows.
    """
    prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"
    return [
        {
            "role": "system",
            "content": "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer.",
        },
        {"role": "user", "content": prompt},
    ]


question = "What movies are about adventure?"
question_embedding = embed_text(question)
result = search_similar_movies(question_embedding, top_k=5)

messages = build_rag_messages(question, result)
prompt = messages[-1]["content"]  # still exposed as a notebook variable for inspection

# os.environ[...] instead of os.getenv(...): a missing deployment name fails here with
# a KeyError naming the variable rather than sending model=None to the API.
response = client.chat.completions.create(
    model=os.environ["gpt_deployment_name"],
    messages=messages,
)

print(response.choices[0].message.content)

The movies about adventure are:
1. Toy Story - "The adventure takes off!"
2. Cutthroat Island - "The Course Has Been Set. There Is No Turning Back. Prepare Your Weapons. Summon Your Courage. Discover the Adventure of a Lifetime!"
3. Tom and Huck - "The Original Bad Boys."
4. Jumanji - "Roll the dice and unleash the excitement!"
