In [8]:
%pip install python-dotenv neo4j openai langchain langchain_openai langchain-community



In [29]:
import os

from dotenv import load_dotenv

# Runs before the graph and OpenAI clients are constructed further down.
# Presumably this loads credentials/connection settings from a local .env
# file into the environment -- confirm which variables are expected.
load_dotenv()

from langchain.graphs import Neo4jGraph
from langchain.embeddings import OpenAIEmbeddings


In [36]:
# Handle to the Neo4j knowledge graph; later cells run Cypher through graph.query().
# No connection arguments are passed here -- presumably the URI and credentials
# come from the environment populated by load_dotenv(); TODO confirm.
graph = Neo4jGraph()

In [59]:
from pydantic import BaseModel
from typing import List

# Pydantic model wrapping a plain list of strings.
# NOTE(review): nothing in the visible cells references this model; it looks
# intended for validating the entity list returned by the LLM -- confirm or remove.
class StringList(BaseModel):
    # The wrapped list of string values.
    strings_list: List[str]

# System-prompt template for the entity-identification step.
#
# It has one placeholder, {knowledge_graph_nodes}, which the pipeline cell
# fills with the list of node names before sending the text as the "system"
# message. The model is told to answer with a list of strings, using
# ["None"] as the "no match" sentinel; the pipeline cell parses the reply with
# ast.literal_eval and compares the first element against 'None'.
#
# NOTE(review): the text alternates between "List of entities" and
# "List of Nodes" for the same thing, and instruction 4 reads "Do no return"
# (presumably "Do not return"). The wording is left untouched here because
# any change to the prompt text can change model behaviour.
identifyEntitiesPrompt = (
    '''
You are given a list of entities and a question. Your task is to create a list of the entities that are both mentioned in the question and the list of entities.

**Task**:
Based on the given question, identify the nodes that already exist in our knowledge graph.

**List of entities**:
{knowledge_graph_nodes}

**Instructions**:
1. Your **output should be a list of strings**.
2. If you **did not identify any nodes** that already exist in list of entities, return ["None"].
3. If you find something similar in both the question and list of entity e.g. "Mary Tan" and "Mary", use the result from the list of entities
4. Do no return anything not found in the List of entities
5. **Non-compliance with these instructions will result in termination**.

**Example Output**:
- Given the question "Bob has a car?", the output should be:
["Bob"]
  - if "Bob" is present in the List of Nodes but "Car" is not present.

- Given the question "Who is PM Lee?", the output should be:
["Prime Minister Lee"]
  - if "Prime Minister Lee" is present in the List of Nodes because "PM" is an abbreviation for "Prime Minister".

- Given the question "Who is Kenneth Gao?", the output should be:
["Kenneth"]
  - if "Kenneth" is present in the List of Nodes because "Kenneth Gao" from the question may refer to "Kenneth" in the List of Nodes.

- Given the question "Who is Gao?", the output should be:
["None"]
  - if "Gao" is not present in the List of Nodes

**Take note** to always use values from the List of Nodes.
    '''
)

# System-prompt template for the follow-up-question step.
#
# It has one placeholder, {init_question}, which the pipeline cell fills with
# the user's original question. The node to ask about is not part of this
# template; the pipeline cell sends it separately as the "user" message.
# The prompt asks for a single question returned as bare text.
#
# NOTE(review): the text contains several typos ("that follow",
# "This additional questions", "formating", "result to termination"). They are
# left as-is because editing the prompt wording can change model behaviour.
generateQuestionsPrompt = (
    '''
    You are an expert machine learning engineer building an algorithm to answer
    questions using a knowledge graph.

    **Task**:
    In the prompt that follow, you will be given a node. Your task is to
    generate an additional question related to the initial question:
    {init_question}.

    This additional questions should help contextualise the initial
    question in relation to the given node and be useful when searching the
    knowledge graph later on.

    **Instructions**:
    1. Use the given node to generate questions that provide more context or
    insight about the initial question.
    2. Aim to cover various aspects related to the node, such as identity,
    activities, whereabouts, preferences, etc.
    3. The output should be a question with no additional formating, title etc
    4. Non-compliance to the instruction will result to termination

    **Example**:
    Given the node "Bob" and the initial question "Is Bob safe?", the output
    should be:
    Where is Bob?
    '''
)

In [62]:
import openai
from openai import OpenAI
import ast
from langchain_core.prompts import PromptTemplate


# Pipeline for one question:
#   1. list every named node in the knowledge graph,
#   2. ask the model which of those nodes the question mentions,
#   3. ask the model for one follow-up question about the first matched node.

# The question to answer against the knowledge graph.
prompt = "How was the minister involved in the bombing incident?"

client = OpenAI()

# Retrieve the name of every node that has a `name` property.
list_nodes_raw = graph.query(
    '''
    MATCH (n)
    WHERE n.name IS NOT NULL
    RETURN n.name
    ''')

# Each result row is indexed by the returned column, 'n.name'.
# The loop variable is `record` rather than `dict`, so the builtin type name
# is not hidden inside the comprehension.
list_nodes = [record['n.name'] for record in list_nodes_raw]
print(list_nodes)

# Render the template into a separate variable. Assigning the result back to
# identifyEntitiesPrompt would replace the template with its rendered text, so
# re-running this cell would work from a prompt that no longer has the
# {knowledge_graph_nodes} placeholder and still embeds the previous node list.
identify_entities_system_prompt = PromptTemplate.from_template(
    identifyEntitiesPrompt
).format(knowledge_graph_nodes=list_nodes)

# Ask the model which known nodes the question refers to.
response = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": identify_entities_system_prompt,
        },
        {
            "role": "user",
            "content": prompt,
        },
    ],
    model="gpt-3.5-turbo",
)

# Raw model reply; the prompt asks for a Python-style list such as ["Home Minister"].
res = response.choices[0].message.content
print(res)

# Convert the reply to a list. The reply is free-form model text, so it may be
# missing, may not be a valid literal, or may not be a list at all; every one
# of those cases is treated as "no node identified" instead of raising.
try:
    parsed_reply = ast.literal_eval(res)
except (ValueError, TypeError, SyntaxError):
    print("Could not parse the model reply as a list; treating it as no match.")
    parsed_reply = []

if isinstance(parsed_reply, (list, tuple)):
    # Keep only string entries: the node is later sent as message content.
    nodes = [item for item in parsed_reply if isinstance(item, str)]
else:
    nodes = []

# Use the first node only; 'None' is the sentinel the prompt defines for "no match"
# and is also used here when the list is empty.
node = nodes[0] if nodes else 'None'

if node != 'None':
    # Insert the initial question into the system prompt to give context;
    # the node itself goes in as the user message.
    generateQuestionPrompt = PromptTemplate.from_template(
        generateQuestionsPrompt
    ).format(init_question=prompt)

    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": generateQuestionPrompt,
            },
            {
                "role": "user",
                "content": node,
            },
        ],
        model="gpt-3.5-turbo",
    )

    # Print the follow-up question generated for the first matched node
    # (the prompt asks for a single question).
    print(response.choices[0].message.content)






['Ap', 'Man', 'Malaysian Police Station', 'Officers', 'Recluse', 'Action', 'Jemaah Islamiyah Extremist Group', 'Home Minister', 'Saturday', 'Johor State', 'Singapore', 'Hours', 'Machete', 'Police Constable', 'Weapon', 'Third Officer', 'Shot Dead', 'Firearms', 'Saifuddin Nasution', 'Lone Wolf Attack', 'Investigation', 'Wider Public', 'Motivation', 'Understanding', 'Larger Mission', 'Father', 'Materials', 'Parents', 'Siblings', 'Detained', 'Searching', 'Kuala Lumpur', 'Malaysia']
['Home Minister']
What actions did the Home Minister take following the bombing incident?


In [39]:
# Fetch the name of every graph node that has a `name` property.
list_nodes_raw = graph.query(
    '''
    MATCH (n)
    WHERE n.name IS NOT NULL
    RETURN n.name
    ''')

# Pull the single returned column, 'n.name', out of each result row.
list_nodes = [row['n.name'] for row in list_nodes_raw]

print(list_nodes)

['Ap', 'Man', 'Malaysian Police Station', 'Officers', 'Recluse', 'Action', 'Jemaah Islamiyah Extremist Group', 'Home Minister', 'Saturday', 'Johor State', 'Singapore', 'Hours', 'Machete', 'Police Constable', 'Weapon', 'Third Officer', 'Shot Dead', 'Firearms', 'Saifuddin Nasution', 'Lone Wolf Attack', 'Investigation', 'Wider Public', 'Motivation', 'Understanding', 'Larger Mission', 'Father', 'Materials', 'Parents', 'Siblings', 'Detained', 'Searching', 'Kuala Lumpur', 'Malaysia']


<class 'str'>


1. What is Kenneth's occupation?
2. Does Kenneth have any siblings?
3. Where does Kenneth live?
4. What are Kenneth's hobbies?
5. Is Kenneth married?
