# Installations

In [1]:
 !pip install neo4j openai langchain
 !pip install getpass4

Collecting neo4j
  Downloading neo4j-5.14.0.tar.gz (192 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/192.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/192.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.4/192.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.0.324-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

# Importing packages

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
import getpass

# Connecting to Neo4j Database

In [3]:
graph = Neo4jGraph(
    url="bolt://44.211.132.3:7687",
    username="neo4j",
    password="wing-rehabilitation-screw"
)

# Set up Environment Variables

In [4]:
import os
name = 'OPENAI_API_KEY'
secret = getpass.getpass("Enter the OpenAI API Key")
os.environ[name] = secret
print(f"Have successfully set up {name} as an environment variable.")

Enter the OpenAI API Key··········
Have successfully set up OPENAI_API_KEY as an environment variable.


# Querying the graph database to return answers to natural text

In [5]:
graph.refresh_schema()

In [6]:
print(graph.schema)


        Node properties are the following:
        [{'labels': 'Loc', 'properties': [{'property': 'name', 'type': 'STRING'}]}]
        Relationship properties are the following:
        [{'properties': [{'property': 'cost', 'type': 'INTEGER'}], 'type': 'ROAD'}]
        The relationships are the following:
        ['(:Loc)-[:ROAD]->(:Loc)']
        


In [7]:
# Setting up ChatGPT integration with Graph Database
chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True,
)

# Basic operations

### 1. Find nodes and edges.

In [None]:
chain.run("""
How many nodes does the graph have?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n) RETURN count(n)[0m
Full Context:
[32;1m[1;3m[{'count(n)': 6}][0m

[1m> Finished chain.[0m


'The graph has 6 nodes.'

In [None]:
chain.run("""
How many edges does the graph have?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH ()-[:ROAD]->() 
RETURN count(*) AS edge_count[0m
Full Context:
[32;1m[1;3m[{'edge_count': 10}][0m

[1m> Finished chain.[0m


'The graph has 10 edges.'

In [None]:
chain.run("""
How many nodes and edges does the graph have?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n) 
RETURN count(n) AS nodes, size((:Loc)-[:ROAD]->(:Loc)) AS edges[0m


ValueError: ignored

**Finding**: The LLM can find nodes and edges if prompted seperately but not together.

### 2. Finding the degree of each node.

In [None]:
chain.run("""
What is the degree of node a in the graph? Allow it to be case-insensitive.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[:ROAD]->()
WHERE toLower(a.name) = 'a'
RETURN count(*) AS degree[0m
Full Context:
[32;1m[1;3m[{'degree': 3}][0m

[1m> Finished chain.[0m


'The degree of node a in the graph is 3.'

In [None]:
chain.run("""
What is the degree of all nodes in the graph? Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[r:ROAD]->(m:Loc)
RETURN n.name, count(r), m.name[0m
Full Context:
[32;1m[1;3m[{'n.name': 'A', 'count(r)': 1, 'm.name': 'D'}, {'n.name': 'A', 'count(r)': 1, 'm.name': 'C'}, {'n.name': 'A', 'count(r)': 1, 'm.name': 'B'}, {'n.name': 'B', 'count(r)': 1, 'm.name': 'D'}, {'n.name': 'C', 'count(r)': 1, 'm.name': 'E'}, {'n.name': 'C', 'count(r)': 1, 'm.name': 'D'}, {'n.name': 'D', 'count(r)': 1, 'm.name': 'D'}, {'n.name': 'D', 'count(r)': 1, 'm.name': 'F'}, {'n.name': 'D', 'count(r)': 1, 'm.name': 'E'}, {'n.name': 'E', 'count(r)': 1, 'm.name': 'F'}][0m


KeyboardInterrupt: ignored

In [None]:
chain.run("""
What is the degree of all nodes and its edges in the graph? Use count function. To do this, first match all starting nodes. Then, use count function to find all edges linked to that node.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)
RETURN n.name, count((n)-[:ROAD]->()) AS degree[0m


ValueError: ignored

**Findings**: LLM can find the degree if the individual node is specified. Few-shot learning was attempted to see if LLM can learn how to generate correct prompt.

**Correct prompt**:

```
MATCH (a:Loc)-[r:ROAD]->()

RETURN a.name, count(r) AS degree
```



### 3. Find indegree and outdegree centrality measures

#### For individual nodes.

In [None]:
chain.run("""
Find the in degree centrality of node A. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[:ROAD]->(:Loc)
WHERE a.name = 'A'
RETURN count(*) AS inDegreeCentrality[0m
Full Context:
[32;1m[1;3m[{'inDegreeCentrality': 3}][0m

[1m> Finished chain.[0m


'The in degree centrality of node A is 3.'

In [None]:
chain.run("""
Find the number of incoming links to node A. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[:ROAD]->(b:Loc)
WHERE b.name = 'A'
RETURN count(a)[0m
Full Context:
[32;1m[1;3m[{'count(a)': 0}][0m

[1m> Finished chain.[0m


'The number of incoming links to node A is 0.'

In [None]:
chain.run("""
Find the indegree centrality of node A. Use count function. Indegree centrality is the number of incoming edges to a node.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[:ROAD]->(:Loc)
WHERE a.name = 'A'
RETURN count(*) AS indegree_centrality[0m
Full Context:
[32;1m[1;3m[{'indegree_centrality': 3}][0m

[1m> Finished chain.[0m


'The indegree centrality of node A is 3. This means that there are 3 incoming edges to node A.'

In [None]:
chain.run("""
Find the outdegree centrality of node A. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[:ROAD]->(b:Loc)
WHERE a.name = 'A'
RETURN count(b) AS outdegree_centrality[0m
Full Context:
[32;1m[1;3m[{'outdegree_centrality': 3}][0m

[1m> Finished chain.[0m


'The outdegree centrality of node A is 3.'

**Findings**:

1. It cannot find the indegree centrality but able to find the number of incoming links.
2. It is able to find the outdegree centrality.

#### For all nodes.

In [None]:
chain.run("""
Find the indegree centrality of all nodes. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)
RETURN n.name, count(()-[:ROAD]->(n)) AS indegree_centrality[0m


ValueError: ignored

In [None]:
chain.run("""
Find the number of incoming links to all nodes. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)<-[:ROAD]-()
RETURN n.name, count(*) AS incoming_links[0m
Full Context:
[32;1m[1;3m[{'n.name': 'B', 'incoming_links': 1}, {'n.name': 'C', 'incoming_links': 1}, {'n.name': 'D', 'incoming_links': 4}, {'n.name': 'E', 'incoming_links': 2}, {'n.name': 'F', 'incoming_links': 2}][0m

[1m> Finished chain.[0m


'The count function can be used to find the number of incoming links to all nodes.'

In [None]:
chain.run("""
Indegree centrality is the number of incoming edges to a node. Find the indegree centrality of all nodes. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)
RETURN n.name, count(()-[:ROAD]->(n)) AS indegree_centrality[0m


ValueError: ignored

In [None]:
chain.run("""
Find the outdegree centrality of all nodes. Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[r:ROAD]->()
RETURN n.name, count(r) AS outdegree_centrality[0m
Full Context:
[32;1m[1;3m[{'n.name': 'A', 'outdegree_centrality': 3}, {'n.name': 'B', 'outdegree_centrality': 1}, {'n.name': 'C', 'outdegree_centrality': 2}, {'n.name': 'D', 'outdegree_centrality': 3}, {'n.name': 'E', 'outdegree_centrality': 1}][0m

[1m> Finished chain.[0m


'The outdegree centrality of all nodes can be found using the count function.'

**Findings**:

1. It cannot find the indegree centrality but able to find the number of incoming links for all.
2. It is able to find the outdegree centrality for all nodes.

Correct Cypher queries:


```
`# Indegree centrality`
MATCH ()-[r:ROAD]->(a:Loc)
RETURN a.name, count(r) AS indegree

`# Outdegree centrality`
MATCH (a:Loc)-[r:ROAD]->()
RETURN a.name, count(r) AS outdegree

```





### 4. Find betweenness centrality measures

#### For individual nodes.

In [None]:
chain.run("""
Find the in betweenness centrality of node A.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc {name: 'A'})
CALL algo.betweenness.stream('Loc', 'ROAD', {direction: 'both'})
YIELD nodeId, centrality
WHERE id(a) = nodeId
RETURN centrality[0m


ClientError: ignored

**Findings**:

1. It is able to identify the correct query (direction: 'out' is missed).

#### For all nodes.

In [None]:
chain.run("""
Find the in betweenness centrality of all nodes.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mCALL algo.betweenness.stream('Loc', 'ROAD', {direction: 'both'})
YIELD nodeId, centrality
RETURN algo.getNodeById(nodeId).name AS Node, centrality AS BetweennessCentrality[0m


ClientError: ignored

**Findings**:

1. It can find the correct query.

Correct Cypher queries:


```
`# Betweenness centrality`
CALL algo.betweenness.stream('Loc', 'ROAD', {direction: 'out'})
YIELD nodeId, centrality
RETURN algo.getNodeById(nodeId).name AS Node, centrality AS BetweennessCentrality

```





### 5. Are there any isolated nodes?

In [None]:
chain.run("""
Are there any isolated nodes in the network? Can you name all?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)
WHERE NOT (n)-[:ROAD]-()
RETURN n.name[0m
Full Context:
[32;1m[1;3m[{'n.name': 'G'}][0m

[1m> Finished chain.[0m


'Yes, there are isolated nodes in the network. The only isolated node in the network is node G.'

**Findings**:

1. It can successfully identify the isolated nodes.

### 6. Is there a complete graph?

In [None]:
chain.run("""
Find all graphs in which each pair of nodes is connected by an edge.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n1:Loc)-[:ROAD]->(n2:Loc)
RETURN n1, n2[0m
Full Context:
[32;1m[1;3m[{'n1': {'name': 'A'}, 'n2': {'name': 'B'}}, {'n1': {'name': 'A'}, 'n2': {'name': 'C'}}, {'n1': {'name': 'D'}, 'n2': {'name': 'D'}}, {'n1': {'name': 'C'}, 'n2': {'name': 'D'}}, {'n1': {'name': 'B'}, 'n2': {'name': 'D'}}, {'n1': {'name': 'A'}, 'n2': {'name': 'D'}}, {'n1': {'name': 'D'}, 'n2': {'name': 'E'}}, {'n1': {'name': 'C'}, 'n2': {'name': 'E'}}, {'n1': {'name': 'E'}, 'n2': {'name': 'F'}}, {'n1': {'name': 'D'}, 'n2': {'name': 'F'}}][0m

[1m> Finished chain.[0m


'Based on the provided information, there are several graphs in which each pair of nodes is connected by an edge. Here are the graphs that satisfy this condition:\n\n1. Graph with nodes A, B, and C: \n   - A is connected to B\n   - A is connected to C\n\n2. Graph with nodes D and E:\n   - D is connected to D (self-loop)\n   - D is connected to E\n\n3. Graph with nodes B, C, and D:\n   - B is connected to D\n   - C is connected to D\n\n4. Graph with nodes A, D, and F:\n   - A is connected to D\n   - D is connected to F\n\n5. Graph with nodes C, D, and E:\n   - C is connected to D\n   - C is connected to E\n\n6. Graph with nodes E and F:\n   - E is connected to F\n\nThese are all the graphs in which each pair of nodes is connected by an edge based on the given information.'

**Findings**:

1. Need to be informed what is a complate graph.

### 7. Is there a clique?

In [8]:
chain.run("""
Find all cliques in the graph.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n1:Loc)-[r1:ROAD]->(n2:Loc)-[r2:ROAD]->(n3:Loc)-[r3:ROAD]->(n1)
RETURN n1, n2, n3[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but without any information about the graph, I am unable to find all the cliques. Could you please provide more details or specifications about the graph?"

In [10]:
chain.run("""
Find the largest subset of nodes that are directly and completely connected to the rest of the set.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[:ROAD]->(m:Loc)
WITH n, count(m) as connections
ORDER BY connections DESC
LIMIT 1
MATCH (n)-[:ROAD]->(m:Loc)
RETURN collect(m) as largest_subset[0m
Full Context:
[32;1m[1;3m[{'largest_subset': [{'name': 'D'}, {'name': 'C'}, {'name': 'B'}]}][0m

[1m> Finished chain.[0m


'The largest subset of nodes that are directly and completely connected to the rest of the set is composed of nodes D, C, and B.'

In [14]:
chain.run("""
A clique is the largest subset of nodes that are directly and completely connected to the rest of the set. Find all the cliques in the graph? Use count function.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[:ROAD]->(m:Loc)
WITH n, count(m) as numConnections
MATCH (n)-[:ROAD]->(m:Loc)
WITH n, collect(m) as connections, numConnections
WHERE size(connections) = numConnections
RETURN connections[0m
Full Context:
[32;1m[1;3m[{'connections': [{'name': 'D'}, {'name': 'C'}, {'name': 'B'}]}, {'connections': [{'name': 'D'}, {'name': 'F'}, {'name': 'E'}]}, {'connections': [{'name': 'E'}, {'name': 'D'}]}, {'connections': [{'name': 'D'}]}, {'connections': [{'name': 'F'}]}][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have enough information to answer your question."

**Findings**:
1. Cliques need to be defined to the LLM.
2. Preference towards using size() function which does not exist. It needs to be mentioned to use count().

### 8. What is the density of this graph?

findings:
1. Without telling LLM the formula of density, the query and answer is wrong.
2. After including the formula of density, LLM is able to generate the correct query. However, it will round the density to an integer which is not we want. (Density is always inside [0,1]
3. After including the formula of density and tell LLM to return a decimal, the answer is correct.

In [None]:
chain.run("""
Calculate the density of this graph.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (l1:Loc)-[r:ROAD]->(l2:Loc)
RETURN count(r) / (count(DISTINCT l1) * count(DISTINCT l2)) AS density[0m
Full Context:
[32;1m[1;3m[{'density': 0}][0m

[1m> Finished chain.[0m


'The density of the graph is 0.'

In [None]:
chain.run("""
Calculate the density of this graph. (the formula of density is: density = observed number of edges/maximum possible edges).
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH ()-[:ROAD]->() 
WITH count(*) AS observed_edges
MATCH (l:Loc)
WITH count(l) AS num_nodes, observed_edges
RETURN observed_edges / (num_nodes * (num_nodes - 1)) AS density[0m
Full Context:
[32;1m[1;3m[{'density': 0}][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have enough information to calculate the density of the graph."

In [None]:
chain.run("""
Calculate the density of this graph. (the formula of density is: density = observed number of edges/maximum possible edges).return the number in decimal
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH ()-[:ROAD]->() 
WITH count(*) AS observedEdges
MATCH (l:Loc)
WITH observedEdges, count(l) AS totalLocs
RETURN toFloat(observedEdges) / (toFloat(totalLocs) * (toFloat(totalLocs) - 1)) AS density[0m
Full Context:
[32;1m[1;3m[{'density': 0.23809523809523808}][0m

[1m> Finished chain.[0m


'The density of the graph is 0.23809523809523808.'

### 9. Reciprocity

Finding: Similar to density, we need to explicitly specify  "return the answer in decimal" to get the desired to get the desire result.

In [None]:
chain.run("""
Calculate the reciprocity of this database.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[r:ROAD]->(b:Loc)
WITH COUNT(*) AS total, SUM(CASE WHEN (b)-[:ROAD]->(a) THEN 1 ELSE 0 END) AS reciprocal
RETURN reciprocal / total AS reciprocity[0m
Full Context:
[32;1m[1;3m[{'reciprocity': 0}][0m

[1m> Finished chain.[0m


'The reciprocity of this database is 0.'

In [None]:
chain.run("""
Calculate the reciprocity of this database. return me in decimal
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Loc)-[r:ROAD]->(b:Loc)
WITH COUNT(*) AS total, SUM(CASE WHEN (b)-[:ROAD]->(a) THEN 1 ELSE 0 END) AS reciprocal
RETURN reciprocal / toFloat(total) AS reciprocity[0m
Full Context:
[32;1m[1;3m[{'reciprocity': 0.1}][0m

[1m> Finished chain.[0m


'The reciprocity of this database is 0.1.'

### 10. Global Clustering Coefficient

Findings
1. When there is isolated nodes in network, calculation will fail due to divide by zero error. (the degree of an isolated node is zero)
2. With specifying exclude isolated nodes, the query still contains error.

In [None]:
chain.run("""
Calculate the Average Clustering Coefficient of this network.
""")




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[r:ROAD]->(m:Loc)
WITH n, count(r) as degree
MATCH (n)-[:ROAD]->(m)-[:ROAD]->(n)
RETURN sum(degree) / (count(n) * (count(n) - 1)) as clustering_coefficient[0m


ClientError: ignored

In [None]:
chain.run("""
Calculate the Average Clustering Coefficient of this network. Exclude isolated nodes.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[r:ROAD]->(m:Loc)
WITH n, count(DISTINCT m) as degree
WHERE degree > 1
MATCH (n)-[:ROAD]->(m)-[:ROAD]->(o)
WHERE n <> o
WITH n, count(DISTINCT o) as triangles
RETURN sum(triangles) / sum(degree * (degree - 1)) as clustering_coefficient[0m


ValueError: ignored

**Findings**:

1. Need to be informed what is a complete graph.

### 11. Find the path length.

In [19]:
chain.run("""
Find the path length for Node A from Node C.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH path = (a:Loc {name: 'A'})-[:ROAD*]->(c:Loc {name: 'C'})
RETURN length(path) AS path_length[0m
Full Context:
[32;1m[1;3m[{'path_length': 1}][0m

[1m> Finished chain.[0m


'The path length for Node A from Node C is 1.'

**Findings:**
1. It treats the graph as an unweighted graph.

### 12. Find the local clustering coefficient.

#### Single Node

In [23]:
chain.run("""
Find the local clustering coefficient of all nodes. Exclude isolated nodes. Local clustering coefficient is the triangle count.
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Loc)-[:ROAD]->(m:Loc)-[:ROAD]->(o:Loc)-[:ROAD]->(n)
WHERE id(n) < id(m) AND id(m) < id(o)
RETURN n.name AS Node, count(*) AS TriangleCount
ORDER BY TriangleCount DESC[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information to answer your question."

**Findings:**
1. It needs context on what is local clustering effect.