# Querying Knowledge Graphs with Cypher

### Import packages and set up Neo4j

In [None]:
import os
from dotenv import load_dotenv
import pandas as pd

from langchain_community.graphs import Neo4jGraph

# Load the key=value pairs from a .env file into the process environment.
# The return value is bound to `_` because it is not used afterwards.
_ = load_dotenv()

### Setting Environment Variables

In [None]:
# Neo4jGraph() is created further down without arguments, so its connection
# settings have to come from the environment (load_dotenv() above fills them
# in from .env). Copying os.getenv(name) back into os.environ[name] changes
# nothing when the variable is set and fails with an opaque
# "TypeError: str expected, not NoneType" when it is not, so instead check
# explicitly and report every missing name in one readable error.
required_neo4j_vars = ("NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_URI")
missing_neo4j_vars = [
    name for name in required_neo4j_vars if name not in os.environ
]
if missing_neo4j_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_neo4j_vars)
        + ". Define them in your .env file or shell before continuing."
    )

### Loading the Dataset

In [None]:
# Read the video game sales CSV into a DataFrame for a quick inspection.
# The path is relative, so it resolves against the notebook's working directory.
# NOTE(review): presumably the same data that the LOAD CSV query further down
# fetches from GitHub -- confirm the two copies are in sync.
df = pd.read_csv('../data/vgsales.csv')

In [None]:
# Display the DataFrame dimensions as (number of rows, number of columns).
df.shape

In [None]:
# Preview the first rows of the dataset (pandas shows 5 by default).
df.head()

In [None]:
# Preview the last rows of the dataset (pandas shows 5 by default).
df.tail()

In [None]:
# Count the distinct values in the Rank column. The import query below uses
# Rank as the MERGE key for Game nodes, so comparing this number with the row
# count from df.shape shows whether Rank identifies every row uniquely.
df['Rank'].nunique()

### Conversion of CSV to Neo4j

- Initialize a knowledge graph instance using LangChain's Neo4j integration

In [None]:
# Create the graph client that every query cell below goes through.
# NOTE(review): no arguments are passed, so the connection presumably comes
# from NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD in the environment --
# confirm against the installed langchain_community version.
kg = Neo4jGraph()

In [None]:
# Cypher import statement, executed by the Neo4j server:
#   * LOAD CSV WITH HEADERS reads the CSV from the GitHub URL and exposes each
#     record as `row`, addressed by column header.
#   * One Game node is MERGEd per integer Rank; its other properties are then
#     SET from the row, converting Year with toInteger and the *_Sales columns
#     with toFloat.
#   * Platform, Publisher and Genre nodes are MERGEd by name and linked from
#     the game via RELEASED_ON, PUBLISHED_BY and BELONGS_TO_GENRE.
# MERGE matches an existing pattern or creates it, so running the cell again
# reuses nodes and relationships that are already there.
# NOTE(review): this notebook creates no index or uniqueness constraint on the
# MERGE keys (Game.rank, Platform/Publisher/Genre.name) -- consider adding
# them before the import if it turns out to be slow.
# NOTE(review): the DataFrame above is read from a local file while this query
# reads a remote URL -- presumably the same data; confirm.
vgsales_query = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/KNAI-AI/knai-workshop/main/data/vgsales.csv' AS row
MERGE (g:Game {rank:toInteger(row.Rank)})
SET g.name = row.Name,
    g.platform = row.Platform,
    g.year = toInteger(row.Year),
    g.genre = row.Genre,
    g.publisher = row.Publisher,
    g.na_sales = toFloat(row.NA_Sales),
    g.eu_sales = toFloat(row.EU_Sales),
    g.jp_sales = toFloat(row.JP_Sales),
    g.other_sales = toFloat(row.Other_Sales),
    g.global_sales = toFloat(row.Global_Sales)

MERGE (p:Platform {name:row.Platform})
MERGE (g)-[:RELEASED_ON]->(p)

MERGE (pub:Publisher {name:row.Publisher})
MERGE (g)-[:PUBLISHED_BY]->(pub)

MERGE (ge:Genre {name:row.Genre})
MERGE (g)-[:BELONGS_TO_GENRE]->(ge)
"""

# Run the import. The statement has no RETURN clause, so the value displayed
# for this cell carries no data rows.
kg.query(vgsales_query)

In [None]:
# Refresh the client's stored schema after the import, then print it to see
# which node labels, properties and relationship types the graph now holds.
kg.refresh_schema()
print(kg.schema)

### Querying the video game knowledge graph
- Match all nodes in the graph

In [None]:
# Match every node regardless of label and count them. The result column is
# not aliased here; the following query names it with AS.
cypher = """
  MATCH (n) 
  RETURN count(n)
  """

In [None]:
# Run the query defined in the previous cell and keep the rows in `result`;
# the bare name on the last line makes the notebook display them.
result = kg.query(cypher)
result

In [None]:
# Same node count as before, but the column is aliased as numberOfNodes so the
# value can be looked up by a readable key.
cypher = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
  """

In [None]:
# Run the aliased count query; `result` is read again in the next cell.
result = kg.query(cypher)
result

In [None]:
# Rows are indexed by position and columns by their alias: take the first row
# of the count query and read its numberOfNodes column.
print(f"There are {result[0]['numberOfNodes']} nodes in this graph.")

- Match only the `Game` nodes by specifying the node label

In [None]:
# Restrict the match to nodes carrying the Game label and count them.
cypher = """
  MATCH (n:Game) 
  RETURN count(n) AS numberOfGames
  """
kg.query(cypher)

- Change the variable name in the node pattern match for improved readability

In [None]:
# Same Game count as the previous cell; only the pattern variable is renamed
# from n to g, which does not change the result.
cypher = """
  MATCH (g:Game) 
  RETURN count(g) AS numberOfGames
  """
kg.query(cypher)

- Match only the `Publisher` nodes

In [None]:
# Count the nodes carrying the Publisher label.
cypher = """
  MATCH (publisher:Publisher) 
  RETURN count(publisher) AS numberOfPublishers
  """
kg.query(cypher)

- Match a single publisher by specifying the value of the `name` property on the `Publisher` node

In [None]:
# Match the Publisher node whose name property equals "Nintendo" and return
# the whole node rather than a single property.
cypher = """
  MATCH (nintendo:Publisher {name:"Nintendo"}) 
  RETURN nintendo
  """
kg.query(cypher)

- Return only the `global_sales` property of the matched `Game` node

In [None]:
# Match Game nodes by name and return only their global_sales property.
# NOTE(review): the import keys Game nodes on rank, not name, so several nodes
# may share this name (for example one per platform) and the query may return
# more than one row -- confirm against the data.
cypher = """
  MATCH (gta:Game {name:"Grand Theft Auto: San Andreas"}) 
  RETURN gta.global_sales
  """
kg.query(cypher)

- Return two properties

In [None]:
# Match Game nodes by name and return two of their properties, global_sales
# and publisher, as separate columns.
cypher = """
  MATCH (marioBros:Game {name:"New Super Mario Bros."}) 
  RETURN marioBros.global_sales, marioBros.publisher
  """
kg.query(cypher)

### Cypher patterns with conditional matching

In [None]:
# Filter Game nodes with a WHERE clause: keep those whose year is at least
# 1990 and strictly below 2000, and return their names. There is no LIMIT, so
# every matching game is returned.
cypher = """
  MATCH (nineties:Game) 
  WHERE nineties.year >= 1990 
    AND nineties.year < 2000 
  RETURN nineties.name
  """


In [None]:
# Run the 1990s filter query defined in the previous cell and display the rows.
kg.query(cypher)

### Pattern matching with multiple nodes

In [None]:
# Match a two-node pattern: a Game connected by an outgoing BELONGS_TO_GENRE
# relationship to a Genre. Return both names, capped at 10 rows.
cypher = """
  MATCH (game:Game)-[:BELONGS_TO_GENRE]->(genre:Genre) 
  RETURN game.name, genre.name LIMIT 10
  """
kg.query(cypher)

In [None]:
# Starting from the Game named "New Super Mario Bros.", follow PUBLISHED_BY to
# the node it points at (bound to `m`), then follow incoming PUBLISHED_BY
# relationships back to the other nodes published by the same `m`.
# Neither `m` nor `otherGames` has a label; only the relationship type
# constrains them. There is no LIMIT, so all matches are returned.
cypher = """
  MATCH (game:Game {name:"New Super Mario Bros."})-[:PUBLISHED_BY]->(m)<-[:PUBLISHED_BY]-(otherGames) 
  RETURN otherGames.name, otherGames.global_sales
  """
kg.query(cypher)

### Delete data from the graph

Before

In [None]:
# Baseline before deleting anything: show the "Nintendogs" game together with
# the publisher it is linked to through PUBLISHED_BY.
cypher = """
MATCH (game:Game {name:"Nintendogs"})-[published_by:PUBLISHED_BY]->(publisher:Publisher)
RETURN game.name, publisher.name
"""
kg.query(cypher)

Deleting Data

A node can only be deleted after all of its relationships have been deleted, so we delete the relationship first.

In [None]:
# Bind the PUBLISHED_BY relationship to the variable `published_by` and delete
# only that relationship. The Game and Publisher nodes themselves are not
# deleted by this statement.
cypher = """
MATCH (game:Game {name:"Nintendogs"})-[published_by:PUBLISHED_BY]->(publisher:Publisher)
DELETE published_by
"""
kg.query(cypher)

After

In [None]:
# Repeat the lookup from the "Before" cell. The matched relationship was
# deleted, so the pattern should no longer match and an empty result is
# expected.
cypher = """
MATCH (game:Game {name:"Nintendogs"})-[published_by:PUBLISHED_BY]->(publisher:Publisher)
RETURN game.name, publisher.name
"""
kg.query(cypher)