# Querying Knowledge Graphs with Cypher

### Import packages and set up Neo4

In [1]:
import os
from dotenv import load_dotenv
import pandas as pd

from langchain_community.graphs import Neo4jGraph

_ = load_dotenv()

### Setting Environment Variables

In [2]:
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")

### Loading the Dataset

In [3]:
df = pd.read_csv('../data/vgsales.csv')

In [4]:
df.shape

(16598, 11)

In [5]:
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [6]:
df.tail()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.0,0.0,0.0,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.0,0.0,0.0,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.0,0.0,0.0,0.0,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.0,0.01,0.0,0.0,0.01
16597,16600,Spirits & Spells,GBA,2003.0,Platform,Wanadoo,0.01,0.0,0.0,0.0,0.01


In [7]:
df['Rank'].nunique()

16598

### Conversion of CSV to Neo4j

- Initialize a knowledge graph instance using LangChain's Neo4j integration

In [8]:
kg = Neo4jGraph()

In [9]:
vgsales_query = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/IvanReznikov/kg_snet_vbrl/isham-workshop/workshop/data/vgsales.csv' AS row
MERGE (g:Game {rank:toInteger(row.Rank)})
SET g.name = row.Name,
    g.platform = row.Platform,
    g.year = toInteger(row.Year),
    g.genre = row.Genre,
    g.publisher = row.Publisher,
    g.na_sales = toFloat(row.NA_Sales),
    g.eu_sales = toFloat(row.EU_Sales),
    g.jp_sales = toFloat(row.JP_Sales),
    g.other_sales = toFloat(row.Other_Sales),
    g.global_sales = toFloat(row.Global_Sales)

MERGE (p:Platform {name:row.Platform})
MERGE (g)-[:RELEASED_ON]->(p)

MERGE (pub:Publisher {name:row.Publisher})
MERGE (g)-[:PUBLISHED_BY]->(pub)

MERGE (ge:Genre {name:row.Genre})
MERGE (g)-[:BELONGS_TO_GENRE]->(ge)
"""

kg.query(vgsales_query)

ClientError: {code: Neo.ClientError.Statement.ExternalResourceFailed} {message: Cannot load from URL 'https://raw.githubusercontent.com/IvanReznikov/kg_snet_vbrl/isham-workshop/workshop/data/vgsales.csv': Couldn't load the external resource at: https://raw.githubusercontent.com/IvanReznikov/kg_snet_vbrl/isham-workshop/workshop/data/vgsales.csv ()}

In [10]:
kg.refresh_schema()
print(kg.schema)

Node properties:
Game {rank: INTEGER, name: STRING, platform: STRING, year: INTEGER, genre: STRING, publisher: STRING, na_sales: FLOAT, eu_sales: FLOAT, jp_sales: FLOAT, other_sales: FLOAT, global_sales: FLOAT}
Platform {name: STRING}
Publisher {name: STRING}
Genre {name: STRING}
Relationship properties:

The relationships:
(:Game)-[:RELEASED_ON]->(:Platform)
(:Game)-[:PUBLISHED_BY]->(:Publisher)
(:Game)-[:BELONGS_TO_GENRE]->(:Genre)


### Querying the movie knowledge graph 
- Match all nodes in the graph

In [11]:
cypher = """
  MATCH (n) 
  RETURN count(n)
  """

In [12]:
result = kg.query(cypher)
result

[{'count(n)': 17221}]

In [13]:
cypher = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
  """

In [14]:
result = kg.query(cypher)
result

[{'numberOfNodes': 17221}]

In [15]:
print(f"There are {result[0]['numberOfNodes']} nodes in this graph.")

There are 17221 nodes in this graph.


- Match only the `Game` nodes by specifying the node label

In [16]:
cypher = """
  MATCH (n:Game) 
  RETURN count(n) AS numberOfGames
  """
kg.query(cypher)

[{'numberOfGames': 16599}]

- Change the variable name in the node pattern match for improved readability

In [17]:
cypher = """
  MATCH (g:Game) 
  RETURN count(g) AS numberOfGames
  """
kg.query(cypher)

[{'numberOfGames': 16599}]

- Match only the `Publisher` nodes

In [18]:
cypher = """
  MATCH (publisher:Publisher) 
  RETURN count(publisher) AS numberOfPublishers
  """
kg.query(cypher)

[{'numberOfPublishers': 579}]

- Match a single publisher by specifying the value of the `name` property on the `Publisher` node

In [19]:
cypher = """
  MATCH (capcom:Publisher {name:"Capcom"}) 
  RETURN capcom
  """
kg.query(cypher)

[{'capcom': {'name': 'Capcom'}}]

- Return only the `global_sales` property of the matched `Game` node

In [20]:
cypher = """
  MATCH (marioBros:Game {name:"New Super Mario Bros."}) 
  RETURN marioBros.global_sales
  """
kg.query(cypher)

[{'marioBros.global_sales': 30.01}]

- Return two properties

In [21]:
cypher = """
  MATCH (marioBros:Game {name:"New Super Mario Bros."}) 
  RETURN marioBros.global_sales, marioBros.publisher
  """
kg.query(cypher)

[{'marioBros.global_sales': 30.01, 'marioBros.publisher': 'Nintendo'}]

### Cypher patterns with conditional matching

In [22]:
cypher = """
  MATCH (nineties:Game) 
  WHERE nineties.year >= 1990 
    AND nineties.year < 2000 
  RETURN nineties.name
  """


In [23]:
kg.query(cypher)

[{'nineties.name': 'Pokemon Red/Pokemon Blue'},
 {'nineties.name': 'Pokemon Gold/Pokemon Silver'},
 {'nineties.name': 'Super Mario World'},
 {'nineties.name': 'Pokémon Yellow: Special Pikachu Edition'},
 {'nineties.name': 'Super Mario 64'},
 {'nineties.name': 'Super Mario Land 2: 6 Golden Coins'},
 {'nineties.name': 'Gran Turismo'},
 {'nineties.name': 'Super Mario All-Stars'},
 {'nineties.name': 'Mario Kart 64'},
 {'nineties.name': 'Final Fantasy VII'},
 {'nineties.name': 'Gran Turismo 2'},
 {'nineties.name': 'Donkey Kong Country'},
 {'nineties.name': 'Super Mario Kart'},
 {'nineties.name': 'GoldenEye 007'},
 {'nineties.name': 'Final Fantasy VIII'},
 {'nineties.name': 'The Legend of Zelda: Ocarina of Time'},
 {'nineties.name': 'Crash Bandicoot 2: Cortex Strikes Back'},
 {'nineties.name': 'Tekken 3'},
 {'nineties.name': 'Crash Bandicoot 3: Warped'},
 {'nineties.name': 'Crash Bandicoot'},
 {'nineties.name': 'Street Fighter II: The World Warrior'},
 {'nineties.name': 'Driver'},
 {'ninetie

### Pattern matching with multiple nodes

In [24]:
cypher = """
  MATCH (game:Game)-[:BELONGS_TO_GENRE]->(genre:Genre) 
  RETURN game.name, genre.name LIMIT 10
  """
kg.query(cypher)

[{'game.name': 'Wii Sports', 'genre.name': 'Sports'},
 {'game.name': 'Wii Sports Resort', 'genre.name': 'Sports'},
 {'game.name': 'Wii Fit', 'genre.name': 'Sports'},
 {'game.name': 'Wii Fit Plus', 'genre.name': 'Sports'},
 {'game.name': 'FIFA 16', 'genre.name': 'Sports'},
 {'game.name': 'Mario & Sonic at the Olympic Games', 'genre.name': 'Sports'},
 {'game.name': 'FIFA 14', 'genre.name': 'Sports'},
 {'game.name': 'Zumba Fitness', 'genre.name': 'Sports'},
 {'game.name': 'FIFA 12', 'genre.name': 'Sports'},
 {'game.name': 'FIFA 15', 'genre.name': 'Sports'}]

In [25]:
cypher = """
  MATCH (game:Game {name:"New Super Mario Bros."})-[:PUBLISHED_BY]->(m)<-[:PUBLISHED_BY]-(otherGames) 
  RETURN otherGames.name, otherGames.global_sales
  """
kg.query(cypher)

[{'otherGames.name': 'Wii Sports', 'otherGames.global_sales': 82.74},
 {'otherGames.name': 'Super Mario Bros.', 'otherGames.global_sales': 40.24},
 {'otherGames.name': 'Mario Kart Wii', 'otherGames.global_sales': 35.82},
 {'otherGames.name': 'Wii Sports Resort', 'otherGames.global_sales': 33.0},
 {'otherGames.name': 'Pokemon Red/Pokemon Blue',
  'otherGames.global_sales': 31.37},
 {'otherGames.name': 'Tetris', 'otherGames.global_sales': 30.26},
 {'otherGames.name': 'Wii Play', 'otherGames.global_sales': 29.02},
 {'otherGames.name': 'New Super Mario Bros. Wii',
  'otherGames.global_sales': 28.62},
 {'otherGames.name': 'Duck Hunt', 'otherGames.global_sales': 28.31},
 {'otherGames.name': 'Mario Kart DS', 'otherGames.global_sales': 23.42},
 {'otherGames.name': 'Pokemon Gold/Pokemon Silver',
  'otherGames.global_sales': 23.1},
 {'otherGames.name': 'Wii Fit', 'otherGames.global_sales': 22.72},
 {'otherGames.name': 'Wii Fit Plus', 'otherGames.global_sales': 22.0},
 {'otherGames.name': 'Super 

### Delete data from the graph

Before

In [26]:
cypher = """
MATCH (game:Game {name:"Nintendogs"})-[published_by:PUBLISHED_BY]->(publisher:Publisher)
RETURN game.name, publisher.name
"""
kg.query(cypher)

[]

Deleting Data

We need to delete relationship first then only we can delete node.

In [27]:
cypher = """
MATCH (game:Game {name:"Nintendogs"})-[published_by:PUBLISHED_BY]->(publisher:Publisher)
DELETE published_by
"""
kg.query(cypher)

[]

After

In [28]:
cypher = """
MATCH (game:Game {name:"Nintendogs"})-[published_by:PUBLISHED_BY]->(publisher:Publisher)
RETURN game.name, publisher.name
"""
kg.query(cypher)

[]