# Example queries on COVID-19 Knowledge Graph

In [1]:
import os
import time
import pandas as pd
from py2neo import Graph

In [2]:
# Lift pandas' display limits so query results are shown in full.
pd.set_option("display.max_rows", None)  # display all rows
pd.set_option("display.max_columns", None)  # display all columns

In [3]:
# Read the Neo4j installation directory from the environment
# (None when NEO4J_HOME is not set) and echo it for reference.
NEO4J_HOME = os.environ.get('NEO4J_HOME')
print(NEO4J_HOME)

/Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14


Start the Neo4j database if it is not already running. If it is already running, restart it instead to load the latest version of the database.

In [4]:
# Shell escape: run the `neo4j start` command from the bin/ directory under NEO4J_HOME.
# The path is quoted because NEO4J_HOME may contain spaces.
!"$NEO4J_HOME"/bin/neo4j start

Active database: graph.db
Directories in use:
  home:         /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14
  config:       /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/conf
  logs:         /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/logs
  plugins:      /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/plugins
  import:       NOT SET
  data:         /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/data
  certificates: /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4

Pause for a fixed 15 seconds to give the database time to finish starting up.

In [5]:
# Fixed pause so the freshly started database has time to come up
# before the next cell connects to it.
STARTUP_WAIT_SECONDS = 15
time.sleep(STARTUP_WAIT_SECONDS)

In [6]:
# Connect with py2neo's default connection settings; only the password is supplied.
# The password is taken from the NEO4J_PASSWORD environment variable when it is set,
# so credentials need not live in the notebook; the original value is kept as the
# fallback so existing setups keep working unchanged.
graph = Graph(password=os.getenv("NEO4J_PASSWORD", "neo4jbinder"))

### List coronavirus outbreaks

In [7]:
# One row per (Pathogen)-[:CAUSES]->(Outbreak) pair.
query = """
MATCH (p:Pathogen)-[:CAUSES]->(o:Outbreak)
RETURN p.acronym as acronym, p.name as pathogen, p.taxonomy_id as taxonomy_id, o.name as outbreak, o.start_date as start_date
"""
outbreaks = graph.run(query)
outbreaks.to_data_frame()

Unnamed: 0,acronym,pathogen,taxonomy_id,outbreak,start_date
0,SARS-CoV-2,Severe acute respiratory syndrome coronavirus 2,2697049,COVID-19,2019
1,MERS-CoV,Middle East respiratory syndrome-related coron...,1335626,MERS,2012
2,SARS-CoV,Severe acute respiratory syndrome-related coro...,694009,SARS,2003


### List person demographics and strain information for California

In [8]:
# Persons located in the Admin1 region named 'California', together with the strain each carries.
# The region name is written directly into the query text here.
query = """
MATCH (a:Admin1)<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
WHERE a.name = 'California'
RETURN p.age as age, p.sex as sex, p.exposure_location as exposure_location, s.name as strain, s.clade as clade
"""
california_cases = graph.run(query)
california_cases.to_data_frame()

Unnamed: 0,age,sex,exposure_location,strain,clade
0,46.0,Female,California,USA/CA9/2020,A7
1,51.0,Female,California,USA/CA8/2020,
2,56.0,Female,Hubei,USA/CA7/2020,B4
3,44.0,Male,Hubei,USA/CA6/2020,
4,54.0,Female,Hubei,USA/CA5/2020,
5,57.0,Female,California,USA/CA4/2020,
6,57.0,Male,California,USA/CA3/2020,
7,54.0,Male,Hubei,USA/CA2/2020,
8,38.0,Male,Hubei,USA/CA1/2020,B
9,,,California,USA/CA-PC101P/2020,A2a


#### Same query using parameterized Cypher
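Parameters to Cypher queries can be passed as keyword arguments to `graph.run`. Parameters in Cypher are named: the current syntax is a `$` prefix (e.g. `$admin1`); the legacy curly-brace syntax (e.g. `{admin1}`) is deprecated in Neo4j 3.x and is no longer accepted from Neo4j 4.0 on.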

In [9]:
admin1 = 'California'

# `$admin1` is a named Cypher parameter; its value is supplied as a keyword
# argument to graph.run(). The `$name` form is used instead of the legacy
# `{name}` form, which is deprecated in Neo4j 3.x and rejected by Neo4j 4.0+.
query = """
MATCH (a:Admin1{name: $admin1})<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
RETURN p.age as age, p.sex as sex, p.exposure_location as exposure_location,
       s.name as strain, s.clade as clade, s.date as date
ORDER BY s.date
"""
# Rows come back ordered by strain date; show at most the first 100.
graph.run(query, admin1=admin1).to_data_frame().head(100)

Unnamed: 0,age,sex,exposure_location,strain,clade,date
0,54.0,Male,Hubei,USA/CA2/2020,,2020-01-22
1,38.0,Male,Hubei,USA/CA1/2020,B,2020-01-23
2,44.0,Male,Hubei,USA/CA6/2020,,2020-01-27
3,54.0,Female,Hubei,USA/CA5/2020,,2020-01-29
4,57.0,Female,California,USA/CA4/2020,,2020-01-29
5,57.0,Male,California,USA/CA3/2020,,2020-01-29
6,56.0,Female,Hubei,USA/CA7/2020,B4,2020-02-06
7,51.0,Female,California,USA/CA8/2020,,2020-02-10
8,46.0,Female,California,USA/CA9/2020,A7,2020-02-23
9,,,California,USA/CA-CDPH-UC4/2020,A7,2020-02-27


### Where did clade A originate?

In [10]:
clade = 'A'

# Strains whose clade begins with the given prefix (so sub-clades such as A1a or A3
# are included), with the Country each strain is connected to, ordered by strain date.
# `$clade` is a named Cypher parameter filled from the keyword argument to graph.run();
# the legacy `{clade}` form is deprecated in Neo4j 3.x and rejected by Neo4j 4.0+.
query = """
MATCH (s:Strain)--(a:Country)
WHERE s.clade STARTS WITH $clade
RETURN s.clade as clade, s.name, s.date, a.name
ORDER BY s.date
"""
# Show at most the first 100 rows.
graph.run(query, clade=clade).to_data_frame().head(100)

Unnamed: 0,clade,s.name,s.date,a.name
0,A3,Wuhan/HBCDC-HB-05/2020,2020-01-18,Mainland China
1,A3,Shandong/IVDC-SD-001/2020,2020-01-19,Mainland China
2,A2,Germany/BavPat1/2020,2020-01-28,Germany
3,A1a,Italy/INMI1-cs/2020,2020-01-29,Italy
4,A1a,Italy/SPL1/2020,2020-01-29,Italy
5,A1a,England/09c/2020,2020-02-09,United Kingdom
6,A3,Canada/BC_37_0-2/2020,2020-02-16,Canada
7,A3,Canada/BC_69243/2020,2020-02-20,Canada
8,A2a,Italy/CDG1/2020,2020-02-20,Italy
9,A7,USA/CA9/2020,2020-02-23,USA


### Find persons that imported the virus from another location

In [11]:
# Persons whose exposure_location differs from the name of the Admin1 region
# they are located in, with the strain each carries, sorted by exposure location.
query = """
MATCH (c:Admin1)<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
WHERE c.name <> p.exposure_location
RETURN c.name as `state/province`, p.age as age, p.sex as sex, p.exposure_location as exposure_location, 
       s.name as strain, s.clade as clade
ORDER BY p.exposure_location
"""
imported_cases = graph.run(query)
imported_cases.to_data_frame()

Unnamed: 0,state/province,age,sex,exposure_location,strain,clade
0,Kerala,23.0,Male,China,India/1-31/2020,B
1,Kerala,20.0,Female,China,India/1-27/2020,
2,Panama City,40.0,Female,Comunitat Valenciana,Panama/328677/2020,A2a
3,British Columbia,68.0,Male,Grand Princess,Canada/BC_64686/2020,B1
4,Minnesota,,,Grand Princess,USA/MN3-MDH3/2020,B1
5,Minnesota,,,Grand Princess,USA/MN1-MDH1/2020,
6,British Columbia,80.0,Female,Hong Kong,Canada/BC_35720/2020,
7,New South Wales,53.0,Male,Hubei,Sydney/2/2020,
8,New South Wales,43.0,Male,Hubei,Australia/NSW01/2020,B
9,Queensland,44.0,Male,Hubei,Australia/QLD01/2020,B4


### Strains in Sydney

In [12]:
city = 'Sydney'

# Strains carried by persons located in the named City, ordered by strain date.
# `$city` is a named Cypher parameter filled from the keyword argument to graph.run();
# the legacy `{city}` form is deprecated in Neo4j 3.x and rejected by Neo4j 4.0+.
query = """
MATCH (c:City{name: $city})<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
RETURN c.name as city, s.name as strain, s.clade as clade, p.exposure_location, s.date as date
ORDER BY s.date
"""
graph.run(query, city=city).to_data_frame()

Unnamed: 0,city,strain,clade,p.exposure_location,date
0,Sydney,Sydney/2/2020,,Hubei,2020-01-22
1,Sydney,Australia/NSW01/2020,B,Hubei,2020-01-24
2,Sydney,Sydney/3/2020,,New South Wales,2020-01-25
3,Sydney,Australia/NSW10/2020,,New South Wales,2020-02-28
4,Sydney,Australia/NSW09/2020,A3,New South Wales,2020-02-28
5,Sydney,Australia/NSW08/2020,,New South Wales,2020-02-28
6,Sydney,Australia/NSW05/2020,A3,Iran,2020-02-28
7,Sydney,Australia/NSW07/2020,A3,New South Wales,2020-02-29
8,Sydney,Australia/NSW06/2020,A3,Iran,2020-02-29
9,Sydney,Australia/NSW11/2020,A3,Iran,2020-03-02
