Neo4j Graph Database

In [4]:
# One-time setup: uncomment the lines below to install the Python packages.
# Only the `neo4j` driver is imported by the cells shown in this notebook;
# py2neo is not used by any of them.
#!pip3 install neo4j
#!pip3 install py2neo

Collecting neo4j
  Downloading neo4j-4.4.1.tar.gz (89 kB)
Building wheels for collected packages: neo4j
  Building wheel for neo4j (setup.py): started
  Building wheel for neo4j (setup.py): finished with status 'done'
  Created wheel for neo4j: filename=neo4j-4.4.1-py3-none-any.whl size=114758 sha256=4f96c4b36438e5775d2f770ac47418e9e2d7cdff9c59387f53ee64c454532091
  Stored in directory: c:\users\nitinnayak\appdata\local\pip\cache\wheels\fd\15\02\8379f87426cd1b74fd5891f49df9fb978423feb97650cd639b
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-4.4.1


You should consider upgrading via the 'c:\users\nitinnayak\appdata\local\programs\python\python37\python.exe -m pip install --upgrade pip' command.


#### Connect to the Database

In [8]:
import os
from getpass import getpass

from neo4j import GraphDatabase

# Connection settings.
database_name = "apan5400"
username = "neo4j"
# Do not hard-code credentials in a notebook: take the password from the
# NEO4J_PASSWORD environment variable, or prompt for it interactively.
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
# The Bolt URI carries only host and port. The target database is selected
# when the session is opened (see below), not by a path appended to the URI.
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=(username, password))
# NOTE(review): `database_name` must be the name of a database that exists on
# the server (not the project/DBMS name) -- confirm against your installation.
session = driver.session(database=database_name)

# Round-trip a trivial query so that a wrong password, unreachable server or
# unknown database fails here instead of in a later cell.
session.run("RETURN 1").consume()
print("Successfully connected to Neo4j!")

Successfully connected to Neo4j!


#### Create 2 Nodes for Billy and Susan

In [9]:
# Create one :Person node for Billy and one for Susan.
# session.run() fetches the result lazily; consume() waits for the write to
# complete so that a server-side failure is raised in this cell, before the
# success message is printed.
query1 = ("CREATE(Billy:Person{name:'Billy',id:'0000'})")
result1 = session.run(query1)
result1.consume()
print("Node for Billy created successfully!")

query2 = ("CREATE(Susan:Person{name:'Susan',id:'0001'})")
result2 = session.run(query2)
result2.consume()
print("Node for Susan created successfully!")

Node for Billy created successfully!
Node for Susan created successfully!


#### Connect Billy and Susan as Friends (with Friends_with relationship) (Directional)

In [10]:
# Directed relationship: (Billy)-[:Friends_with]->(Susan).
# If either MATCH finds no node, the CREATE runs zero times and no error is raised.
query3 = ("MATCH(s:Person{name:'Billy'}),(d:Person{name:'Susan'}) CREATE (s)-[r:Friends_with]->(d)")
result3 = session.run(query3)
# Wait for the write to finish so any server-side error surfaces here,
# before the success message.
result3.consume()
print("Billy and Susan are now Friends!")

Billy and Susan are now Friends!


#### Add Attributes for Susan who already exists

In [11]:
# MERGE finds the Susan node by name and id (creating it if it is missing);
# `SET n = {...}` then replaces the node's whole property map, which is why
# name and id are repeated in the map.
query4 = ("MERGE (n:Person {name: 'Susan', id:'0001'})"
         " SET n = {name: 'Susan', id:'0001', age: 34, coat: 'Yellow', hair: 'Brown'} RETURN n")
result4 = session.run(query4)
# Results are fetched lazily; consume() makes sure the update has completed
# (and raises any server-side error) before success is reported.
result4.consume()
print("Susan's attributes are now updated!")

Susan's attributes are now updated!


#### Create Nodes for Joanne and Thomas

In [12]:
# Create one :Person node for Joanne and one for Thomas.
# Each result is consumed before its success message so that a server-side
# failure is raised in this cell rather than in a later one.
query5 = ("CREATE(Joanne:Person{name:'Joanne',id:'0003'})")
result5 = session.run(query5)
result5.consume()
print("Node for Joanne created successfully!")

query6 = ("CREATE(Thomas:Person{name:'Thomas',id:'0002'})")
result6 = session.run(query6)
result6.consume()
print("Node for Thomas created successfully!")

Node for Joanne created successfully!
Node for Thomas created successfully!


#### Create more connections as friends

In [13]:
# Every write below is consumed before its success message is printed:
# session.run() fetches results lazily, so without consume() a server-side
# failure would only surface when a later query is run.

## Connect Thomas to Joanne as Friends (Directional)
query7 = ("MATCH(s:Person{name:'Thomas'}),(d:Person{name:'Joanne'}) CREATE (s)-[r:Friends_with]->(d)")
result7 = session.run(query7)
result7.consume()
print("Thomas and Joanne are now Friends!")

## Connect Joanne to Susan as Friends (Bi-Directional)
## Relationships are directed, so "bi-directional" is modelled as two
## relationships, one in each direction.
query8 = ("MATCH(s:Person{name:'Joanne'}),(d:Person{name:'Susan'}) CREATE (s)-[r:Friends_with]->(d)")
result8 = session.run(query8)
result8.consume()
print("Joanne and Susan are now Friends!")
query8a = ("MATCH(s:Person{name:'Susan'}),(d:Person{name:'Joanne'}) CREATE (s)-[r:Friends_with]->(d)")
result8a = session.run(query8a)
result8a.consume()
print("Joanne and Susan are now Friends Bi-Directionally!")

## Connect Thomas to Billy as Friends (Directional)
query9 = ("MATCH(s:Person{name:'Thomas'}),(d:Person{name:'Billy'}) CREATE (s)-[r:Friends_with]->(d)")
result9 = session.run(query9)
result9.consume()
print("Thomas and Billy are now Friends!")

Thomas and Joanne are now Friends!
Joanne and Susan are now Friends!
Joanne and Susan are now Friends Bi-Directionally!
Thomas and Billy are now Friends!


#### Delete All Nodes and Relationships

In [14]:
# DETACH DELETE removes every node together with its relationships; this is
# the same form the notebook's final clean-up cell uses.
# query10 is kept as a variable because a later cell runs it again.
query10 = ("MATCH (n) DETACH DELETE n")
result10 = session.run(query10)
# Wait for the delete to finish (and surface any server-side error) before
# reporting success.
result10.consume()
print("All Nodes and relationships are deleted!")

All Nodes and relationships are deleted!


#### Import a Graph from a csv file
##### Note: Import the CSV file into the Neo4j directory following the instructions in the Neo4j Installation Guide on Canvas

##### Create HEADQUARTERED_IN relationship between company and country

In [15]:
# For every row of companies.csv: create a Company node, find-or-create the
# Country node for that row, and link the company to it.
# A single triple-quoted string keeps the clauses separated by whitespace
# (the previous adjacent literals were joined with no separator between them).
query11 = """
LOAD CSV WITH HEADERS FROM 'file:///companies.csv' AS line
CREATE (company:Company {
    name: line.companyName,
    employees: toInteger(line.employees),
    year: toInteger(line.founded),
    revenue: toFloat(line.annualRevenue)
})
MERGE (country:Country {name: line.country})
CREATE (company)-[:HEADQUARTERED_IN]->(country)
"""

result11 = session.run(query11)
# Results are fetched lazily: consume() waits for the import to finish, so a
# failure during the load is raised here instead of after the success message.
result11.consume()
print("All companies are imported from a csv file!")

All companies are imported from a csv file!


##### See which companies match this first degree relation

In [16]:
# Companies directly connected (in either direction) to the USA country node.
query14 = "MATCH (country:Country {name: 'USA'})--(company:Company) \
          RETURN DISTINCT company.name as companies"
result14 = session.run(query14)
# The list of company names is the cell's displayed output.
[row["companies"] for row in result14]

['Google', 'Amazon', 'Microsoft', 'Apple']

In [17]:
# Clear the graph again by re-running query10, which is defined in an earlier cell.
result10 = session.run(query10)
# Wait for the delete to finish (and surface any server-side error) before
# reporting success.
result10.consume()
print("All Nodes and relationships are deleted!")

All Nodes and relationships are deleted!


##### Create company-city-state-country relation

In [18]:
# For every row of companies.csv: create a Company node, find-or-create its
# City, State and Country nodes, and chain them
#   company -HEADQUARTERED_IN-> city -LOCATED_IN-> state -PART_OF-> country.
# The city->state and state->country relationships use MERGE rather than
# CREATE: several rows can share a location, and CREATE would add a duplicate
# relationship for each such row. The company is new on every row, so its
# HEADQUARTERED_IN relationship is still created unconditionally.
query11 = """
LOAD CSV WITH HEADERS FROM 'file:///companies.csv' AS line
CREATE (company:Company {
    name: line.companyName,
    employees: toInteger(line.employees),
    year: toInteger(line.founded),
    revenue: toFloat(line.annualRevenue)
})
MERGE (city:City {name: line.city})
MERGE (state:State {name: line.state})
MERGE (country:Country {name: line.country})
CREATE (company)-[:HEADQUARTERED_IN]->(city)
MERGE (city)-[:LOCATED_IN]->(state)
MERGE (state)-[:PART_OF]->(country)
"""

result11 = session.run(query11)
# Results are fetched lazily: consume() waits for the import to finish, so a
# failure during the load is raised here instead of after the success message.
result11.consume()
print("All companies are imported from a csv file!")

All companies are imported from a csv file!


#### See the 3rd degree relation between company and country

In [None]:
# Companies exactly three hops (any direction) away from the South Korea country node.
query14 = "MATCH (country:Country {name: 'South Korea'})-[*3]-(company:Company) \
          RETURN DISTINCT company.name as companies"
result14 = session.run(query14)
# The list of company names is the cell's displayed output.
[row["companies"] for row in result14]

['Samsung Electronics']

#### Select All Nodes in the Graph and print them

In [19]:
# Fetch every node in the graph, returned under the column name "nodes".
result12 = session.run(
    "MATCH (n) RETURN n as nodes"
)
# The list of node objects is the cell's displayed output.
[row["nodes"] for row in result12]

[<Node id=0 labels=frozenset({'Company'}) properties={'revenue': 274.5, 'year': 1976, 'name': 'Apple', 'employees': 147000}>,
 <Node id=1 labels=frozenset({'Company'}) properties={'revenue': 143.0, 'year': 1975, 'name': 'Microsoft', 'employees': 166475}>,
 <Node id=2 labels=frozenset({'Company'}) properties={'revenue': 386.0, 'year': 1994, 'name': 'Amazon', 'employees': 1298000}>,
 <Node id=3 labels=frozenset({'Company'}) properties={'revenue': 187.5, 'year': 1998, 'name': 'Google', 'employees': 135301}>,
 <Node id=4 labels=frozenset({'Company'}) properties={'revenue': 72.0, 'year': 1999, 'name': 'Alibaba', 'employees': 117600}>,
 <Node id=5 labels=frozenset({'Company'}) properties={'revenue': 76.0, 'year': 1946, 'name': 'Sony', 'employees': 114400}>,
 <Node id=6 labels=frozenset({'Company'}) properties={'revenue': 210.0, 'year': 1969, 'name': 'Samsung Electronics', 'employees': 290000}>,
 <Node id=7 labels=frozenset({'Company'}) properties={'revenue': 193.0, 'year': 1926, 'name': 'Dai

#### Count the number of Nodes in your graph

In [20]:
# Count all nodes; the query yields one row with the column "nodes_count".
result13 = session.run(
    "MATCH (n) RETURN count(n) as nodes_count"
)
[row["nodes_count"] for row in result13]

[27]

#### Get total number of employees for the companies based in Washington State

In [21]:
# Companies within one to three hops of the Washington state node, de-duplicated
# with DISTINCT before their employee counts are summed.
query14 = "MATCH (company:Company)-[*1..3]-(state:State {name: 'Washington'}) \
            WITH DISTINCT company \
            RETURN sum(company.employees) as total_employees"
result14 = session.run(query14)
[row["total_employees"] for row in result14]

[1464475]

#### Get all of the companies that have more than 50000 employees

In [22]:
# Any node whose `employees` property is greater than 50000.
# RETURN * returns the only bound variable (n), so record[0] is that node.
query15 = "MATCH (n) WHERE n.employees > 50000 RETURN *"
result15 = session.run(query15)
for record in result15:
    print(
        "This is the company: " + record[0]["name"]
        + ". This company has: " + str(record[0]["employees"])
        + " employees"
    )

This is the company: Apple. This company has: 147000 employees
This is the company: Microsoft. This company has: 166475 employees
This is the company: Amazon. This company has: 1298000 employees
This is the company: Google. This company has: 135301 employees
This is the company: Alibaba. This company has: 117600 employees
This is the company: Sony. This company has: 114400 employees
This is the company: Samsung Electronics. This company has: 290000 employees
This is the company: Daimler AG. This company has: 298655 employees


#### Get all companies where name has micro in it

In [None]:
# Note: for a case-sensitive version of this query, run:
#   MATCH (n) where n.name =~ '.*Micro.*' RETURN n.name, n
# Case-insensitive query (uses the (?i) regex flag):
query16 = "MATCH (n) where n.name =~ '.*(?i)micro.*' RETURN n.name, n"
result16 = session.run(query16)
for record in result16:
    # record[1] is the node n, the second item of the RETURN clause.
    print("The name of the company is: " + record[1]["name"])
    print(record)

The name of the company is: Microsoft
<Record n.name='Microsoft' n=<Node id=10 labels=frozenset({'Company'}) properties={'name': 'Microsoft', 'revenue': 143.0, 'employees': 166475, 'year': 1975}>>


#### Delete all nodes & relationships

In [23]:
# Final clean-up: remove every node together with its relationships.
query = ("MATCH (n) DETACH DELETE n")
result = session.run(query)
# Wait for the delete to finish (and surface any server-side error) before
# reporting success.
result.consume()
print("All Nodes and relationships are deleted!")

# Release the session and the driver's connections now that the notebook is
# done; re-run the connection cell to continue working with the database.
session.close()
driver.close()

All Nodes and relationships are deleted!
