In [1]:
import neo4j
import pandas as pd

# Connect to neo4j and the wipe_out function

In [2]:
def connect_db():
    driver = neo4j.GraphDatabase.driver(uri="neo4j://0.0.0.0:7687", auth=("neo4j","password"))
    session = driver.session(database="neo4j")
    return session
    
def wipe_out_db(session):
    # wipe out database by deleting all nodes and relationships
    
    # similar to SELECT * FROM graph_db in SQL
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)

session = connect_db()
wipe_out_db(session) 

# Create nodes and import data

In [3]:
query = '''

CREATE
  (alice:User {name: 'Alice', position: 'Data Scientist', interests: ['NoSQL', 'AI']}),
  (bob:User {name: 'Bob', position: 'Student', interests: ['Machine Learning', 'AI']}),
  (charlie:User {name: 'Charlie', position: 'Data Scientist', interests: ['Deep Learning', 'NoSQL']}),
  (danny:User {name: 'Danny', position: 'Student', interests: ['Computer Vison', 'Machine Learning']}),
  (emma:User {name: 'Emma', position: 'Student', interests: ['Statistics', 'AI']}),
  (faith:User {name: 'Frank', position: 'Data Analyst', interests: ['Data Visualization', 'Machine Learning']}),

  (post1:Post {
    post_id: 1, 
    topic: 'NoSQL',
    content: 'Exploring the capabilities of Graph Databases reveals a flexible approach to handling connected data. Unlike traditional databases, NoSQL graph databases like Neo4j offer powerful ways to solve complex problems involving deep relationships.', 
    author: 'Alice', 
    timestamp: datetime('2023-01-01T12:00:00'), 
    likes: 10
  }),
  (post2:Post {
    post_id: 2, 
    topic: 'AI',
    content: 'Advancements in Graph Databases have ushered in a new era of data storage and retrieval. The ability to traverse relationships efficiently opens up AI applications that can dynamically learn and adapt based on the interconnected nature of data.', 
    author: 'Alice', 
    timestamp: datetime('2023-01-02T13:00:00'), 
    likes: 8
  }),
  (post3:Post {
    post_id: 3, 
    topic: 'Deep Learning',
    content: 'The breakthrough in Quantum Computing has the potential to revolutionize industries by performing complex calculations at unprecedented speeds. This leap forward could be the key to solving some of the most challenging problems in Deep Learning.', 
    author: 'Charlie', 
    timestamp: datetime('2023-01-03T14:00:00'), 
    likes: 20
  }),
  (post4:Post {
    post_id: 4, 
    topic: 'NoSQL',
    content: 'Blockchain technology is transforming the Big Data landscape by enhancing security and ensuring the integrity of data transactions. By combining NoSQL databases with blockchain, we can create a transparent, distributed ledger that is both scalable and robust.', 
    author: 'Charlie', 
    timestamp: datetime('2023-01-04T15:00:00'), 
    likes: 25
  }),
  (post5:Post {
    post_id: 5, 
    topic: 'Machine Learning',
    content: 'The rise of Big Data analytics has become a cornerstone in the field of Computer Vision. By leveraging Machine Learning algorithms, we can interpret and understand visual information from the world around us at a granular level.', 
    author: 'Danny', 
    timestamp: datetime('2023-01-05T16:00:00'), 
    likes: 5
  }),

  (event1:Event {event_id:1, name: 'DSI Annual Meetup', date: date('2023-06-01'), location: 'DSI',
                description: 'Networking event for DSI students and alumni.', attendees: 10}),
  (event2:Event {event_id:2, name: 'AI Conference', date: date('2023-07-15'), location: 'DSI', 
                description: 'Conference on the latest trends in AI.', attendees: 50}),

  (reply1:Reply {content: 'The versatility of graph databases in managing and querying connected data is amazing!', 
                author: 'Bob', timestamp: datetime('2023-01-02T12:10:00'), likes: 2}),
  (reply2:Reply {content: 'It is truly exciting to think about the implications of quantum computing on deep learning!', 
                author: 'Emma', timestamp: datetime('2023-01-03T14:30:00'), likes: 3}),

  
  (alice)-[:FRIENDS_WITH]->(bob),
  (bob)-[:FRIENDS_WITH]->(charlie),
  (charlie)-[:FRIENDS_WITH]->(danny),
  (danny)-[:FRIENDS_WITH]->(emma),
  (emma)-[:FRIENDS_WITH]->(alice),

  (alice)-[:PUBLISHED]->(post1),
  (alice)-[:PUBLISHED]->(post2),
  (charlie)-[:PUBLISHED]->(post3),
  (charlie)-[:PUBLISHED]->(post4),
  (danny)-[:PUBLISHED]->(post5),

  (bob)-[:WROTE]->(reply1)-[:IN_RESPONSE_TO]->(post1),
  (emma)-[:WROTE]->(reply2)-[:IN_RESPONSE_TO]->(post3),

  (alice)-[:WANTS_TO_ATTEND]->(event1),
  (bob)-[:WANTS_TO_ATTEND]->(event2),
  (charlie)-[:WANTS_TO_ATTEND]->(event2),
  (danny)-[:WANTS_TO_ATTEND]->(event1),
  (emma)-[:WANTS_TO_ATTEND]->(event1)

'''


session.run(query)

<neo4j._sync.work.result.Result at 0x12c59d210>

# Output function

In [4]:
def run_query_to_pandas(session, query):
    # run a query and return the results in a pandas dataframe
    
    result = session.run(query)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

def run_query_print_raw(session, query):
    result = session.run(query)
    
    for r in result:
        print(r.values())

# Query tasks

In [5]:
# Retrieve a property of a specific User 
query = '''
MATCH (n:User {name: 'Alice'})
RETURN n.name AS name, n.interests AS interest
'''

df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,name,interest
0,Alice,"[NoSQL, AI]"


In [6]:
# Find all Posts created by a specific User
query = '''
MATCH (n:User {name: 'Alice'})-[:PUBLISHED]->(p:Post)
RETURN p.content AS post_content
'''
df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,post_content
0,Advancements in Graph Databases have ushered i...
1,Exploring the capabilities of Graph Databases ...


In [7]:
# Find all Users who posted a specific topic of Post (topic is a property of Post)
query = '''
MATCH (n:User)-[:PUBLISHED]->(p:Post)
WHERE 'NoSQL' IN p.topic
RETURN n.name AS name
'''
df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,name
0,Alice
1,Charlie


In [8]:
# Find common interests between two specific Users. 
query = '''
MATCH (n1:User {name: 'Alice'}), (n2:User {name: 'Bob'})
RETURN n1.name, n2.name, 
[interest IN n1.interests WHERE interest IN n2.interests] AS common_interests
'''
df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,n1.name,n2.name,common_interests
0,Alice,Bob,[AI]


In [9]:
# Retrieve top 3 Users who created most Posts
query = '''
MATCH (n:User)-[:PUBLISHED]->(p:Post)
RETURN n.name, count(p) AS posts_count
ORDER BY posts_count DESC
LIMIT 3
'''
df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,n.name,posts_count
0,Alice,2
1,Charlie,2
2,Danny,1


In [10]:
# Retrieve Users who haven’t created any Posts
query = '''
MATCH (n:User)
WHERE NOT (n)-[:PUBLISHED]->(:Post)
RETURN n.name as name
'''
df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,name
0,Bob
1,Emma
2,Frank


In [11]:
# Given two Users, identify if they are indirectly connected through a chain of friends and, if so, return the connecting path. 
query = '''
MATCH path = (alice:User {name: 'Alice'})-[:FRIENDS_WITH*]-(charlie:User {name: 'Charlie'})
WHERE length(path) > 0
RETURN path
'''

result = session.run(query)
result.data()

[{'path': [{'name': 'Alice',
    'position': 'Data Scientist',
    'interests': ['NoSQL', 'AI']},
   'FRIENDS_WITH',
   {'name': 'Bob',
    'position': 'Student',
    'interests': ['Machine Learning', 'AI']},
   'FRIENDS_WITH',
   {'name': 'Charlie',
    'position': 'Data Scientist',
    'interests': ['Deep Learning', 'NoSQL']}]},
 {'path': [{'name': 'Alice',
    'position': 'Data Scientist',
    'interests': ['NoSQL', 'AI']},
   'FRIENDS_WITH',
   {'name': 'Emma', 'position': 'Student', 'interests': ['Statistics', 'AI']},
   'FRIENDS_WITH',
   {'name': 'Danny',
    'position': 'Student',
    'interests': ['Computer Vison', 'Machine Learning']},
   'FRIENDS_WITH',
   {'name': 'Charlie',
    'position': 'Data Scientist',
    'interests': ['Deep Learning', 'NoSQL']}]}]

In [12]:
# Write a query to identify orphaned Users (Users who have no connections). 
query = '''
MATCH (n:User)
WHERE NOT (n)-[:FRIENDS_WITH]-()
RETURN n.name as name
'''
df = run_query_to_pandas(session, query)
display(df)

Unnamed: 0,name
0,Frank


Can you think of a scenario when this information is useful? <br>

**It can help improve the quality of experience for orphaned users on the social network. People without connections often feel lonely at events. We can organize some ice-breaker activities for these individuals to help them feel more comfortable.**