# Graph Analysis
After uploading the dataset to Neo4j, it is possible to run Cypher queries to analyze the dynamics and structure of the graph. Several analyses can be performed with the given graph structure, such as assessing a user's expertise by checking their favorite topics, suggesting subcategories by looking at the most popular topics, etc.

In [2]:
# Import the libraries and access the graph
import os

from py2neo import Graph

# Connection settings are read from the NEO4J_* environment variables when
# they are set; otherwise the local development defaults are used, so the
# notebook keeps working unchanged against a local instance.
neo4j_host = os.environ.get("NEO4J_HOST", "localhost")
# Environment values are always strings, so normalise the port to an int.
neo4j_port = int(os.environ.get("NEO4J_PORT", "7687"))
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
# NOTE: "test" is only a local fallback; export NEO4J_PASSWORD instead of
# editing this cell when connecting to a non-local database.
neo4j_password = os.environ.get("NEO4J_PASSWORD", "test")

# Connect over the Bolt protocol; `graph` is the handle used to run queries.
graph = Graph(f"bolt://{neo4j_host}:{neo4j_port}",
              auth=(neo4j_user, neo4j_password))

In [23]:
# List every distinct user that takes part in at least one PREDICTS relationship
users_query = "MATCH (u:User)-[p:PREDICTS]-() RETURN distinct(u)"
result = graph.run(users_query)
# Show one record per line
for record in result:
    print(record)


Node('User', user_id=127896)
Node('User', user_id=102726)
Node('User', user_id=106640)
Node('User', user_id=124344)
Node('User', user_id=103399)
Node('User', user_id=105765)
Node('User', user_id=114826)
Node('User', user_id=104554)
Node('User', user_id=109860)
Node('User', user_id=124378)


In [30]:
### Favorite categories per user
# Ranks the categories of the questions a user is linked to via PREDICTS,
# ordered by how many matches each category has.
user_id = 124344  # User to consult
amount_of_results = 10  # Maximum number of rows to return
# Build the query (adjacent literals are joined into a single string)
categories_query = (
    "MATCH (u:User)-[p:PREDICTS]-(q:Question)-[b:BELONGSTO]-(c:Category) "
    "WHERE u.user_id=$user "
    "RETURN u,c, COUNT(c) as relationships_amount "
    "ORDER BY relationships_amount DESC "
    "LIMIT $amount"
)
# Run it with the user and the limit passed as query parameters
cursor = graph.run(categories_query, user=user_id, amount=amount_of_results)
result = cursor.data()
# Show one row per line
for row in result:
    print(row)



{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=10333, title='Geopolitics – Armed Conflict'), 'relationships_amount': 236}
{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=10349, title='Effective Altruism'), 'relationships_amount': 222}
{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=10353, title='Politics – US'), 'relationships_amount': 200}
{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=10315, title='Geopolitics'), 'relationships_amount': 142}
{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=10345, title='Novel Coronavirus (Covid-19)'), 'relationships_amount': 106}
{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=9814, title='Series – Nuclear Threats'), 'relationships_amount': 99}
{'u': Node('User', user_id=124344), 'c': Node('Category', category_id=10052, title='Ukraine'), 'relationships_amount': 86}
{'u': Node('User', user_id=124344), 'c': Node('Category

In [31]:
# Favorite topics per user
# Ranks the topics of the questions a user is linked to via PREDICTS,
# ordered by how many matches each topic has.
user_id = 124344  # User to consult
amount_of_results = 10  # Maximum number of rows to return
# Build the query (adjacent literals are joined into a single string)
topics_query = (
    "MATCH (u:User)-[p:PREDICTS]-(q:Question)-[h:HAS]-(t:Topic) "
    "WHERE u.user_id=$user "
    "RETURN u,t, COUNT((t)) as relationships_amount "
    "ORDER BY relationships_amount DESC "
    "LIMIT $amount"
)
# Run it with the user and the limit passed as query parameters
cursor = graph.run(topics_query, user=user_id, amount=amount_of_results)
result = cursor.data()
# Show one row per line
for row in result:
    print(row)

{'u': Node('User', user_id=124344), 't': Node('Topic', title='United States', topic_id='Q30'), 'relationships_amount': 347}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='Metaculus', topic_id='Q65086363'), 'relationships_amount': 300}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='Not out', topic_id='Q3595513'), 'relationships_amount': 175}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='Russia', topic_id='Q159'), 'relationships_amount': 145}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='China', topic_id='Q148'), 'relationships_amount': 114}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='Ukraine', topic_id='Q212'), 'relationships_amount': 110}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='President of the United States', topic_id='Q11696'), 'relationships_amount': 103}
{'u': Node('User', user_id=124344), 't': Node('Topic', title='Donald Trump', topic_id='Q22686'), 'relationships_amount': 100}
{'u

In [33]:
# Topics with most relationships
# Counts the relationships (any type, either direction) attached to each
# Topic node and keeps the highest counts.
amount_of_results = 10  # Maximum number of rows to return
# Build the query (adjacent literals are joined into a single string)
popular_topics_query = (
    "MATCH (a:Topic)-[r]-() "
    "RETURN a, COUNT(r) as relationships_amount "
    "ORDER BY relationships_amount DESC "
    "LIMIT $amount"
)
# Run it with the limit passed as a query parameter
cursor = graph.run(popular_topics_query, amount=amount_of_results)
result = cursor.data()
# Show one row per line
for row in result:
    print(row)

{'a': Node('Topic', title='United States', topic_id='Q30'), 'relationships_amount': 1266}
{'a': Node('Topic', title='Metaculus', topic_id='Q65086363'), 'relationships_amount': 995}
{'a': Node('Topic', title='Not out', topic_id='Q3595513'), 'relationships_amount': 919}
{'a': Node('Topic', title='United States dollar', topic_id='Q4917'), 'relationships_amount': 578}
{'a': Node('Topic', title='Artificial intelligence', topic_id='Q11660'), 'relationships_amount': 522}
{'a': Node('Topic', title='COVID-19 pandemic', topic_id='Q81068910'), 'relationships_amount': 492}
{'a': Node('Topic', title='China', topic_id='Q148'), 'relationships_amount': 420}
{'a': Node('Topic', title='Free will', topic_id='Q9476'), 'relationships_amount': 399}
{'a': Node('Topic', title='Donald Trump', topic_id='Q22686'), 'relationships_amount': 328}
{'a': Node('Topic', title='United Kingdom', topic_id='Q145'), 'relationships_amount': 309}
