In [14]:
!pip3 install neo4j-driver
!pip3 install graphdatascience
%matplotlib inline

import getpass
import json
import os
import time

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from neo4j import GraphDatabase, basic_auth
from graphdatascience import GraphDataScience as gds
#from neo4j import GraphDataScience as gds

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [15]:
# A wrapper class to manage connection
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver( self.__uri, auth=basic_auth(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [18]:
# Actually create a connection. Copy uri (with bolt port), username and passwords from your own sandbox instance
conn = Neo4jConnection(uri="bolt://18.206.126.225:7687", user="neo4j", pwd="space-bell-baseline")

In [19]:
# clear data
# DETACH will remove add edges as well
query_string = '''
MATCH (c:Character) 
DETACH DELETE c;
'''
conn.query(query_string)

# read characters from season1 of GOT and add as Character nodes 
query_string = '''
WITH "https://raw.githubusercontent.com/mathbeveridge/gameofthrones/master/data/got-s1-nodes.csv" AS uri
LOAD CSV WITH HEADERS FROM uri AS row 
MERGE (c:Character {id:row.Id})
SET c.name = row.Label
'''
conn.query(query_string)

# read character interactions from season1 of GOT and add as edges
query_string = '''
WITH "https://raw.githubusercontent.com/mathbeveridge/gameofthrones/master/data/got-s1-edges.csv" AS uri
LOAD CSV WITH HEADERS FROM uri AS row
MATCH (source:Character {id: row.Source})
MATCH (target:Character {id: row.Target})
MERGE (source)-[:SEASON1 {weight: toInteger(row.Weight)}]-(target)
'''
conn.query(query_string)





[]

In [20]:
# See the degrees of nodes
query_string = '''
MATCH (c:Character) 
RETURN c.name, SIZE([(c)-[:SEASON1]->(s) | s]) AS inDegree 
ORDER BY inDegree DESC 
'''
top_cat_df = pd.DataFrame([dict(_) for _ in conn.query(query_string)])
print(top_cat_df.head(30))

              c.name  inDegree
0            Catelyn        29
1               Arya        28
2             Cersei        23
3                Ned        23
4               Bran        18
5            Joffrey        18
6           Daenerys        17
7              Jaime        16
8              Petyr        15
9                Jon        14
10            Sandor        13
11             Aerys        12
12             Bronn        12
13         Barristan        10
14             Drogo        10
15          Greatjon        10
16              Jeor        10
17            Benjen         9
18             Jorah         9
19              Robb         9
20          Allister         8
21            Baelor         8
22            Dareon         8
23             Luwin         8
24            Robert         8
25            Doreah         7
26         Jon Arryn         7
27           Pycelle         7
28             Grenn         6
29  Hugh of the Vale         6


In [24]:
#Create a projecttion of the season 1 subgraph
query_string = '''
CALL gds.graph.project.cypher(
    'season1_graph',
    'MATCH (n) RETURN id(n) AS id',
    'MATCH (n)-[e:SEASON1]-(m) RETURN id(n) AS source, e.weight AS weight, id(m) AS target'
)
'''
conn.query(query_string)

[<Record nodeQuery='MATCH (n) RETURN id(n) AS id' relationshipQuery='MATCH (n)-[e:SEASON1]-(m) RETURN id(n) AS source, e.weight AS weight, id(m) AS target' graphName='SEASON1_graph' nodeCount=8753 relationshipCount=1098 projectMillis=153>]

In [25]:
# Compute pagerank of the season 1 subgraph
query_string = '''
CALL gds.pageRank.stream('season1_graph', {
  maxIterations: 100,
  dampingFactor: 0.85,
  relationshipWeightProperty: 'weight'
})
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id,      gds.util.asNode(nodeId).name as name, score as full_pagerank
ORDER BY full_pagerank DESC
'''
conn.query(query_string)

[<Record id='NED' name='Ned' full_pagerank=10.33116284333185>,
 <Record id='TYRION' name='Tyrion' full_pagerank=6.22972302778835>,
 <Record id='CATELYN' name='Catelyn' full_pagerank=4.954967427214677>,
 <Record id='JON' name='Jon' full_pagerank=4.892704445159662>,
 <Record id='DAENERYS' name='Daenerys' full_pagerank=4.582201035225358>,
 <Record id='ROBERT' name='Robert' full_pagerank=4.337689913406829>,
 <Record id='ROBB' name='Robb' full_pagerank=3.65719707213589>,
 <Record id='CERSEI' name='Cersei' full_pagerank=3.225683490601557>,
 <Record id='SANSA' name='Sansa' full_pagerank=3.1236520218252557>,
 <Record id='ARYA' name='Arya' full_pagerank=3.113969188252927>,
 <Record id='JOFFREY' name='Joffrey' full_pagerank=2.891065710480182>,
 <Record id='LITTLEFINGER' name='Petyr' full_pagerank=2.8327005155956058>,
 <Record id='BRAN' name='Bran' full_pagerank=2.8234960593710055>,
 <Record id='JORAH' name='Jorah' full_pagerank=2.5583828723849087>,
 <Record id='JAIME' name='Jaime' full_pagerank=

In [26]:
# Detect communities from the season 1 subgraph
query_string = '''
CALL gds.louvain.write('season1_graph', 
    {relationshipWeightProperty: 'weight', 
     writeProperty: 'full_community_id'
})
'''
conn.query(query_string)

[<Record writeMillis=383 nodePropertiesWritten=8753 modularity=0.5279309640522876 modularities=[0.4798590085544023, 0.5279309640522876] ranLevels=2 communityCount=8633 communityDistribution={'p99': 1, 'min': 1, 'max': 47, 'mean': 1.0139001505849647, 'p90': 1, 'p50': 1, 'p999': 1, 'p95': 1, 'p75': 1} postProcessingMillis=6 preProcessingMillis=0 computeMillis=2240 configuration={'maxIterations': 10, 'writeConcurrency': 4, 'seedProperty': None, 'consecutiveIds': False, 'maxLevels': 10, 'relationshipWeightProperty': 'weight', 'concurrency': 4, 'jobId': '672646aa-78ba-4a2c-8048-590339270d8e', 'writeProperty': 'full_community_id', 'logProgress': True, 'includeIntermediateCommunities': False, 'nodeLabels': ['*'], 'sudo': False, 'relationshipTypes': ['*'], 'tolerance': 0.0001}>]