## Setup environment and download data

In [99]:
from pathlib import Path

from pyraphtory.context import PyRaphtory
from pyraphtory.vertex import Vertex
from pyraphtory.spouts import FileSpout
from pyraphtory.builder import *
from pyraphtory.graph import Row
from pyvis.network import Network


!curl -o /tmp/lotr.csv https://raw.githubusercontent.com/Raphtory/Data/main/lotr.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 52206  100 52206    0     0   171k      0 --:--:-- --:--:-- --:--:--  171k


Preview data

In [2]:
# Print the first lines of the downloaded file; each row is `source,target,timestamp`.
!head /tmp/lotr.csv

Gandalf,Elrond,33
Frodo,Bilbo,114
Blanco,Marcho,146
Frodo,Bilbo,205
Thorin,Gandalf,270
Thorin,Bilbo,270
Gandalf,Bilbo,270
Gollum,Bilbo,286
Gollum,Bilbo,306
Gollum,Bilbo,308


## Create a new Raphtory graph

In [4]:
# Open a PyRaphtory context (logging=True); `pr` is also used later to reach the built-in algorithms.
pr = PyRaphtory(logging=True).open()
# Create the graph that the following cells load data into and query.
rg = pr.new_graph()

20:01:37.020 [io-compute-8] INFO  com.raphtory.internals.management.Py4JServer - Starting PythonGatewayServer...
Port: 64760
Secret: fbf3bc13e8c176b144ba68465003df8d6fec99c86767bb83ede177eea5fec885
20:01:37.985 [Thread-11] INFO  com.raphtory.internals.context.LocalContext$ - Creating Service for 'dizzy_cerise_kite'
20:01:38.017 [io-compute-6] INFO  com.raphtory.internals.management.Prometheus$ - Prometheus started on port /0:0:0:0:0:0:0:0:9999
20:01:39.219 [io-compute-6] INFO  com.raphtory.internals.components.partition.PartitionOrchestrator$ - Creating '1' Partition Managers for 'dizzy_cerise_kite'.
20:01:40.881 [io-compute-10] INFO  com.raphtory.internals.components.partition.PartitionManager - Partition 0: Starting partition manager for 'dizzy_cerise_kite'.


## Ingest the data into a graph

In [7]:
def parse(graph, tuple: str):
    """Turn one CSV row into two character vertices and the edge between them.

    `tuple` is a line of the form ``source,target,timestamp``. Each field is
    whitespace-stripped and the third one is parsed as an integer update time.
    """
    fields = [field.strip() for field in tuple.split(",")]

    src_name = fields[0]
    src_id = graph.assign_id(src_name)
    dst_name = fields[1]
    dst_id = graph.assign_id(dst_name)
    event_time = int(fields[2])

    # Register both endpoints (each tagged with its immutable name) before linking them.
    for vertex_id, vertex_name in ((src_id, src_name), (dst_id, dst_name)):
        graph.add_vertex(
            event_time,
            vertex_id,
            Properties(ImmutableProperty("name", vertex_name)),
            Type("Character"),
        )
    graph.add_edge(event_time, src_id, dst_id, Type("Character_Co-occurence"))

# Wrap `parse` in a GraphBuilder (presumably invoked once per line emitted by the spout).
lotr_builder = GraphBuilder(parse)
# Spout over the CSV file downloaded at the top of the notebook.
lotr_spout = FileSpout("/tmp/lotr.csv")
# Pair spout and builder into a Source and load it into the graph.
rg.load(Source(lotr_spout, lotr_builder))

20:01:59.477 [spawner-akka.actor.default-dispatcher-5] INFO  com.raphtory.internals.components.ingestion.IngestionManager - Ingestion Manager for 'dizzy_cerise_kite' establishing new data source


com.raphtory.api.analysis.graphview.DeployedTemporalGraph@15d90208

20:01:59.549 [io-compute-3] INFO  com.raphtory.spouts.FileSpoutInstance - Spout: Processing file 'lotr.csv' ...
20:01:59.561 [spawner-akka.actor.default-dispatcher-5] INFO  com.raphtory.internals.components.querymanager.QueryManager - Source '0' is blocking analysis for Graph 'dizzy_cerise_kite'
20:02:02.520 [spawner-akka.actor.default-dispatcher-5] INFO  com.raphtory.internals.components.querymanager.QueryManager - Source '0' is unblocking analysis for Graph 'dizzy_cerise_kite' with 7947 messages sent.


### Collect simple metrics

In [51]:
# Per-vertex name and degree counts, collected into a DataFrame.
# (`Row` is imported with the other imports in the first code cell.)
df = (
    rg.select(
        lambda vertex: Row(vertex.name(), vertex.degree(), vertex.out_degree(), vertex.in_degree())
    )
    .write_to_dataframe(["name", "degree", "out_degree", "in_degree"])
)

20:11:34.378 [spawner-akka.actor.default-dispatcher-7] INFO  com.raphtory.internals.components.querymanager.QueryManager - Query '1348533882_6538362722380512163' received, your job ID is '1348533882_6538362722380512163'.
20:11:34.378 [io-compute-9] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job 1348533882_6538362722380512163: Starting query progress tracker.
20:11:34.379 [spawner-akka.actor.default-dispatcher-7] INFO  com.raphtory.internals.components.partition.QueryExecutor - 1348533882_6538362722380512163_0: Starting QueryExecutor.
20:11:34.408 [spawner-akka.actor.default-dispatcher-7] INFO  com.raphtory.internals.components.querymanager.QueryHandler - Job '1348533882_6538362722380512163': Perspective at Time '32674' took 27 ms to run. 
20:11:34.408 [spawner-akka.actor.default-dispatcher-5] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job '1348533882_6538362722380512163': Perspective '32674' finished in 30 ms.
20:11:34.408 [spawner-akka.actor.default-dis

In [52]:
## clean
# Re-bind instead of dropping in place, and tolerate an already-removed column,
# so that re-running this cell does not raise a KeyError.
df = df.drop(columns=['window'], errors='ignore')

In [53]:
# Display the cleaned per-vertex degree table.
df

Unnamed: 0,timestamp,name,degree,out_degree,in_degree
0,32674,Hirgon,2,2,0
1,32674,Hador,3,1,2
2,32674,Horn,4,1,3
3,32674,Galadriel,19,6,16
4,32674,Isildur,18,18,0
...,...,...,...,...,...
134,32674,Faramir,29,3,29
135,32674,Bain,2,1,1
136,32674,Walda,13,3,10
137,32674,Thranduil,2,0,2


#### Sort by highest degree

In [54]:
# Ten vertices with the largest total degree.
df.sort_values('degree', ascending=False).head(10)

Unnamed: 0,timestamp,name,degree,out_degree,in_degree
55,32674,Frodo,51,37,22
54,32674,Gandalf,49,35,24
97,32674,Aragorn,45,5,45
63,32674,Merry,34,23,18
32,32674,Pippin,34,30,10
56,32674,Elrond,32,18,24
52,32674,Théoden,30,22,9
134,32674,Faramir,29,3,29
118,32674,Sam,28,20,17
129,32674,Gimli,25,22,11


### Sort by highest in-degree

In [55]:
# Ten vertices with the largest in-degree.
df.sort_values('in_degree', ascending=False).head(10)

Unnamed: 0,timestamp,name,degree,out_degree,in_degree
97,32674,Aragorn,45,5,45
134,32674,Faramir,29,3,29
54,32674,Gandalf,49,35,24
56,32674,Elrond,32,18,24
55,32674,Frodo,51,37,22
63,32674,Merry,34,23,18
138,32674,Boromir,18,6,17
118,32674,Sam,28,20,17
3,32674,Galadriel,19,6,16
132,32674,Legolas,25,18,16


### Sort by highest out-degree

In [56]:
# Ten vertices with the largest out-degree.
df.sort_values('out_degree', ascending=False).head(10)

Unnamed: 0,timestamp,name,degree,out_degree,in_degree
55,32674,Frodo,51,37,22
54,32674,Gandalf,49,35,24
32,32674,Pippin,34,30,10
63,32674,Merry,34,23,18
52,32674,Théoden,30,22,9
129,32674,Gimli,25,22,11
118,32674,Sam,28,20,17
56,32674,Elrond,32,18,24
4,32674,Isildur,18,18,0
132,32674,Legolas,25,18,16


## Run the PageRank algorithm

In [13]:
cols = ["prlabel"]

# Run PageRank on the graph viewed at time 32674, then export each vertex's
# name together with the columns listed in `cols`.
df_pagerank = (
    rg.at(32674)
    .past()
    .transform(pr.algorithms.generic.centrality.PageRank())
    .execute(pr.algorithms.generic.NodeList(*cols))
    .write_to_dataframe(["name", *cols])
)

20:03:57.522 [io-compute-7] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job PageRank:NodeList_5116625976305719460: Starting query progress tracker.
20:03:57.529 [spawner-akka.actor.default-dispatcher-3] INFO  com.raphtory.internals.components.querymanager.QueryManager - Query 'PageRank:NodeList_5116625976305719460' received, your job ID is 'PageRank:NodeList_5116625976305719460'.
20:03:57.531 [spawner-akka.actor.default-dispatcher-3] INFO  com.raphtory.internals.components.partition.QueryExecutor - PageRank:NodeList_5116625976305719460_0: Starting QueryExecutor.
20:03:57.617 [spawner-akka.actor.default-dispatcher-3] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job 'PageRank:NodeList_5116625976305719460': Perspective '32674' finished in 95 ms.
20:03:57.617 [spawner-akka.actor.default-dispatcher-6] INFO  com.raphtory.internals.components.querymanager.QueryHandler - Job 'PageRank:NodeList_5116625976305719460': Perspective at Time '32674' took 82 ms to run. 
20

In [57]:
## clean
# Re-bind instead of dropping in place, and tolerate an already-removed column,
# so that re-running this cell does not raise a KeyError.
df_pagerank = df_pagerank.drop(columns=['window'], errors='ignore')

In [58]:
# Display the cleaned PageRank table (one `prlabel` score per vertex).
df_pagerank

Unnamed: 0,timestamp,name,prlabel
0,32674,Hirgon,0.277968
1,32674,Hador,0.459710
2,32674,Horn,0.522389
3,32674,Galadriel,2.228852
4,32674,Isildur,0.277968
...,...,...,...
134,32674,Faramir,8.551166
135,32674,Bain,0.396105
136,32674,Walda,0.817198
137,32674,Thranduil,0.761719


### The ten highest-ranked characters

In [59]:
# Ten vertices with the largest PageRank score.
df_pagerank.sort_values('prlabel', ascending=False).head(10)

Unnamed: 0,timestamp,name,prlabel
97,32674,Aragorn,13.246457
134,32674,Faramir,8.551166
56,32674,Elrond,5.621548
138,32674,Boromir,4.824014
132,32674,Legolas,4.62259
110,32674,Imrahil,4.0956
65,32674,Éomer,3.473897
42,32674,Samwise,3.292762
118,32674,Sam,2.82614
55,32674,Frodo,2.806475


## Run a connected components algorithm 

In [23]:
cols = ["cclabel"]

# Label every vertex with its connected component on the graph viewed at time
# 32674. ConnectedComponents is passed as-is (not called), unlike PageRank().
df_cc = (
    rg.at(32674)
    .past()
    .transform(pr.algorithms.generic.ConnectedComponents)
    .execute(pr.algorithms.generic.NodeList(*cols))
    .write_to_dataframe(["name", *cols])
)

20:06:34.043 [io-compute-6] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job ConnectedComponents:NodeList_1766542255766878766: Starting query progress tracker.
20:06:34.044 [spawner-akka.actor.default-dispatcher-6] INFO  com.raphtory.internals.components.querymanager.QueryManager - Query 'ConnectedComponents:NodeList_1766542255766878766' received, your job ID is 'ConnectedComponents:NodeList_1766542255766878766'.
20:06:34.046 [spawner-akka.actor.default-dispatcher-7] INFO  com.raphtory.internals.components.partition.QueryExecutor - ConnectedComponents:NodeList_1766542255766878766_0: Starting QueryExecutor.
20:06:34.069 [spawner-akka.actor.default-dispatcher-3] INFO  com.raphtory.internals.components.querymanager.QueryHandler - Job 'ConnectedComponents:NodeList_1766542255766878766': Perspective at Time '32674' took 20 ms to run. 
20:06:34.069 [spawner-akka.actor.default-dispatcher-5] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job 'ConnectedComponents:NodeLi

In [60]:
## clean
# Re-bind instead of dropping in place, and tolerate an already-removed column,
# so that re-running this cell does not raise a KeyError.
df_cc = df_cc.drop(columns=['window'], errors='ignore')

In [61]:
# Display the cleaned component table (one `cclabel` per vertex).
df_cc

Unnamed: 0,timestamp,name,cclabel
0,32674,Hirgon,-8637342647242242534
1,32674,Hador,-8637342647242242534
2,32674,Horn,-8637342647242242534
3,32674,Galadriel,-8637342647242242534
4,32674,Isildur,-8637342647242242534
...,...,...,...
134,32674,Faramir,-8637342647242242534
135,32674,Bain,-6628080393138316116
136,32674,Walda,-8637342647242242534
137,32674,Thranduil,-8637342647242242534


### Number of distinct components 

In [28]:
# Count the distinct values in the `cclabel` column, i.e. how many components were found.
len(set(df_cc['cclabel']))

3

### Size of components 

In [70]:
# Number of rows (vertices) carrying each component label.
(
    df_cc.groupby('cclabel')
    .count()
    .reset_index()
    .drop(columns='timestamp')
)

Unnamed: 0,cclabel,name
0,-8637342647242242534,134
1,-6628080393138316116,3
2,-5499479516525190226,2


### Run chained algorithms at once 

In [181]:
cols = ["inDegree", "outDegree", "degree", "prlabel", "cclabel"]

# Chain PageRank, ConnectedComponents and Degree in a single query on the graph
# viewed at time 32674, then export all requested columns in one table.
df_chained = (
    rg.at(32674)
    .past()
    .transform(pr.algorithms.generic.centrality.PageRank())
    .transform(pr.algorithms.generic.ConnectedComponents)
    .transform(pr.algorithms.generic.centrality.Degree())
    .execute(pr.algorithms.generic.NodeList(*cols))
    .write_to_dataframe(["name", *cols])
)

20:47:11.271 [io-compute-7] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job PageRank:ConnectedComponents:Degree:NodeList_7456998850402965589: Starting query progress tracker.
20:47:11.271 [spawner-akka.actor.default-dispatcher-3] INFO  com.raphtory.internals.components.querymanager.QueryManager - Query 'PageRank:ConnectedComponents:Degree:NodeList_7456998850402965589' received, your job ID is 'PageRank:ConnectedComponents:Degree:NodeList_7456998850402965589'.
20:47:11.273 [spawner-akka.actor.default-dispatcher-7] INFO  com.raphtory.internals.components.partition.QueryExecutor - PageRank:ConnectedComponents:Degree:NodeList_7456998850402965589_0: Starting QueryExecutor.
20:47:11.320 [spawner-akka.actor.default-dispatcher-10] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job 'PageRank:ConnectedComponents:Degree:NodeList_7456998850402965589': Perspective '32674' finished in 49 ms.
20:47:11.320 [spawner-akka.actor.default-dispatcher-6] INFO  com.raphtory.internal

In [182]:
# Show the result without the 'window' column; the result is not assigned,
# so `df_chained` itself keeps the column.
df_chained.drop(columns='window')

Unnamed: 0,timestamp,name,inDegree,outDegree,degree,prlabel,cclabel
0,32674,Hirgon,0,2,2,0.277968,-8637342647242242534
1,32674,Hador,2,1,3,0.459710,-8637342647242242534
2,32674,Horn,3,1,4,0.522389,-8637342647242242534
3,32674,Galadriel,16,6,19,2.228852,-8637342647242242534
4,32674,Isildur,0,18,18,0.277968,-8637342647242242534
...,...,...,...,...,...,...,...
134,32674,Faramir,29,3,29,8.551166,-8637342647242242534
135,32674,Bain,1,1,2,0.396105,-6628080393138316116
136,32674,Walda,10,3,13,0.817198,-8637342647242242534
137,32674,Thranduil,2,0,2,0.761719,-8637342647242242534


In [188]:
# Display the full frame; it still has the 'window' column because the drop
# in the previous cell was not assigned back.
df_chained

Unnamed: 0,timestamp,window,name,inDegree,outDegree,degree,prlabel,cclabel
0,32674,,Hirgon,0,2,2,0.277968,-8637342647242242534
1,32674,,Hador,2,1,3,0.459710,-8637342647242242534
2,32674,,Horn,3,1,4,0.522389,-8637342647242242534
3,32674,,Galadriel,16,6,19,2.228852,-8637342647242242534
4,32674,,Isildur,0,18,18,0.277968,-8637342647242242534
...,...,...,...,...,...,...,...,...
134,32674,,Faramir,29,3,29,8.551166,-8637342647242242534
135,32674,,Bain,1,1,2,0.396105,-6628080393138316116
136,32674,,Walda,10,3,13,0.817198,-8637342647242242534
137,32674,,Thranduil,2,0,2,0.761719,-8637342647242242534


### Add nodes and edges for visualisation

In [201]:
def visualise(rg, df_chained, timestamp=32674):
    """Build an interactive pyvis network of the graph with per-node metric tooltips.

    Args:
        rg: Raphtory graph that is queried for its node and edge lists.
        df_chained: DataFrame with a ``name`` column plus one column per metric.
            Every column except ``timestamp``, ``name`` and ``window`` is shown
            in the node's hover text.
        timestamp: Time at which the graph is viewed. Defaults to 32674, the
            value that used to be hard-coded here.

    Returns:
        The populated ``Network``; call ``.show(...)`` on it to render.

    Note:
        Uses the notebook-level ``pr`` context to reach the NodeList and
        EdgeList algorithms.
    """
    # Create network object
    net = Network(notebook=True, height='750px', width='100%', bgcolor='#222222', font_color='white')
    # Set visualisation layout
    net.force_atlas_2based()

    # Get the node list
    df_node_list = rg.at(timestamp) \
        .past() \
        .execute(pr.algorithms.generic.NodeList()) \
        .write_to_dataframe(['name'])
    nodes = df_node_list['name'].tolist()

    # Build a name -> hover-text lookup in one pass over df_chained instead of
    # rescanning the whole frame for every node.
    ignore_items = {'timestamp', 'name', 'window'}
    tooltips = {}
    for _, row in df_chained.iterrows():
        # Series.items() replaces iteritems(), which was removed in pandas 2.0.
        text = ''.join(
            str(k) + ': ' + str(v) + '\n'
            for k, v in row.items()
            if k not in ignore_items
        )
        # Keep the first row seen for a name so duplicates cannot add extra entries.
        tooltips.setdefault(row['name'], text)

    # One tooltip per node, in node order. A node missing from df_chained gets
    # an empty tooltip so the two lists passed to add_nodes stay the same length.
    node_data = [tooltips.get(node_name, '') for node_name in nodes]

    # Add the nodes
    net.add_nodes(nodes, title=node_data)

    # Get the edge list
    df_edge_list = rg.at(timestamp) \
        .past() \
        .execute(pr.algorithms.generic.EdgeList()) \
        .write_to_dataframe(['from', 'to'])
    edges = df_edge_list[['from', 'to']].values.tolist()
    # Add the edges
    net.add_edges(edges)

    # Toggle physics
    net.toggle_physics(True)
    return net

In [202]:
# Build the pyvis network, using the chained-algorithm results for the node tooltips.
net = visualise(rg, df_chained)

20:58:04.311 [io-compute-5] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job NodeList_1145279678414288633: Starting query progress tracker.
20:58:04.312 [spawner-akka.actor.default-dispatcher-11] INFO  com.raphtory.internals.components.querymanager.QueryManager - Query 'NodeList_1145279678414288633' received, your job ID is 'NodeList_1145279678414288633'.
20:58:04.313 [spawner-akka.actor.default-dispatcher-11] INFO  com.raphtory.internals.components.partition.QueryExecutor - NodeList_1145279678414288633_0: Starting QueryExecutor.
20:58:04.322 [spawner-akka.actor.default-dispatcher-8] INFO  com.raphtory.internals.components.querymanager.QueryHandler - Job 'NodeList_1145279678414288633': Perspective at Time '32674' took 8 ms to run. 
20:58:04.322 [spawner-akka.actor.default-dispatcher-11] INFO  com.raphtory.api.querytracker.QueryProgressTracker - Job 'NodeList_1145279678414288633': Perspective '32674' finished in 11 ms.
20:58:04.322 [spawner-akka.actor.default-dispatcher-11

In [203]:
# Write the network out as preview.html and show it.
net.show('preview.html')

In [None]:
# Shut down the PyRaphtory context opened at the start of the notebook.
pr.shutdown()