# Benchmark 

In [1]:
# Load raphtory 
import gzip
import csv
import raphtory
from tqdm import tqdm
import random

profiles_file = "data/soc-pokec-profiles.txt.gz" # 1,632,803
relationships_file = "data/soc-pokec-relationships.txt.gz" # 30,622,564

In [14]:
def load_edges(g):
    # Load edges
    with gzip.open(relationships_file, 'rt') as f:
        reader = csv.reader(f, delimiter='\t')
        for row in tqdm(reader, total=30622564):
            g.add_edge(1, row[0], row[1], {})
    # 30622564it [02:07, 240673.12it/s]

def load_props(g):
    # Load node properties
    with gzip.open(profiles_file, 'rt') as f:
        reader = csv.reader(f, delimiter='\t')
        for row in tqdm(reader, total=1632803):
            
            props = {
                "user_id": row[0],
                "public": row[1], 
                "gender": row[3],
                "region": row[4],
                "last_login": row[5],
                "registration": row[6],
                "age": row[7],
            }
            g.add_vertex(1, row[0], props)
    # 30622564it [02:07, 240673.12it/s]    

def save_graph(g):
    # 2gb file
    g.save_to_file("saved_graph")
    
def load_graph():
    return raphtory.Graph.load_from_file("/Users/haaroony/Documents/dev/Raphtory/examples/py/benchmark/saved_graph")
    
def new_graph():
    g = raphtory.Graph()
    load_edges(g)
    load_props(g)
    return g

In [15]:
# Graph(number_of_edges=30622564, 
#    number_of_vertices=1632803, 
#   earliest_time=1, latest_time=1)
# g = new_graph()
g = load_graph()

## Run queries

## ARRANGO

In [16]:
def get_random_vertex(g):
    return str(random.randint(1, g.num_vertices()))

In [17]:
def single_vertex_read(g: raphtory.Graph):
    _id = random.randint(1, g.num_vertices())
    return g.vertex(str(_id))

single_vertex_read(g)

Vertex(name=598313, properties={registration : 2011-08-25 00:00:00.0, _id : 598313, gender : 1, public : 1, region : nitriansky kraj, topolcany, age : 27, last_login : 2011-08-30 01:11:00.0, user_id : 598313})

In [18]:
def single_vertex_write(g: raphtory.Graph):
    _id = g.num_vertices() * 10
    g.add_vertex(1, _id)

single_vertex_write(g)

In [19]:
def single_edge_write(g: raphtory.Graph):
    _from = get_random_vertex(g)
    _to = get_random_vertex(g)
    if g.has_vertex(_from) and g.has_vertex(_to):
        g.add_edge(1, _from, _to, {})
    else:
        raise Exception("could not find from and to")

single_edge_write(g)

In [20]:
def aggregate(g: raphtory.Graph):
    ages = g.vertices().property("age")
    return ages, len(ages)

aggregate(g)

(OptionPropIterable([26, 0, 22, 38, 29, 26, 26, 26, 0, 22, ...]), 1632804)

In [21]:
def aggregate_with_distinct(g: raphtory.Graph):
    ages = g.vertices().property("age")
    return len(set(ages))

aggregate_with_distinct(g)

115

In [22]:
def aggregate_with_filter(g: raphtory.Graph):
    ages = g.vertices().property("age")
    over_18 = [a for a in ages if a != 'null' and a != None and int(a) > 18]
    return over_18, len(over_18)

aggregate_with_filter(g)

(['26',
  '22',
  '38',
  '29',
  '26',
  '26',
  '26',
  '22',
  '26',
  '26',
  '22',
  '23',
  '21',
  '20',
  '19',
  '26',
  '20',
  '21',
  '23',
  '19',
  '19',
  '21',
  '24',
  '21',
  '22',
  '23',
  '21',
  '28',
  '20',
  '22',
  '22',
  '19',
  '21',
  '21',
  '19',
  '23',
  '21',
  '22',
  '20',
  '21',
  '19',
  '19',
  '22',
  '19',
  '23',
  '22',
  '23',
  '22',
  '22',
  '19',
  '27',
  '23',
  '20',
  '23',
  '24',
  '25',
  '20',
  '20',
  '19',
  '21',
  '24',
  '23',
  '19',
  '23',
  '22',
  '21',
  '20',
  '23',
  '21',
  '23',
  '21',
  '20',
  '23',
  '22',
  '24',
  '21',
  '21',
  '22',
  '20',
  '19',
  '21',
  '29',
  '20',
  '21',
  '21',
  '21',
  '21',
  '19',
  '21',
  '22',
  '23',
  '22',
  '21',
  '21',
  '22',
  '20',
  '29',
  '21',
  '23',
  '20',
  '21',
  '22',
  '21',
  '20',
  '27',
  '20',
  '22',
  '21',
  '21',
  '20',
  '19',
  '22',
  '24',
  '22',
  '22',
  '23',
  '21',
  '19',
  '20',
  '27',
  '23',
  '22',
  '21',
  '22',
  '25',


In [23]:
def expansion_1(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return next(iter(g.vertex(_id).out_neighbours()))
    
expansion_1(g)

Vertex(name=303169, properties={user_id : 303169, registration : 2004-01-09 00:00:00.0, gender : 0, age : 24, public : 1, last_login : 2012-05-24 13:25:00.0, region : kosicky kraj, kosice - okolie, _id : 303169})

In [24]:
def age_filter(x):
    return x != 'null' and x != None and int(x) >= 18

def expansion_1_with_filter(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return [n.name() for n in g.vertex(_id).out_neighbours() if age_filter(n.property('age'))]

expansion_1_with_filter(g)

[]

In [25]:
def expansion_2(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return set(g.vertex(_id).out_neighbours().out_neighbours().name()) 
    
expansion_2(g)

{'1561827', '933661'}

In [26]:
def age_filter(x):
    return x != 'null' and x != None and int(x) >= 18

def expansion_2_with_filter(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return set([n.name() for n in g.vertex(_id).out_neighbours().out_neighbours() if age_filter(n.property('age'))])

expansion_2_with_filter(g)

{'746352'}

In [27]:
def expansion_3(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return set(g.vertex(_id).out_neighbours().out_neighbours().out_neighbours().name()) 

expansion_3(g)

{'877279',
 '240763',
 '704370',
 '765141',
 '223585',
 '1010991',
 '233634',
 '5889',
 '398191',
 '424257',
 '402159',
 '145761',
 '48816',
 '516660',
 '705299',
 '257436',
 '242939',
 '608150',
 '571719',
 '246933',
 '1059192',
 '364623',
 '362027',
 '806809',
 '621519',
 '827305',
 '955084',
 '72055',
 '57759',
 '1091647',
 '159351',
 '359421',
 '952077',
 '982869',
 '357609',
 '314833',
 '625109',
 '224919',
 '447347',
 '174166',
 '238807',
 '201470',
 '102448',
 '178432',
 '280204',
 '856494',
 '471344',
 '732639',
 '51581',
 '940494',
 '829366',
 '1135376',
 '104033',
 '35442',
 '258715',
 '229254',
 '1137436',
 '463924',
 '910593',
 '619255',
 '1081579',
 '237885',
 '651251',
 '87698',
 '82898',
 '702200',
 '1150012',
 '420481',
 '629562',
 '243897',
 '181310',
 '594561',
 '20008',
 '410305',
 '986326',
 '450068',
 '89451',
 '223683',
 '1360294',
 '61149',
 '554127',
 '142376',
 '1486207',
 '1028356',
 '278748',
 '268424',
 '294466',
 '726904',
 '543775',
 '76633',
 '874022',
 '

In [28]:
def age_filter(x):
    return x != 'null' and x != None and int(x) >= 18

def expansion_3_with_filter(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return set([n.name() for n in g.vertex(_id).out_neighbours().out_neighbours().out_neighbours() if age_filter(n.property('age'))])

expansion_3_with_filter(g)

{'1274777',
 '421475',
 '126797',
 '275411',
 '462469',
 '453660',
 '44803',
 '1154425',
 '183631',
 '195395',
 '489080',
 '391996',
 '498450',
 '487459',
 '470905',
 '421401',
 '869695',
 '853827',
 '699263',
 '773861',
 '193693',
 '385281',
 '425305',
 '1276664',
 '613179',
 '642152',
 '378107',
 '28538',
 '663078',
 '417792',
 '1071755',
 '259069',
 '775821',
 '233386',
 '752193',
 '447347',
 '1079893',
 '239658',
 '210848',
 '358613',
 '562057',
 '471038',
 '417116',
 '501058',
 '461992',
 '1323527',
 '287240',
 '158288',
 '235207',
 '556432',
 '481999',
 '207871',
 '597141',
 '99823',
 '491268',
 '145893',
 '350773',
 '485946',
 '1313820',
 '1181727',
 '358308',
 '204372',
 '855105',
 '417221',
 '98107',
 '967368',
 '116427',
 '597531',
 '915127',
 '486938',
 '815793',
 '1124400',
 '709038',
 '138213',
 '640099',
 '258318',
 '96264',
 '487180',
 '271182',
 '417396',
 '763573',
 '163056',
 '189082',
 '655143',
 '472081',
 '485548',
 '76633',
 '612977',
 '240058',
 '639517',
 '75805

In [29]:
def expansion_4(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return set(g.vertex(_id)
               .out_neighbours()
               .out_neighbours()
               .out_neighbours()
               .out_neighbours()
               .name()) 

expansion_4(g)

set()

In [30]:
def age_filter(x):
    return x != 'null' and x != None and int(x) >= 18

def expansion_4_with_filter(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return set([n.name() for n in g.vertex(_id)
                .out_neighbours()
                .out_neighbours()
                .out_neighbours()
                .out_neighbours() 
                if age_filter(n.property('age'))])

expansion_4_with_filter(g)

{'543810',
 '253572',
 '970328',
 '225541',
 '299447',
 '192677',
 '297866',
 '233699',
 '895029',
 '139215',
 '280569',
 '810475',
 '380764',
 '108147',
 '1192773',
 '233634',
 '210928',
 '210699',
 '505655',
 '174214',
 '374748',
 '314655',
 '245992',
 '82618',
 '417862',
 '372871',
 '640175',
 '178369',
 '969976',
 '225802',
 '125181',
 '929358',
 '156987',
 '295663',
 '337962',
 '256291',
 '613769',
 '272797',
 '835365',
 '684338',
 '104606',
 '390941',
 '360439',
 '680607',
 '423983',
 '225992',
 '112679',
 '328045',
 '458112',
 '863219',
 '275013',
 '424022',
 '292016',
 '634648',
 '217333',
 '681463',
 '461502',
 '613100',
 '21921',
 '1489152',
 '582305',
 '646371',
 '683443',
 '633382',
 '877592',
 '184766',
 '936024',
 '611133',
 '712952',
 '51995',
 '164979',
 '435715',
 '549364',
 '1449272',
 '91493',
 '566016',
 '1175502',
 '63529',
 '11122',
 '180217',
 '5282',
 '238443',
 '122921',
 '74789',
 '485068',
 '271182',
 '61149',
 '286296',
 '452833',
 '119512',
 '211604',
 '353

In [31]:
def neighbours_2(g: raphtory.Graph):
    _id = get_random_vertex(g)
    one_hop = g.vertex(_id).out_neighbours()
    two_hop_names = one_hop.out_neighbours().name()
    return set(one_hop.name()).union(two_hop_names)

neighbours_2(g)

{'101233',
 '105249',
 '1073678',
 '1080530',
 '108252',
 '1087419',
 '1091222',
 '1117771',
 '1128187',
 '1128222',
 '113500',
 '1138668',
 '1167856',
 '118229',
 '1182817',
 '119013',
 '1193270',
 '1193416',
 '1196867',
 '119708',
 '1203218',
 '1213879',
 '121801',
 '1221666',
 '1252536',
 '1283',
 '1288171',
 '129746',
 '1334220',
 '1334295',
 '135172',
 '135303',
 '14296',
 '1486373',
 '1486493',
 '1511003',
 '155078',
 '1571282',
 '175741',
 '175789',
 '182732',
 '185469',
 '18705',
 '19092',
 '192060',
 '193855',
 '199641',
 '202437',
 '21011',
 '249390',
 '25809',
 '259603',
 '259637',
 '261338',
 '266912',
 '270311',
 '27985',
 '285264',
 '286038',
 '295549',
 '297888',
 '298886',
 '299176',
 '304706',
 '305601',
 '306368',
 '307821',
 '311823',
 '313269',
 '31507',
 '31713',
 '31720',
 '31929',
 '31961',
 '31963',
 '31966',
 '32052',
 '322184',
 '333411',
 '336214',
 '35544',
 '368753',
 '372599',
 '372614',
 '372970',
 '373997',
 '380201',
 '380726',
 '391542',
 '39272',
 '39

In [32]:
def age_filter(x):
    return x != 'null' and x != None and int(x) >= 18


def neighbours_2_with_filter(g: raphtory.Graph):
    _id = get_random_vertex(g)
    one_hop = [n.out_neighbours() for n in g.vertex(_id).out_neighbours() if age_filter(n.property('age'))]
    two_hop_names = {n.name() for nh in one_hop for n in nh if age_filter(n.property('age'))}
    return {n.name() for n in g.vertex(_id).out_neighbours() if age_filter(n.property('age'))}.union(two_hop_names)    

neighbours_2_with_filter(g)

{'1001713',
 '1005480',
 '1030005',
 '1031528',
 '1031610',
 '1031613',
 '1044039',
 '1052276',
 '1052970',
 '1058505',
 '1069489',
 '1069651',
 '1070942',
 '1110980',
 '1133538',
 '1134067',
 '1134186',
 '1135337',
 '1135398',
 '1141895',
 '120823',
 '1231165',
 '1235555',
 '1241245',
 '124176',
 '1270880',
 '1306544',
 '1320455',
 '1332556',
 '1333454',
 '1417286',
 '1500257',
 '155898',
 '162399',
 '164111',
 '167027',
 '167065',
 '175191',
 '176901',
 '178189',
 '182935',
 '183047',
 '183087',
 '183091',
 '183145',
 '224548',
 '224627',
 '225168',
 '225203',
 '225212',
 '225287',
 '225290',
 '225293',
 '225297',
 '257846',
 '261833',
 '261968',
 '269905',
 '282505',
 '295452',
 '303504',
 '311134',
 '311671',
 '31667',
 '321449',
 '323993',
 '327873',
 '329894',
 '331107',
 '335757',
 '360559',
 '373918',
 '374688',
 '376294',
 '376330',
 '376377',
 '384761',
 '39593',
 '400667',
 '404647',
 '40744',
 '407791',
 '416094',
 '430352',
 '431508',
 '432553',
 '432662',
 '432669',
 '435

In [33]:
def neighbours_2_with_data(g: raphtory.Graph):
    _id = get_random_vertex(g)
    two_hop = [n.out_neighbours() for n in g.vertex(_id).out_neighbours()]
    nodes = {n for n in g.vertex(_id).out_neighbours()}.union(two_hop)
    return [(n, n.name()) for n in nodes]

neighbours_2_with_data(g)    

[(Vertex(name=793891, properties={public : 0, age : 0, user_id : 793891, region : zahranicie, zahranicie - ostatne, registration : 2007-06-04 00:00:00.0, _id : 793891, gender : 0, last_login : 2012-04-15 17:21:00.0}),
  '793891'),
 (Vertex(name=980384, properties={user_id : 980384, last_login : 2012-03-25 20:20:00.0, age : 24, gender : 1, registration : 2004-10-08 00:00:00.0, public : 1, _id : 980384, region : presovsky kraj, svidnik}),
  '980384'),
 (PathFromVertex(Vertex(name=218477, properties={public : 1, user_id : 218477, _id : 218477, region : banskobystricky kraj, zvolen, registration : 2005-04-25 00:00:00.0, gender : 0, age : 23, last_login : 2011-11-05 00:00:00.0}), Vertex(name=852609, properties={gender : 0, last_login : 2012-05-23 21:39:00.0, registration : 2009-01-02 00:00:00.0, _id : 852609, age : 30, region : banskobystricky kraj, brezno, user_id : 852609, public : 1}), Vertex(name=223858, properties={user_id : 223858, region : bratislavsky kraj, bratislava - petrzalka, g

In [34]:
def age_filter(x):
    return x != 'null' and x != None and int(x) >= 18


def neighbours_2_with_data_and_filter(g: raphtory.Graph):
    _id = get_random_vertex(g)
    two_hop = [n.out_neighbours() for n in g.vertex(_id).out_neighbours()]
    nodes = {n for n in g.vertex(_id).out_neighbours()}.union(two_hop)
    return [(n, n.name()) for n in nodes]

neighbours_2_with_data_and_filter(g)    

[(Vertex(name=154139, properties={region : presovsky kraj, vranov nad toplou, gender : 1, last_login : 2012-05-26 00:05:00.0, public : 1, user_id : 154139, _id : 154139, registration : 2008-11-13 00:00:00.0, age : 16}),
  '154139'),
 (PathFromVertex(Vertex(name=55637, properties={region : zilinsky kraj, zilina, _id : 55637, registration : 2011-08-30 00:00:00.0, public : 1, age : 14, user_id : 55637, gender : 0, last_login : 2012-05-25 23:17:00.0}), Vertex(name=30707, properties={region : presovsky kraj, vranov nad toplou, registration : 2006-06-16 00:00:00.0, last_login : 2012-01-03 12:12:00.0, gender : 0, public : 1, _id : 30707, age : 18, user_id : 30707}), Vertex(name=35989, properties={region : presovsky kraj, vranov nad toplou, last_login : 2012-05-25 23:42:04.0, public : 0, age : 0, _id : 35989, user_id : 35989, registration : 2007-09-28 00:00:00.0, gender : 0}), Vertex(name=37341, properties={region : presovsky kraj, vranov nad toplou, user_id : 37341, last_login : 2012-05-23 21

In [35]:
def shortest_path(g: raphtory.Graph):
    # REQUIRES A BFS ALGORITHM
    pass

def shortest_path_with_filter(g: raphtory.Graph):
    # REQUIRES A BFS ALGORITHM
    pass

def all_shortest_paths(g: raphtory.Graph):
    # REQUIRES A BFS ALGORITHM
    pass


## MEMGRAPH QUERIES

In [36]:
def create_edge(g: raphtory.Graph):
    single_edge_write(g)
    
def create_vertex(g: raphtory.Graph):
    single_vertex_write(g)

In [37]:
def create_vertex_big(g: raphtory.Graph):
    _id = g.num_vertices()+1
    g.add_vertex(0, _id, 
             {
                "L1": "L1","L2": "L2","L3": "L3","L4": "L4",
                 "L5": "L5","L6": "L6","L7": "L7",
                "p1": True, 
                "p2": 42,
                "p3": "Here is some text that is not extremely short",
                "p4": "Short text",
                "p5": 234.434,
                "p6": 11.11,
                "p7": False,
            })
create_vertex_big(g)

In [38]:
def aggregation_count(g: raphtory.Graph):
    aggregate(g)

In [39]:
def aggregation_min_max_avg(g: raphtory.Graph):
    ages = [int(x) for x in g.vertices.property('age') if x != None and x != 'null']
    min_age, max_age, avg_age = min(ages), max(ages), sum(ages)/len(ages)
    return min_age, max_age, avg_age

aggregation_min_max_avg(g)

(0, 112, 17.065383060564486)

In [40]:
def match_pattern_cycle(g: raphtory.Graph):
    _id = get_random_vertex(g)
    e_1 = g.vertex(_id).out_edges().first()
    e_2 = g.vertex(e_1.dst()).out_edges().first()
    return e_1, e_1.dst(), e_2

match_pattern_cycle(g)

(Edge(source=351725, target=34475, earliest_time=1, latest_time=1),
 Vertex(name=34475, properties={registration : 2008-07-19 00:00:00.0, region : trenciansky kraj, trencin, gender : 1, last_login : 2012-05-04 01:56:00.0, user_id : 34475, _id : 34475, age : 0, public : 1}),
 Edge(source=34475, target=12675, earliest_time=1, latest_time=1))

In [41]:
def match_pattern_long(g: raphtory.Graph):
    _id = get_random_vertex(g)
    n_3 = g.vertex(_id).out_neighbours().out_neighbours()
    list_n_5 = n_3.out_neighbours().in_neighbours()
    return next(iter(list_n_5))

match_pattern_long(g)

Vertex(name=2, properties={region : zilinsky kraj, kysucke nove mesto, user_id : 2, _id : 2, registration : 2007-11-30 00:00:00.0, public : 1, last_login : 2012-05-25 23:08:00.0, age : 0, gender : 0})

In [43]:
def match_pattern_short(g: raphtory.Graph):
    _id = get_random_vertex(g)
    return next(iter(g.vertex(_id).out_neighbours()))

match_pattern_short(g)

Vertex(name=90871, properties={user_id : 90871, region : zilinsky kraj, cadca, registration : 2009-01-23 00:00:00.0, age : 26, public : 1, gender : 0, last_login : 2012-04-10 15:49:00.0, _id : 90871})

In [44]:
def match_vertex_on_label_property(g: raphtory.Graph):
    _id = get_random_vertex(g)
    for v in iter(g.vertices()):
        if v.name() == _id:
            return v

match_vertex_on_label_property(g)

Vertex(name=472249, properties={public : 1, registration : 2005-09-23 00:00:00.0, gender : 1, region : trnavsky kraj, galanta, _id : 472249, last_login : 2012-05-21 13:04:00.0, user_id : 472249, age : 0})

In [45]:
def match_vertex_on_label_property_index(g: raphtory.Graph):
    # TODO WE DONT HAVE PROPERTY INDEXes
    return g.vertex(get_random_vertex(g))
    

In [46]:
def update__vertex_on_property(g: raphtory.Graph):
    _id = get_random_vertex(g)
    g.add_vertex(2, _id, {'property': -1})
    
update__vertex_on_property(g)