# JanusGraph and Gremlin queries

In [1]:
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.serializer import GraphSONSerializersV3d0

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
from gremlin_python.process.anonymous_traversal import traversal

import os
host = os.environ.get("JANUSGRAPH_HOST", "localhost")

connection = DriverRemoteConnection(f"ws://{host}:8182/gremlin", "g", message_serializer=GraphSONSerializersV3d0())

In [4]:
graph = Graph()
g = graph.traversal().withRemote(connection)

In [5]:
g.addV('student').property('name', 'Jeffery').property('GPA', 4.0).next()

v[8272]

In [6]:
g.addV('student').property('name', 'Robert').property('GPA', 3.0).next()

v[4304]

In [7]:
v1, v2 = g.V().to_list()

In [8]:
v1

v[8272]

In [9]:
v2

v[4304]

In [10]:
g.addE("FRIEND_OF").from_(v1).to(v2).property("since", "2014").next()

e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': '3yi-6ds-36d-3bk'}}][8272-FRIEND_OF->4304]

In [11]:
g\
  .addV('student').property('name', 'Claire').property('GPA', 3.9).as_("n1")\
  .addV('student').property('name', 'Lisa').property('GPA', 3.6).as_("n2")\
  .addE("FRIEND_OF").from_("n1").to("n2").property("since", "2014")\
  .iterate()

[['addV', 'student'], ['property', 'name', 'Claire'], ['property', 'GPA', 3.9], ['as', 'n1'], ['addV', 'student'], ['property', 'name', 'Lisa'], ['property', 'GPA', 3.6], ['as', 'n2'], ['addE', 'FRIEND_OF'], ['from', 'n1'], ['to', 'n2'], ['property', 'since', '2014'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

In [12]:
g.V().to_list()

[v[8272], v[4304], v[8400], v[8416]]

In [13]:
g.E().to_list()

[e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': '3yi-6ds-36d-3bk'}}][8272-FRIEND_OF->4304],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': '3kq-6hc-36d-6hs'}}][8400-FRIEND_OF->8416]]

In [14]:
g.V().drop().to_list()

[]

In [15]:
g.E().drop().to_list()

[]

### Import Karate Club Graph 

In [16]:
import networkx

In [17]:
import pandas as pd

In [18]:
nodes = networkx.karate_club_graph().nodes
nodes = pd.DataFrame.from_records([{"id": node} | nodes[node] for node in nodes]).set_index("id")

In [19]:
edges = networkx.karate_club_graph().edges
edges = pd.DataFrame.from_records([{"id": edge} | edges[edge] for edge in edges]).set_index("id")

In [20]:
nodes.head()

Unnamed: 0_level_0,club
id,Unnamed: 1_level_1
0,Mr. Hi
1,Mr. Hi
2,Mr. Hi
3,Mr. Hi
4,Mr. Hi


In [21]:
edges.head()

Unnamed: 0_level_0,weight
id,Unnamed: 1_level_1
"(0, 1)",4
"(0, 2)",5
"(0, 3)",3
"(0, 4)",3
"(0, 5)",3


Graph Generation 

In [22]:
from gremlin_python.process.graph_traversal import GraphTraversalSource

In [23]:
from functools import reduce

def build_node_query(agg: GraphTraversalSource, id: str, label: str, properties:dict):
    id_str = str(id)
    agg = agg.add_v(label).property("id", id_str)
    for k, v in properties.items():
        agg.property(k, v)
    return agg.as_(f"n_{id_str}")

In [24]:
def build_edge_query(agg: GraphTraversalSource, id: tuple[str,str], label: str, properties:dict):
    source_str = str(id[0])
    target_str = str(id[1])
    edge = agg\
        .V().has("id", str(source_str)).as_("source")\
        .V().has("id", str(target_str)).as_("target")\
        .addE(label).from_("source").to("target")
    for k, v in properties.items():
        edge.property(k, v)
    return edge.as_(f"edge_{source_str}_{target_str}")

In [25]:
_ = reduce(lambda g, node: build_node_query(g, node[0], "Person", node[1].to_dict()), nodes.iterrows(), g).iterate()

In [26]:
_ = reduce(lambda g, edge: build_edge_query(g, edge[0], "FRIEND_OF", edge[1].to_dict()), edges.iterrows(), g).iterate()

In [27]:
g.V().has("club", "Mr. Hi").out("FRIEND_OF").has("club", 'Officer').count().next()

11

In [28]:
g.V().has("club", "Mr. Hi").out("FRIEND_OF").has("club", 'Officer').values("id").to_list()

['30', '30', '33', '33', '33', '31', '9', '27', '28', '32', '32']

In [29]:
g.V().has("club", "Officer").in_("FRIEND_OF").has("club", 'Mr. Hi').values("id").to_list()

['2', '2', '2', '2', '8', '8', '8', '0', '1', '13', '19']

### Drop databases

In [30]:
g.V().drop().to_list()

[]

In [31]:
g.E().drop().to_list()

[]

### Import Movie Graph

In [32]:
nodes = pd.read_pickle("nodes.pkl")

In [33]:
edges = pd.read_pickle("edges.pkl")

In [34]:
nodes.head()

Unnamed: 0_level_0,label,props
id,Unnamed: 1_level_1,Unnamed: 2_level_1
TheMatrix,Movie,"{'title': 'The Matrix', 'released': 1999, 'tag..."
Keanu,Person,"{'name': 'Keanu Reeves', 'born': 1964}"
Carrie,Person,"{'name': 'Carrie-Anne Moss', 'born': 1967}"
Laurence,Person,"{'name': 'Laurence Fishburne', 'born': 1961}"
Hugo,Person,"{'name': 'Hugo Weaving', 'born': 1960}"


In [35]:
edges.head()

Unnamed: 0_level_0,label,props
id,Unnamed: 1_level_1,Unnamed: 2_level_1
"(Keanu, TheMatrix)",ACTED_IN,"{'roles': '[""Neo""]'}"
"(Carrie, TheMatrix)",ACTED_IN,"{'roles': '[""Trinity""]'}"
"(Laurence, TheMatrix)",ACTED_IN,"{'roles': '[""Morpheus""]'}"
"(Hugo, TheMatrix)",ACTED_IN,"{'roles': '[""Agent Smith""]'}"
"(LillyW, TheMatrix)",DIRECTED,{}


Creation of edges and nodes batch by batch

In [36]:
from itertools import islice

def batched(iterable, n):
    "Batch data into lists of length n. The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    it = iter(iterable)
    while True:
        batch = list(islice(it, n))
        if not batch:
            return
        yield batch

In [37]:
def create_from_batch(builder, iterable, batch_size):
    for batch in batched(iterable, batch_size):
        _ = reduce(lambda g, item: builder(g, item[0], item[1]["label"], item[1]["props"]), batch, g).iterate()

In [38]:
create_from_batch(build_node_query, nodes.iterrows(), 10)

In [39]:
create_from_batch(build_edge_query, edges.iterrows(), 10)

In [40]:
g.V().count().next()

171

In [41]:
g.E().count().next()

253

In [42]:
g.V().has('Person', 'name', 'Keanu Reeves').out("ACTED_IN").in_("ACTED_IN").values("name").dedup().to_list()

['Emil Eifrem',
 'Carrie-Anne Moss',
 'Laurence Fishburne',
 'Keanu Reeves',
 'Hugo Weaving',
 'Charlize Theron',
 'Al Pacino',
 'Gene Hackman',
 'Brooke Langton',
 'Orlando Jones',
 'Takeshi Kitano',
 'Dina Meyer',
 'Ice-T',
 'Jack Nicholson',
 'Diane Keaton']

In [43]:
g.V().drop().to_list()

[]

In [44]:
g.E().drop().to_list()

[]