In [14]:
"""
Experimenting with Graph Data in Dato and Dato SGraph Objects
More about SGraph @ https://dato.com/products/create/docs/generated/graphlab.SGraph.html
"""
import graphlab as gl
from graphlab import SGraph, Vertex, Edge
g = SGraph()
verts = [Vertex(0, attr={'breed': 'labrador'}),
         Vertex(1, attr={'breed': 'labrador'}),
         Vertex(2, attr={'breed': 'vizsla'})]
g = g.add_vertices(verts)
g = g.add_edges(Edge(1,2))
print g

SGraph({'num_edges': 1, 'num_vertices': 3})


In [15]:
# Chain together into a single line to contstruct a new graph

g = SGraph().add_vertices([Vertex(i) for i in range(10)]).add_edges([Edge(i, i+1) for i in range(9)])
print g 

SGraph({'num_edges': 9, 'num_vertices': 10})


In [16]:
# Bond, James Bond...

from graphlab import SFrame
edge_data = SFrame.read_csv('http://s3.amazonaws.com/dato-datasets/bond/bond_edges.csv')

g = SGraph()
g = g.add_edges(edge_data, src_field='src', dst_field='dst')
print g 

PROGRESS: Finished parsing file http://s3.amazonaws.com/dato-datasets/bond/bond_edges.csv
PROGRESS: Parsing completed. Parsed 20 lines in 0.023708 secs.
------------------------------------------------------
Inferred types from first line of file as 
column_type_hints=[str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
PROGRESS: Finished parsing file http://s3.amazonaws.com/dato-datasets/bond/bond_edges.csv
PROGRESS: Parsing completed. Parsed 20 lines in 0.009394 secs.
SGraph({'num_edges': 20, 'num_vertices': 10})


In [17]:
# This same graph can be built in 2 lines! 

vertex_data = SFrame.read_csv('http://s3.amazonaws.com/dato-datasets/bond/bond_vertices.csv')

g = SGraph(vertices=vertex_data, edges=edge_data, vid_field='name', src_field='src', dst_field='dst')

print g

PROGRESS: Finished parsing file http://s3.amazonaws.com/dato-datasets/bond/bond_vertices.csv
PROGRESS: Parsing completed. Parsed 10 lines in 0.010577 secs.
------------------------------------------------------
Inferred types from first line of file as 
column_type_hints=[str,str,int,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
PROGRESS: Finished parsing file http://s3.amazonaws.com/dato-datasets/bond/bond_vertices.csv
PROGRESS: Parsing completed. Parsed 10 lines in 0.009086 secs.
SGraph({'num_edges': 20, 'num_vertices': 10})


In [19]:
# An SGraph can be created directly from a local or remote file
# graphlab.load_sgraph() @ https://dato.com/products/create/docs/generated/graphlab.load_sgraph.html

g.save('james_bond')
new_graph = gl.load_sgraph('james_bond')

In [20]:
# Inspecting SGraphs

g.show(vlabel='id', highlight=['James Bond', 'Moneypenny'], arrows=True)

Canvas is accessible via web browser at the URL: http://localhost:55989/index.html
Opening Canvas in default web browser.


In [21]:
# Summarize graphs before exploring 

print g.summary()

{'num_edges': 20, 'num_vertices': 10}


In [22]:
# Easily filter edges and vertices based on vertex IDs or attributes 

sub_verts = g.get_vertices(ids=['James Bond'])
print sub_verts

+------------+--------+-----------------+---------+
|    __id    | gender | license_to_kill | villian |
+------------+--------+-----------------+---------+
| James Bond |   M    |        1        |    0    |
+------------+--------+-----------------+---------+
[1 rows x 4 columns]



In [23]:
sub_edges = g.get_edges(fields={'relation': 'worksfor'})
print sub_edges

+---------------+-------------+----------+
|    __src_id   |   __dst_id  | relation |
+---------------+-------------+----------+
|       M       |  Moneypenny | worksfor |
|       M       |  James Bond | worksfor |
|       M       |      Q      | worksfor |
| Elliot Carver | Henry Gupta | worksfor |
| Elliot Carver |  Gotz Otto  | worksfor |
+---------------+-------------+----------+
[5 rows x 3 columns]



In [24]:
# Retrieve subsets of a data graph near a set of target vertices (aka - egocentric neighborhood of the target vertices).

targets = ['James Bond', 'Moneypenny']
subgraph = g.get_neighborhood(ids=targets, radius=1, full_subgraph=True)
subgraph.show(vlabel='id', highlight=['James Bond', 'Moneypenny'], arrows=True)

Canvas is updated and available in a tab in the default browser.


In [26]:
# SGraphs are structurally immutable, but the data stored on vertices and edges can be mutated using SGraph.vertices and SGraph.edges

g.edges.print_rows(5)             # SFrame is bound to g 
g.get_edges().print_rows(5)       # SFrame is independent from g 

+----------------+----------------+------------+
|    __src_id    |    __dst_id    |  relation  |
+----------------+----------------+------------+
|   Moneypenny   |       M        | managed_by |
|   Moneypenny   |       Q        | colleague  |
| Inga Bergstorm |   James Bond   |   friend   |
|  Henry Gupta   | Elliot Carver  | killed_by  |
|   James Bond   | Inga Bergstorm |   friend   |
+----------------+----------------+------------+
[20 rows x 3 columns]

+----------------+----------------+------------+
|    __src_id    |    __dst_id    |  relation  |
+----------------+----------------+------------+
|   Moneypenny   |       M        | managed_by |
|   Moneypenny   |       Q        | colleague  |
| Inga Bergstorm |   James Bond   |   friend   |
|  Henry Gupta   | Elliot Carver  | killed_by  |
|   James Bond   | Inga Bergstorm |   friend   |
+----------------+----------------+------------+
[20 rows x 3 columns]



In [27]:
# Mutate relationships between edges 

g.edges['relation'] = g.edges['relation'].apply(lambda x: x[0].upper())
g.get_edges().print_rows(5)

+----------------+----------------+----------+
|    __src_id    |    __dst_id    | relation |
+----------------+----------------+----------+
|   Moneypenny   |       M        |    M     |
|   Moneypenny   |       Q        |    C     |
| Inga Bergstorm |   James Bond   |    F     |
|  Henry Gupta   | Elliot Carver  |    K     |
|   James Bond   | Inga Bergstorm |    F     |
+----------------+----------------+----------+
[20 rows x 3 columns]



In [28]:
# Modify graph but do not mutate the relation attribute on the edges of the graph 

e = g.get_edges() 
e['relation'] = e['relation'].apply(lambda x: x[0].lower())
g.get_edges().print_rows(5)

+----------------+----------------+----------+
|    __src_id    |    __dst_id    | relation |
+----------------+----------------+----------+
|   Moneypenny   |       M        |    M     |
|   Moneypenny   |       Q        |    C     |
| Inga Bergstorm |   James Bond   |    F     |
|  Henry Gupta   | Elliot Carver  |    K     |
|   James Bond   | Inga Bergstorm |    F     |
+----------------+----------------+----------+
[20 rows x 3 columns]



In [37]:
# Experiment with different methods on a graph-related SFrame. Results in new instance of a regular SFrame.

e1 = g.edges.head(5)
e2 = g.edges.tail(5)
e1['is_friend'] = e['relation'].apply(lambda x: x[0] == 'F')
e2['is_frined'] = e['relation'].apply(lambda x: x[0] == 'F')
# e.show()
print e1, e2

# __id, __src_id, _dst_id fields are not mutable because changing them would change the structure of the graph and SGraph is structurally immutable

+----------------+----------------+----------+-----------+
|    __src_id    |    __dst_id    | relation | is_friend |
+----------------+----------------+----------+-----------+
|   Moneypenny   |       M        |    M     |     0     |
|   Moneypenny   |       Q        |    C     |     0     |
| Inga Bergstorm |   James Bond   |    F     |     1     |
|  Henry Gupta   | Elliot Carver  |    K     |     0     |
|   James Bond   | Inga Bergstorm |    F     |     1     |
+----------------+----------------+----------+-----------+
[5 rows x 4 columns]
 +---------------+--------------+----------+-----------+
|    __src_id   |   __dst_id   | relation | is_frined |
+---------------+--------------+----------+-----------+
| Elliot Carver | Henry Gupta  |    W     |     0     |
| Elliot Carver |  James Bond  |    K     |     0     |
| Elliot Carver | Paris Carver |    E     |     1     |
| Elliot Carver |  Gotz Otto   |    W     |     0     |
|   Gotz Otto   |  James Bond  |    K     |     1     |

In [39]:
# Easily modify graph data

g.edges['weight'] = 1.0
del g.edges['weight']

In [40]:
"""
Super interesting graph method: triple_apply   (https://dato.com/products/create/docs/generated/graphlab.SGraph.triple_apply.html#graphlab.SGraph.triple_apply) 

Can help solve classic problems with relative speed & ease:
single-source shortest path (https://en.wikipedia.org/wiki/Shortest_path_problem)
weighted PageRank (https://en.wikipedia.org/wiki/PageRank)
"""

def increment_degree(src, edge, dst):
    src['degree'] += 1 
    dst['degree'] += 1 
    return (src, edge, dst)

g.vertices['degree'] = 0

g = g.triple_apply(increment_degree, mutated_fields=['degree'])
print g.vertices.sort('degree', ascending=False)

+----------------+--------+-----------------+---------+--------+
|      __id      | gender | license_to_kill | villian | degree |
+----------------+--------+-----------------+---------+--------+
|   James Bond   |   M    |        1        |    0    |   8    |
| Elliot Carver  |   M    |        0        |    1    |   7    |
|       M        |   M    |        1        |    0    |   6    |
|   Moneypenny   |   F    |        1        |    0    |   4    |
|       Q        |   M    |        1        |    0    |   4    |
|  Paris Carver  |   F    |        0        |    1    |   3    |
| Inga Bergstorm |   F    |        0        |    0    |   2    |
|    Wai Lin     |   F    |        1        |    0    |   2    |
|  Henry Gupta   |   M    |        0        |    1    |   2    |
|   Gotz Otto    |   M    |        0        |    1    |   2    |
+----------------+--------+-----------------+---------+--------+
[10 rows x 5 columns]

