In [None]:
import graphdb_client
import logging 
import json
import os
import json
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Module-level directed graph; nx_load() and cyc_load() clear and refill it before drawing.
G = nx.DiGraph()

# Log DEBUG and above, formatted as "[LEVEL] <line number> <message>".
logging.basicConfig(level=logging.DEBUG, format='[%(levelname)s] %(lineno)s %(message)s',)
# Client object used by every cell below, configured with host http://127.0.0.1:5000.
g = graphdb_client.gc(host = 'http://127.0.0.1:5000')

# Absolute path of the current working directory; the loaders read CSV files from "<mypath>/data".
mypath = os.path.abspath('')

def is_json(myjson):
    """Report whether ``myjson`` can be parsed as a JSON document.

    Args:
        myjson: Candidate JSON text, e.g. the response of a client call.

    Returns:
        True if ``json.loads`` accepts the value, False otherwise. Input that
        is not text at all (such as ``None``) yields False instead of raising.
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        # ValueError: malformed JSON text; TypeError: value is not str/bytes.
        return False
    return True


def print_json(rc):
    """Pretty-print ``rc`` when it is JSON text, otherwise print it unchanged.

    Args:
        rc: Value to show, normally the raw response string of a client call.

    Returns:
        None. The output is written to stdout.
    """
    try:
        # Decode once and reuse the result instead of validating the text and
        # then decoding the very same text a second time.
        parsed = json.loads(rc)
    except (ValueError, TypeError):
        # Not JSON (or not text at all): fall back to printing the raw value.
        print(rc)
    else:
        print(json.dumps(parsed, indent=4))
        
def nx_load(rc, n=1):
    """Rebuild the module-level graph ``G`` from a JSON response and draw it.

    Args:
        rc: JSON text whose top-level object has a "data" member. "data" may
            contain "vertices" (items with "label" and "id") and "edges"
            (items with "source_label", "source_id", "target_label" and
            "target_id").
        n: Layout selector. 1 draws with a circular layout, 2 with the
            default layout of ``nx.draw``; any other value draws nothing.

    Returns:
        None. ``G`` is cleared and repopulated as a side effect.
    """
    gp = json.loads(rc)
    G.clear()
    data = gp["data"]

    # Nodes are identified by (label, id) tuples.
    # dict.has_key() exists only in Python 2; the ``in`` operator works on
    # every Python version.
    if "vertices" in data:
        for vertex in data["vertices"]:
            G.add_node((vertex["label"], vertex["id"]))
    if "edges" in data:
        for edge in data["edges"]:
            source = (edge["source_label"], edge["source_id"])
            target = (edge["target_label"], edge["target_id"])
            G.add_edge(source, target)

    # ``node_color`` is the keyword documented by networkx; the former
    # spelling ``nodecolor`` is not a drawing option, so the requested red
    # was never applied (newer releases may reject it outright -- confirm).
    if n == 1:
        nx.draw(G, pos=nx.circular_layout(G), node_color='r', edge_color='b', node_size=23)
    elif n == 2:
        nx.draw(G, node_color='r', edge_color='b', node_size=43)

def cyc_load(rc, n=1):
    """Rebuild ``G`` from the components of a cycle-search response and draw it.

    Only components whose statistics report more than 5 vertices and more
    than 5 edges are added. Every edge is inserted in both directions.

    Args:
        rc: JSON text whose top-level object has a "components" list. Each
            component carries "statistics" ("num_vertices", "num_edges") and
            "data", which may contain "vertices" and "edges" shaped like the
            ones consumed by ``nx_load``.
        n: Layout selector. 1 draws with a circular layout, 2 with the
            default layout of ``nx.draw``; any other value draws nothing.

    Returns:
        None. ``G`` is cleared and repopulated as a side effect.
    """
    res = json.loads(rc)
    G.clear()
    for gp in res["components"]:
        stats = gp["statistics"]
        if not (stats["num_vertices"] > 5 and stats["num_edges"] > 5):
            # Skip small components.
            continue
        data = gp["data"]
        # dict.has_key() exists only in Python 2; the ``in`` operator works
        # on every Python version.
        if "vertices" in data:
            for vertex in data["vertices"]:
                G.add_node((vertex["label"], vertex["id"]))
        if "edges" in data:
            for edge in data["edges"]:
                source = (edge["source_label"], edge["source_id"])
                target = (edge["target_label"], edge["target_id"])
                # Add the edge and its reverse.
                G.add_edge(source, target)
                G.add_edge(target, source)

    # ``node_color`` is the keyword documented by networkx; the former
    # spelling ``nodecolor`` is not a drawing option, so the requested red
    # was never applied (newer releases may reject it outright -- confirm).
    if n == 1:
        nx.draw(G, pos=nx.circular_layout(G), node_color='r', edge_color='b', node_size=23)
    elif n == 2:
        nx.draw(G, node_color='r', edge_color='b', node_size=43)
     
    
# Load vertex CSV files (with a header row) that are located on the user's machine
def load_movie_vertex():
    """Load MOVIE vertices from ``data/movie_vertice.csv`` and print the reply.

    The file is read from below ``mypath`` as a comma-delimited table with a
    header row; its ``id`` column supplies the vertex id.
    """
    # Each entry has the form {"aaa": ["aa", "INT"]}: "aa" is the column name
    # in the CSV header, "aaa" is the property name to use in the graph.
    properties = [
        {"budget": ["budget", "DOUBLE"]},
        {"genres": ["genres", "STRING"]},
        {"keywords": ["keywords", "STRING"]},
        {"popularity": ["popularity", "FLOAT"]},
        {"revenue": ["revenue", "DOUBLE"]},
        {"runtime": ["runtime", "INT"]},
        {"title": ["title", "STRING"]},
        {"vote_average": ["vote_average", "FLOAT"]},
        {"vote_count": ["vote_count", "INT"]},
    ]
    header_map = {"vertex_id": "id", "properties": properties}

    # data_row_start/data_row_end of -1 presumably mean "no row limit" --
    # TODO confirm against the client documentation.
    response = g.load_table_vertex(
        file_path=mypath + "/data/movie_vertice.csv",
        has_header=1,
        column_delimiter=',',
        default_vertex_label="MOVIE",
        column_header_map=header_map,
        column_number_map=[{}],
        content_type=properties,
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)
    
    
def load_actor_vertex():
    """Load ACTOR vertices from ``data/actor_vertice.csv`` and print the reply.

    The file is read from below ``mypath`` as a comma-delimited table with a
    header row; its ``id`` column supplies the vertex id.
    """
    # Graph property name -> [CSV header column, type].
    properties = [
        {"name": ["name", "STRING"]},
        {"gender": ["gender", "INT"]},
    ]
    header_map = {"vertex_id": "id", "properties": properties}

    response = g.load_table_vertex(
        file_path=mypath + "/data/actor_vertice.csv",
        has_header=1,
        column_delimiter=',',
        default_vertex_label="ACTOR",
        column_header_map=header_map,
        column_number_map=[{}],
        content_type=properties,
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)
    
    
def load_director_vertex():
    """Load DIRECTOR vertices from ``data/director_vertice.csv`` and print the reply.

    The file is read from below ``mypath`` as a comma-delimited table with a
    header row; its ``id`` column supplies the vertex id.
    """
    # Graph property name -> [CSV header column, type].
    properties = [
        {"name": ["name", "STRING"]},
        {"gender": ["gender", "INT"]},
    ]
    header_map = {"vertex_id": "id", "properties": properties}

    response = g.load_table_vertex(
        file_path=mypath + "/data/director_vertice.csv",
        has_header=1,
        column_delimiter=',',
        default_vertex_label="DIRECTOR",
        column_header_map=header_map,
        column_number_map=[{}],
        content_type=properties,
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)
    
    
# Load edge CSV files (with a header row) that are located on the local machine
def load_act_edge():
    """Load ACT edges (MOVIE -> ACTOR) from ``data/actor_edge.csv`` and print the reply.

    The CSV columns ``source_id`` and ``target_id`` are used as the edge's
    source and target ids; ``order`` is loaded as an INT edge property.
    """
    header_map = {
        "source_id": "source_id",
        "target_id": "target_id",
        "properties": [{"order": ["order", "INT"]}],
    }

    response = g.load_table_edge(
        file_path=mypath + "/data/actor_edge.csv",
        has_header=1,
        column_delimiter=',',
        default_source_label="MOVIE",
        default_target_label="ACTOR",
        default_edge_label='ACT',
        column_header_map=header_map,
        column_number_map=[{}],
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)
    
    
def load_act_edge_reverse():
    """Load ACT edges in the opposite direction (ACTOR -> MOVIE) and print the reply.

    Reads the same ``data/actor_edge.csv`` as ``load_act_edge`` but swaps the
    roles of the columns: ``target_id`` becomes the source id and
    ``source_id`` becomes the target id.
    """
    header_map = {
        "source_id": "target_id",
        "target_id": "source_id",
        "properties": [{"order": ["order", "INT"]}],
    }

    response = g.load_table_edge(
        file_path=mypath + "/data/actor_edge.csv",
        has_header=1,
        column_delimiter=',',
        default_source_label="ACTOR",
        default_target_label="MOVIE",
        default_edge_label="ACT",
        column_header_map=header_map,
        column_number_map=[{}],
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)
    
    
def load_dir_edge():
    """Load DIR edges (MOVIE -> DIRECTOR) from ``data/director_edge.csv`` and print the reply.

    The CSV columns ``source_id`` and ``target_id`` are used as the edge's
    source and target ids; no edge properties are loaded.
    """
    header_map = {
        "source_id": "source_id",
        "target_id": "target_id",
        "properties": [],
    }

    # NOTE(review): column_number_map is {} here but [{}] in the ACT edge
    # loaders -- confirm which form the client expects.
    response = g.load_table_edge(
        file_path=mypath + "/data/director_edge.csv",
        has_header=1,
        column_delimiter=',',
        default_source_label="MOVIE",
        default_target_label="DIRECTOR",
        default_edge_label="DIR",
        column_header_map=header_map,
        column_number_map={},
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)
    
    
def load_dir_edge_reverse():
    """Load DIR edges in the opposite direction (DIRECTOR -> MOVIE) and print the reply.

    Reads the same ``data/director_edge.csv`` as ``load_dir_edge`` but swaps
    the roles of the columns: ``target_id`` becomes the source id and
    ``source_id`` becomes the target id. No edge properties are loaded.
    """
    header_map = {
        "source_id": "target_id",
        "target_id": "source_id",
        "properties": [],
    }

    # NOTE(review): column_number_map is {} here but [{}] in the ACT edge
    # loaders -- confirm which form the client expects.
    response = g.load_table_edge(
        file_path=mypath + "/data/director_edge.csv",
        has_header=1,
        column_delimiter=',',
        default_source_label="DIRECTOR",
        default_target_label="MOVIE",
        default_edge_label="DIR",
        column_header_map=header_map,
        column_number_map={},
        data_row_start=-1,
        data_row_end=-1)
    print_json(response)

In [None]:
# Delete the graph named 'movie_actor_director' and show the reply.
rc = g.delete_graph('movie_actor_director')
print_json(rc)

In [None]:
# Create the graph named 'movie_actor_director' and show the reply.
rc = g.create_graph(graph_name = 'movie_actor_director')
print_json(rc)
# Select that graph as the current graph for the calls that follow.
rc = g.set_current_graph('movie_actor_director')
print_json(rc)

In [None]:
# List the graphs; as the last expression of the cell, the return value is displayed by the notebook.
g.list_graphs()

In [None]:
# Load the three vertex tables first (MOVIE, ACTOR, DIRECTOR).
load_movie_vertex()
load_actor_vertex()
load_director_vertex()
# Then load each edge table twice: once as stored (MOVIE -> ACTOR / DIRECTOR)
# and once with source and target swapped.
load_act_edge()
load_act_edge_reverse()
load_dir_edge()
load_dir_edge_reverse()

In [None]:
# Show which graph is currently selected.
rc = g.get_current_graph()
print_json(rc)

In [None]:
# Show the schema reported by the client (presumably for the current graph -- confirm).
rc = g.get_schema()
print_json(rc)

In [None]:
# Look up one vertex: label MOVIE, id "423".
rc = g.get_vertex(vertex_id="423", vertex_label="MOVIE")
print_json(rc)

In [None]:
# Query by label list only, without an id (presumably returns every DIRECTOR vertex -- confirm).
rc = g.get_vertex(vertex_label=["DIRECTOR"])
print_json(rc)

In [None]:
# Request a path between MOVIE "423" and MOVIE "238".
# NOTE(review): the meaning of the trailing [] and 23 is not visible here
# (presumably an edge-label filter and a depth limit) -- confirm.
rc = g.get_path("423", "MOVIE", "238", "MOVIE", [], 23)
print_json(rc)
# Draw the result with the default nx.draw layout (n=2).
nx_load(rc, 2)

In [None]:
# Fetch the outgoing edges of MOVIE "423", print and draw them (default layout, n=2).
rc = g.get_edge_out("423", "MOVIE")
print_json(rc)
nx_load(rc, 2)

In [None]:
# Fetch the incoming edges of MOVIE "423", print and draw them (default layout, n=2).
rc = g.get_edge_in("423", "MOVIE")
print_json(rc)
nx_load(rc, 2)

In [None]:
# Fetch the out-neighbors of MOVIE "423", print and draw them (default layout, n=2).
rc = g.get_neighbor_out("423", "MOVIE")
print_json(rc)
nx_load(rc, 2)

In [None]:
# Ask for the vertex count, passing the labels MOVIE and DIRECTOR.
rc = g.get_num_vertex(["MOVIE", "DIRECTOR"])
print_json(rc)

In [None]:
# Fetch the egonet around MOVIE "423", restricted to DIR and ACT edge labels.
# NOTE(review): the 2 is presumably the depth in hops -- confirm.
rc = g.get_egonet("423", "MOVIE", 2, ["DIR", "ACT"])
print_json(rc)
# Draw the result with the default nx.draw layout (n=2).
nx_load(rc, 2)

In [None]:
# Parallel lists: ids[k] is the id of a vertex whose label is labels[k].
ids = ["423", "3556", "238"]
labels = ["MOVIE", "DIRECTOR", "MOVIE"]
# Request the subgraph for those vertices, passing the DIR and ACT edge labels.
rc = g.get_subgraph(ids, labels, ["DIR", "ACT"])
print_json(rc)
# Draw the result with the default nx.draw layout (n=2).
nx_load(rc, 2)

In [None]:
### Demo only: an undirected graph is not suitable for cycle searching.
# NOTE(review): the meaning of the positional arguments after the start vertex
# ([], 7, edge labels, [], "") is not visible here -- confirm against the client.

rc = g.get_cycle("423", "MOVIE", [], 7, ["DIR", "ACT"], [], "")        
print_json(rc)
# Draw the returned components with the default nx.draw layout (n=2).
cyc_load(rc, 2)

- Vertex Neighbors: Find the actor(s) whose movies earn the highest total profit, where Profit = Revenue – Budget.

In [None]:
# Find the actor(s) whose movies earn the highest total profit
# (profit = revenue - budget). One request is issued per actor, so only the
# first MAX_ACTORS actors are examined.
MAX_ACTORS = 1000

# Keep the raw response and the decoded list under separate names so that one
# name is never bound to two different kinds of value.
actors_response = g.get_vertex(vertex_label=["ACTOR"])
ACTORS = json.loads(actors_response)["data"]["vertices"]

profit_actors = []        # ids of the actors tied for the best total profit
max_profit = float(0)     # best total profit seen so far

# Slicing replaces the manual counter and break; the same actors are visited.
for actor in ACTORS[:MAX_ACTORS]:
    actor_id = actor["id"]
    movies = json.loads(g.get_neighbor_out(actor_id, "ACTOR"))["data"]["vertices"]
    profit = float(0)
    for movie in movies:
        # NOTE(review): relies on the position of the entries in 'properties'
        # (index 0 = budget, index 4 = revenue) and skips vertices carrying
        # 7 or fewer properties -- confirm against the server's response.
        if movie['label'] == 'MOVIE' and len(movie['properties']) > 7:
            profit += movie['properties'][4]['revenue'] - movie['properties'][0]['budget']
    if profit > max_profit:
        # New best: restart the list of winners.
        profit_actors = [actor_id]
        max_profit = profit
    elif profit == max_profit:
        # Tie with the current best.
        profit_actors.append(actor_id)



In [None]:
# Report the result of the profit search. Each print() call receives a single
# string, so it produces the same output under Python 2 and Python 3.
print("number of the profit actor: " + str(len(profit_actors)))
print("actor's id: " + str(profit_actors))
# The value is the best total profit, so label it as such.
print("actor's total profit: " + str(max_profit))

# Show the details of every winning actor instead of one hard-coded id.
# print_json() prints by itself and returns None, so it is called on its own
# line rather than being passed to print.
for top_actor_id in profit_actors:
    print("more info about this actor: ")
    print_json(g.get_vertex(vertex_id=top_actor_id, vertex_label="ACTOR"))