In [None]:
# Imports 
import sys
from py2neo import neo4j, Graph, Relationship
from py2neo.cypher import CypherWriter
import MySQLdb


In [None]:
# Create MySQL connection
import os

# Credentials are read from the environment so they never have to be saved in
# the notebook. Both fall back to the original empty-string placeholders, so
# behaviour is unchanged when the variables are not set.
user = os.environ.get("STARMETRICS_DB_USER", "")
password = os.environ.get("STARMETRICS_DB_PASSWORD", "")
database = "starmetrics"

# invoke the connect() function, passing parameters in variables.
db = MySQLdb.connect( user = user, passwd = password, db = database )

# output basic database connection info.
print( db )

# DictCursor returns each row as a dict keyed by column name, which is what
# the node-loading cell further down relies on (employee['employeeid'], ...).
cursor = db.cursor( MySQLdb.cursors.DictCursor )
# Print cursor
print(cursor)

In [None]:
# Test SQL


# The following is a query of the starmetrics database.
# It collects the following columns for Purdue University between the
# years 2008 - 2012:
# From the employee table: employeeid, uniqueawardnumber, occupation_orig
# From the occupation table: occupationalclassification

# NOTE: I limit the results to 200
sql_select = "SElECT employee.employeeid, employee.uniqueawardnumber, employee.occupation_orig, \
occupation.occupationalclassification \
FROM starmetrics.employee \
INNER JOIN starmetrics.occupation ON \
starmetrics.employee.occupation_orig = occupation.occupation_orig \
WHERE starmetrics.employee.university = 'purdue' AND starmetrics.employee.year BETWEEN 2008 AND 2012 LIMIT 200;"

result_count = cursor.execute(sql_select)

# Count the number of rows returned
print("Found " + str(result_count) + " rows")

# Store the results of the query in a sequence of dicts.
# This must happen BEFORE peeking at a row: calling fetchone() first would
# consume the first row and fetchall() would then return one row fewer than
# the count printed above.
results = cursor.fetchall()

# Testing: show the column names and the first row (if there is one)
if results:
    one = results[0]
    print(one.keys())
    print(one)


In [None]:
# Create connection to the Neo4j database using defaults
# Note, I imported Graph when I imported py2neo
graph_db_test = Graph()
# print() with a single argument behaves the same on Python 2 and Python 3
# and matches the print() calls used in the cells above.
print(graph_db_test)

# NOTE(review): create() is called without any entities, so it presumably
# creates nothing in the database — confirm whether this call is needed.
graph_db_test.create()

# Force nodes to have unique employeeids and uniqueawardnumbers
# NOTE(review): py2neo's create_uniqueness_constraint appears to take
# (label, property_key), so one call per label would be needed — confirm:
#graph_db_test.schema.create_uniqueness_constraint("Employee", "employeeid")
#graph_db_test.schema.create_uniqueness_constraint("Award", "uniqueawardnumber")


In [None]:
# Use this line to clear all nodes and relationships from the graph
# (good for debugging).
# NOTE: this is destructive — everything stored in graph_db_test is removed.
graph_db_test.delete_all()

If you want to test Cypher query syntax, this is a good site to experiment with:

Cypher playground: http://console.neo4j.org/?_ga=1.113286210.459216022.1444237641

Warning: I would suggest refreshing that page often.

In [None]:
# Above I saved the results of the SQL query into `results`.
# Below I iterate over those rows and, for each one, make sure that
#   1. an 'Employee' node exists for the employee,
#   2. an 'Award' node exists for the award, and
#   3. a single "worked_on" relationship links the two.
#
# Every row gets its relationship, including rows for an employee who is
# already in the graph (e.g. someone who worked on several awards).

for employee in results:
    id_num = employee['employeeid']
    occ_orig = employee['occupation_orig']
    occ_class = employee['occupationalclassification']
    award_num = employee['uniqueawardnumber']

    # Wee tiny bit of data cleaning: drop a stray leading quote.
    # startswith() is safe on an empty string, and the truthiness check
    # guards against a NULL (None) award number.
    if award_num and award_num.startswith("'"):
        award_num = award_num[1:]

    # Find the employee node, creating it only if it is not in the graph yet.
    # NOTE: 'occcupation_orig' (three c's) is kept as-is because the plotting
    # code further down reads the property under exactly that name.
    employee_node = graph_db_test.find_one("Employee", "employeeid", id_num)
    if employee_node is None:
        employee_node = neo4j.Node("Employee", employeeid = id_num, occcupation_orig = occ_orig,
                                   occupationalclassification = occ_class)
        graph_db_test.create( employee_node )

    # A row without an award number cannot be linked to an award.
    if not award_num:
        continue

    # Find the award node, creating it only if it is not in the graph yet.
    award_node = graph_db_test.find_one("Award", "uniqueawardnumber", award_num)
    if award_node is None:
        award_node = neo4j.Node("Award", uniqueawardnumber = award_num)
        graph_db_test.create( award_node )

    # create_unique only creates the relationship when it does not exist yet,
    # so repeated rows (same employee and award in different years) and
    # re-runs of this cell do not produce duplicates.
    graph_db_test.create_unique( Relationship(employee_node, "worked_on", award_node) )
    



 At this point we have a graph database that contains award nodes, each connected to the employees who worked on it. In ASCII art, it looks something like this: 

            A       A 
           / \    / | \ 
          E   E  E  E  E 
There is an obvious missing relationship here, right? Employees that "worked on" a grant also "worked with" the other employees that worked on that grant. What we want is a graph that looks like this:    

            A       A 
           / \    / | \ 
          E-- E  E--E--E 
            
Let's create those relationships between employees.

In [None]:
# Every Award has one or more Employees attached to it, so the quickest way
# to find groups of employees that worked together is to walk the awards.

# A straight Cypher command is used for this.
# Note: Cypher commands return RecordList objects, not plain lists.

# The query returns n.uniqueawardnumber AS uniqueawardnumber, which makes the
# value available as an attribute, e.g.: awards_record[1].uniqueawardnumber
awards_record = graph_db_test.cypher.execute("MATCH (n:Award) RETURN n, n.uniqueawardnumber AS uniqueawardnumber;")

# A plain list is easier to work with than the RecordList, and only the award
# number (column 1 of each record) is needed to look up the employees who
# worked on that award.
awards = [str(award_row[1]) for award_row in awards_record]


In [None]:
# Next we link every pair of employees that worked on the same award.
#
# For each award number in `awards`:
#   1. ask Neo4j for the employeeids that have "worked_on" that award,
#   2. walk every unordered pair of those employees, and
#   3. MERGE a "worked_with" relationship between the pair.
#
# MERGE with an undirected pattern only creates the relationship when the two
# employees are not already connected (in either direction). A plain CREATE
# would add a duplicate relationship for every additional award two employees
# share, and again every time this cell is re-run.

employees_statment = "MATCH (n:Employee)-[r:worked_on]->(m:Award {uniqueawardnumber:{NUM}}) RETURN n.employeeid AS employeeid;"
pair_statment = ("MATCH (n:Employee {employeeid: {NUM1}}), (m:Employee {employeeid: {NUM2}}) "
                 "MERGE (n)-[r:worked_with]-(m) RETURN r;")

for award_number in awards:
    # commit() returns one result per appended statement; we appended one.
    tx = graph_db_test.cypher.begin()
    tx.append(employees_statment, {"NUM": award_number})
    relivent_employees_record = tx.commit()[0]

    # I want the employees in a list, not a RecordList. The ids are kept
    # exactly as Neo4j returned them (no str() conversion) so that they still
    # match the stored property when passed back in as query parameters.
    relivent_employees = []
    for record in relivent_employees_record:
        if record.employeeid not in relivent_employees:
            relivent_employees.append(record.employeeid)

    # An award with fewer than two employees has nobody to pair up.
    if len(relivent_employees) < 2:
        continue

    # Connect the first employee with each employee that follows it in the
    # list, then repeat with the next employee until the end of the list.
    # All pairs for one award are sent in a single transaction.
    tx = graph_db_test.cypher.begin()
    for position, current_employee in enumerate(relivent_employees):
        for next_employee in relivent_employees[position + 1:]:
            tx.append(pair_statment, {"NUM1": current_employee, "NUM2": next_employee})
    tx.commit()

In [None]:
# Display the worked_with relationships that now exist in the graph.
# NOTE(review): the pattern is undirected, so each relationship is presumably
# matched once per direction and may show up twice in the output — confirm.
graph_db_test.cypher.execute("MATCH (n)-[r:worked_with]-(b) RETURN r;")

In [None]:
def recordlist_to_list(recordlist, record_property, len_of_recordlist = 1):
    """
    Used to convert the result of a cypher query into a python list.

    The first column of every record must be something that can be indexed by
    property name (e.g. a node), as returned by a query such as
    graph_db.cypher.execute("MATCH (n) RETURN n;"). For each record the value
    of `record_property` is read from that first column.

    Parameters
    --------------------------------
    recordlist: A RecordList object returned by a Cypher.execute command
                (any indexable sequence of indexable records works)
    record_property: The name of the property to read from the first column
                     of each record
    len_of_recordlist: the number of records to convert, starting from the
                       first. Defaulted to 1. Pass None to convert every
                       record in recordlist.

    Returns
    --------------------------------
    A python list with one property value per converted record.

    Raises
    --------------------------------
    IndexError if len_of_recordlist is larger than the number of records.
    """
    if len_of_recordlist is None:
        len_of_recordlist = len(recordlist)

    # Plain indexing instead of eval() on a generated string: it is faster and
    # keeps working for property names that contain quote characters.
    return [recordlist[index][0][record_property] for index in range(len_of_recordlist)]

In [None]:
## Collect a group of nodes to display
# maybe I'll make this a subgraph later

# The award to look at (I picked a random award number to search for).
# It is passed to Cypher as the {NUM} parameter so it only lives in one place.
award_number_to_show = '00.070 03-C-NE-PU'
award_parameters = {"NUM": award_number_to_show}

# Get the award node. Note that this query returns the whole node.
award_records = graph_db_test.cypher.execute(
    "MATCH (m {uniqueawardnumber:{NUM}}) RETURN m;", award_parameters)

# Collect all the employees that worked on that award.
# This query also returns whole nodes.
employee_records = graph_db_test.cypher.execute(
    "MATCH (n:Employee) -[r:worked_on]->(a {uniqueawardnumber:{NUM}}) RETURN n;", award_parameters)

# Number of employees that worked on that award: one record per employee,
# so no separate COUNT query is needed.
num_associtated_employees = len(employee_records)

# All the queries above return RecordList objects, which are a little awkward
# to deal with, so recordlist_to_list() is used to pull the property I want
# out of each record into a plain python list.

# First the award number(s). Award numbers are unique, so at most one record
# is expected; passing the real length keeps this from failing when the
# award is not in the graph.
specific_award_node = recordlist_to_list(award_records, 'uniqueawardnumber', len(award_records))

# Then the employee ids of everyone who worked on that award.
associated_employees = recordlist_to_list(employee_records, "employeeid", num_associtated_employees)


#print award node
for item in specific_award_node:
    print(item)

#print associated_employees
for item in associated_employees:
    print(item)

### Plotting with Networkx

In [None]:
# Creating Networks Objects 

#Imports that should really be at the top of the notebook 
import networkx as nx
# Allows plots to be showed inline 
%matplotlib inline
# Matplotlib is the default package for
# rendering the graphs
import matplotlib.pyplot as plt

# Example Plot 

# Create the Networkx Graph 
G=nx.Graph()

# Distonarys to contain colors and labels
# Not nessasary
custom_node_color = {}
custom_labels     = {}

#Create the networkx award node 
award_node = G.add_node(specific_award_node[0])
custom_node_color[specific_award_node[0]] = 'r'

#Create networkx nodes for the employees that worked on that node 
for item in associated_employees:
    G.add_node(item)
    custom_node_color[item] = 'c'
    custom_labels[item] = str(item)
    # We know at each of these employees are connected to the award, so lets draw those relationshipd 
    # Note, in Networkx relationships are called edges 
    G.add_edge(item, award_node)
    

print G.nodes()

# Draw the plot 
nx.draw(G,labels=custom_labels, node_color=custom_node_color.values())
plt.show()


# Idk why we always get a bonus None node, I like to thnk of it as a gift 

In [None]:
def neo4j_to_networks(graph):
    """
    Converts a neo4j graph into a networkx graph and displays a plot of that graph.

    Employee nodes are keyed by their employeeid and Award nodes by their
    uniqueawardnumber. Every relationship between an Employee and an Award,
    and every relationship between two Employees, becomes an edge whose
    'edge_type' attribute holds the relationship type.

    Parameters
    -------------
    graph - a neo4j graph -- must have Employee nodes and Award nodes

    Returns
    -------------
    The networkx Graph that was plotted.
    """

    # Collect all the nodes: identifier -> attribute dictionary
    nodes = {}

    # Each query is run once and its records are iterated directly; there is
    # no need to run it a second time just to count the results.
    for record in graph.cypher.execute("MATCH (n:Employee) RETURN n;"):
        current_node = record[0]
        # NOTE: 'occcupation_orig' (three c's) is the property name written by
        # the cell that loads the employees, so it has to be read the same way.
        nodes[current_node['employeeid']] = {
            'node_type': "employee",
            'occcupation_orig': current_node['occcupation_orig'],
            'occupationalclassification': current_node['occupationalclassification'],
        }

    # Now for the award nodes
    for record in graph.cypher.execute("MATCH (n:Award) RETURN n;"):
        current_node = record[0]
        nodes[current_node['uniqueawardnumber']] = {'node_type': "award"}

    # Relationships: each record is (start identifier, relationship type, end identifier)
    employee_award_relationships = graph.cypher.execute("MATCH (a:Employee)-[r]-(b:Award) RETURN a.employeeid, type(r), b.uniqueawardnumber;")
    employee_employee_relationships = graph.cypher.execute("MATCH (a:Employee)-[r]->(b:Employee) RETURN a.employeeid, type(r), b.employeeid;")

    # Create the networkx graph
    NG = nx.Graph()

    # Dictionaries to contain colors and labels
    # Not necessary. custom_labels is left empty, so no labels are drawn.
    custom_node_color = {}
    custom_labels     = {}

    # Create the networkx nodes
    for node in nodes:
        # Creates a node with the identification number as the node name and
        # populates the node's attributes from the dictionary nodes[node].
        # Keyword expansion is accepted by both networkx 1.x and 2.x.
        NG.add_node(node, **nodes[node])
        if nodes[node]['node_type'] == 'award':
            custom_node_color[node] = 'r'
        else:
            custom_node_color[node] = 'g'

    # Next, let's create edges between the nodes based on the relationships collected above
    for record in employee_award_relationships:
        NG.add_edge(record[0], record[2], edge_type=record[1])

    for record in employee_employee_relationships:
        NG.add_edge(record[0], record[2], edge_type=record[1])

    # Draw the graph.
    # Colors are listed in the graph's own node order so each node gets the
    # color chosen for it; a node that only appeared through an edge falls
    # back to the non-award color.
    node_colors = [custom_node_color.get(node, 'g') for node in NG.nodes()]
    nx.draw(NG, labels=custom_labels, node_color=node_colors)
    plt.show()
    return NG

networkx_graph = neo4j_to_networks(graph_db_test)

In [None]:
# Display every relationship in the graph, whatever its type.
# NOTE(review): the pattern is undirected, so each relationship is presumably
# matched once per direction and may show up twice in the output — confirm.
graph_db_test.cypher.execute("MATCH (n)-[r]-(b) RETURN r;")

### Network Measurements
This is the vocabulary for studying a network:

- **Degree Centrality** - counts the number of edges that a node has
    - Nodes with a high degree of connections usually play an important role in a network
- **Betweenness** - an indicator of a node's centrality in a network
    - Equal to the number of shortest paths from all vertices to all others that pass through that node
- **Diameter** - the longest shortest path over all pairs of nodes
    - Often we want to find the shortest distance between two nodes; the diameter is the longest of these paths
    - Nodes that occur on many shortest paths between other nodes in the graph have a high betweenness centrality score
- **Eigenvector Centrality** - the sum of the centrality scores of a node's neighbors
    - A node is important if it is connected to other important nodes
    - A few influential contacts may outrank many mediocre contacts

The functions below are written for your convenience.
     
   
 

In [None]:
# Convienicnce Funtions from http://www.slideshare.net/arnicas/a-quick-and-dirty-intro-to-networkx-and-d3

def calculate_degree_centrality(graph):
    '''
    Calculate the degree centrality of every node, set the value on each node
    as an attribute, and print the top 10 nodes by degree centrality.

    Parameters
    ----------
    graph - A Networkx object Graph

    Returns
    ----------
    (graph, dc) - the same graph object, and a dict mapping each node to its
    degree centrality value.
    '''
    g = graph
    dc = nx.degree_centrality(g)
    # Keyword arguments are used because the positional order of name/values
    # differs between networkx 1.x and 2.x.
    # NOTE: the attribute name 'degress_centrality' is misspelled but kept
    # unchanged in case other code reads the attribute under this name.
    nx.set_node_attributes(g, name = 'degress_centrality', values = dc)
    # Sort (node, centrality) pairs by centrality, highest first.
    degcent_sorted = sorted(dc.items(), key = lambda item: item[1], reverse = True)
    for key, value in degcent_sorted[0:10]:
        print("Highest degree Centrality: %s %s" % (key, value))
    return graph, dc

def calculate_betweenness(graph):
    '''
    Calculate the betweenness centrality of every node and set the value on
    each node as the 'betweenness' attribute.

    Parameters
    ----------
    graph - A Networkx object Graph

    Returns
    ----------
    (g, bc) - the same graph object, and a dict mapping each node to its
    betweenness centrality value.
    '''
    g = graph
    bc = nx.betweenness_centrality(g)
    # Keyword arguments are used because the positional order of name/values
    # differs between networkx 1.x and 2.x.
    nx.set_node_attributes(g, name = 'betweenness', values = bc)
    return g, bc

def calculate_eigenvector_centrality(graph):
    '''
    Calculate the eigenvector centrality of every node and set the value on
    each node as the 'eigen_cent' attribute.

    Parameters
    ----------
    graph - A Networkx object Graph

    Returns
    ----------
    (g, ec) - the same graph object, and a dict mapping each node to its
    eigenvector centrality value. To rank the nodes, sort ec.items() by value.
    '''
    g = graph
    ec = nx.eigenvector_centrality(g)
    # Keyword arguments are used because the positional order of name/values
    # differs between networkx 1.x and 2.x.
    nx.set_node_attributes(g, name = 'eigen_cent', values = ec)
    return g, ec

def find_cliques(graph):
    '''
    Calculate cliques and return them as a list sorted by size, largest first.
    Prints the number of cliques found and their sizes.

    Parameters
    ----------
    graph - A Networkx object Graph

    Returns
    ----------
    cl - list of cliques (each a list of nodes), largest clique first.
    '''
    g = graph
    cl = sorted(list(nx.find_cliques(g)), key=len, reverse=True)
    # Single-argument print() produces the same text on Python 2 and Python 3.
    print("Number of cliques: %d" % len(cl))
    cl_sizes = [len(c) for c in cl]
    print("Size of cliques: %s" % (cl_sizes,))
    return cl


In [None]:
# Run the measurements on the graph returned by neo4j_to_networks().
# That graph is stored in `networkx_graph`; `NG` only exists inside the
# function, so using it here raises a NameError on a fresh kernel.
calculate_degree_centrality(networkx_graph)
calculate_eigenvector_centrality(networkx_graph)
find_cliques(networkx_graph)
calculate_betweenness(networkx_graph)

In [None]:
# igraph example
# NOTE(review): draw() taking a plain {'nodes': ..., 'edges': ...} dict looks
# like the API of the package now published as `jgraph`, not python-igraph —
# confirm which package is installed under the name `igraph`.

# import igraph 
import igraph

# igraph example data: a dict with two keys
#   'nodes' - maps each node name to its optional display attributes
#             ('color' as a hex integer, 'size' as a number)
#   'edges' - list of dicts naming a 'source' and a 'target' node, with
#             optional 'size' and 'color' attributes

graph = {
    'nodes': {
        'ross': {'color': 0xffaaaa, 'size': 2.0},
        'joey': {'size': 0.5},
        'chandler': {'color': 0x2222ff, 'size': 1.25},
        'phoebe': {'color': 0x22ff22},
        'rachel': {},
        'monica': {},
        'jack': {},
        'judy': {},
    },
    'edges': [
        {'source': 'chandler', 'target': 'ross'},
        {'source': 'monica', 'target': 'ross'},
        {'source': 'ross', 'target': 'rachel', 'size': 3, 'color': 0xffaaaa},
        {'source': 'ross', 'target': 'joey'},
        {'source': 'ross', 'target': 'phoebe'},
        {'source': 'ross', 'target': 'judy'},
        {'source': 'monica', 'target': 'rachel'},
        {'source': 'rachel', 'target': 'jack'},
        {'source': 'chandler', 'target': 'phoebe'}
    ]
}

# Render the example graph defined above.
igraph.draw(graph)

In [None]:
# Clean up the MySQL resources opened at the top of the notebook:
# close the cursor first, then the connection it belongs to.
cursor.close()

# close connection
db.close()