In [None]:
# Imports 
import os
import sys

import MySQLdb
from py2neo import neo4j, Graph, Relationship


In [None]:
# Create MySQL connection.
# Credentials are read from environment variables so that real usernames and
# passwords never have to be typed into (and saved with) the notebook. When a
# variable is not set, the original placeholder value is used instead.
user = os.environ.get("STARMETRICS_DB_USER", "")
password = os.environ.get("STARMETRICS_DB_PASSWORD", "")
database = os.environ.get("STARMETRICS_DB_NAME", "starmetrics")

# invoke the connect() function, passing parameters in variables.
db = MySQLdb.connect(user=user, passwd=password, db=database)

# output basic database connection info.
print(db)

# DictCursor makes each fetched row a dict keyed by column name, which is
# what the later cells rely on (e.g. employee['employeeid']).
cursor = db.cursor(MySQLdb.cursors.DictCursor)
print(cursor)

In [None]:
# Query the starmetrics database for Purdue University employees between
# the years 2008 and 2012. Columns collected:
#   from the employee table:   employeeid, uniqueawardnumber, occupation_orig
#   from the occupation table: occupationalclassification
#
# Adjacent string literals are joined by Python, so the statement below is a
# single SQL string (each piece ends with the space that separates it from
# the next one).
sql_select = (
    "SElECT employee.employeeid, employee.uniqueawardnumber, employee.occupation_orig, "
    "occupation.occupationalclassification "
    "FROM starmetrics.employee "
    "INNER JOIN starmetrics.occupation ON "
    "starmetrics.employee.occupation_orig = occupation.occupation_orig "
    "WHERE starmetrics.employee.university = 'purdue' AND starmetrics.employee.year BETWEEN 2008 AND 2012;"
)

result_count = cursor.execute(sql_select)

# Count the number of rows returned
print("Found " + str(result_count) + " rows")

# Store the results of the query FIRST. Calling fetchone() before fetchall()
# would consume the first row, leaving `results` one record short.
results = cursor.fetchall()

# Peek at the first row (if any) to check the column names and values.
# `one` is None when the query matched nothing, so an empty result set no
# longer raises an error here.
one = results[0] if results else None
if one is not None:
    print(one.keys())
    print(one)


In [None]:
# A small hand-written stand-in for the query results, so the rest of the
# notebook can be tested without a ton of cases.
# NOTE: this rebinds `results`, replacing whatever the SQL cell stored in it.
_sample_columns = ('occupationalclassification', 'occupation_orig', 'employeeid', 'uniqueawardnumber')

# One tuple per row, values in the same order as _sample_columns.
# The repeated rows are deliberate: they mimic duplicate records.
_sample_rows = [
    ('Undergraduate', 'UNDERGRAD', '90009387', "'00.500 NONGrant"),
    ('Undergraduate', 'UNDERGRAD', '90009387', "'00.500 NONGrant"),
    ('Graduate Student', 'GRADUATE', '90027173', '00.070 03-C-NE-PU'),
    ('Faculty', 'FACULTY', '90014540', '00.070 03-C-NE-PU'),
    ('Faculty', 'FACULTY', '90014540', '00.070 03-C-NE-PU'),
]

# Each row becomes its own independent dict keyed by column name.
results = [dict(zip(_sample_columns, row)) for row in _sample_rows]

In [None]:
# Create a connection to the Neo4j database using py2neo's defaults.
# Note: Graph was imported together with the rest of py2neo in the imports cell.
graph_db_test = Graph()
print(graph_db_test)

# Force Employee nodes to have unique employeeids and Award nodes to have
# unique uniqueawardnumbers.
# create_uniqueness_constraint takes (label, property_key), so each constraint
# needs its own call with the node label first. The existing constraints are
# checked beforehand so that re-running this cell does not fail on a
# constraint that is already in place.
for label, property_key in (("Employee", "employeeid"), ("Award", "uniqueawardnumber")):
    if property_key not in graph_db_test.schema.get_uniqueness_constraints(label):
        graph_db_test.schema.create_uniqueness_constraint(label, property_key)


In [None]:
# Use this line to clear all nodes and relationships from the graph
# (good for debugging).
# WARNING: this wipes the graph's contents, so only run it against a database
# whose data you are willing to lose.
graph_db_test.delete_all()

In [None]:
# First things first: an example of creating a single node by hand.
# A node is built from a label followed by keyword properties; the properties
# used here are employeeid, occupation_orig and occupationalclassification.
# NOTE(review): the label here is lowercase "employee", while the import loop
# further down uses "Employee" -- confirm whether the difference is intended.
example_properties = {
    "employeeid": '90014540',
    "occupation_orig": "FACULTY",
    "occupationalclassification": "Faculty",
}
new_employee_node = neo4j.Node("employee", **example_properties)

# Hand the node to the graph so it is actually stored in the database.
graph_db_test.create(new_employee_node)

# Python uses a slightly different syntax to talk to the Neo4j database.
# If we were using the Neo4j terminal, this would be the Cypher query we would type:
## CREATE (Employee1:Employee { id_num: "4", employeeid:'90014540', uniqueawardnumber:'00.070 03-C-NE-PU',occupation_orig:"FACULTY", occupationalclassification:"Faculty" })



In [None]:
# If you want to test Cypher query syntax, this is a good site to experiment with:
# Cypher playground: http://console.neo4j.org/?_ga=1.113286210.459216022.1444237641
# Warning: I would suggest refreshing this page often.

In [None]:
# Above, the rows to import were saved in a list called `results`.
# Below we iterate over that list and, for every row:
#   1. look up (or create) the Employee node for the row's employeeid,
#   2. look up (or create) the Award node for the row's uniqueawardnumber,
#   3. make sure a single "worked_on" relationship links the two.
# Looking nodes up first means duplicate rows, or employees who worked on an
# award that is already in the graph, never produce duplicate nodes or
# relationships.

for employee in results:
    id_num = employee['employeeid']
    occ_orig = employee['occupation_orig']
    occ_class = employee['occupationalclassification']
    award_num = employee['uniqueawardnumber']

    # Reuse the employee node if it is already in the graph, otherwise create it.
    # create() returns a tuple of the created entities; taking element 0 gives
    # us the node as it now exists in the database.
    employee_node = graph_db_test.find_one("Employee", "employeeid", id_num)
    if employee_node is None:
        employee_node = graph_db_test.create(
            neo4j.Node("Employee", employeeid=id_num, occupation_orig=occ_orig,
                       occupationalclassification=occ_class)
        )[0]

    # Reuse the award node if it is already in the graph, otherwise create it.
    award_node = graph_db_test.find_one("Award", "uniqueawardnumber", award_num)
    if award_node is None:
        award_node = graph_db_test.create(
            neo4j.Node("Award", uniqueawardnumber=award_num)
        )[0]

    # Link the employee to the award unless that link already exists. This
    # covers every combination of new/existing employee and new/existing award.
    existing_relationship = graph_db_test.match_one(
        start_node=employee_node, rel_type="worked_on", end_node=award_node)
    if existing_relationship is None:
        new_relationship = Relationship(employee_node, "worked_on", award_node)
        print(new_relationship)
        graph_db_test.create(new_relationship)
    



In [None]:
# Show every "worked_on" relationship currently in the graph.
# ONLY RUN THIS IF YOU ARE USING THE SHORT RESULTS
worked_on_query = "MATCH (n)-[r:worked_on]->(m) RETURN r;"
graph_db_test.cypher.execute(worked_on_query)

In [None]:
# Show every node currently in the graph.
# ONLY RUN THIS IF YOU ARE USING THE SHORT RESULTS
all_nodes_query = "MATCH (n) RETURN n;"
graph_db_test.cypher.execute(all_nodes_query)

In [None]:
# Just to prove that these employees were moved into the Neo4j database,
# let's query the database and collect 10 Employee nodes.
# LIMIT 10 keeps the output short even when the full SQL results were loaded.
some_employees = graph_db_test.cypher.execute("MATCH (n:Employee) RETURN n LIMIT 10")

print(some_employees)


In [None]:
# Release the MySQL resources: the cursor first, then the connection itself.
for mysql_resource in (cursor, db):
    mysql_resource.close()