#Generate Database Using Faker Package


In [1]:
#Password for Neo4j Server
pwd = "aaaaaa"

In [2]:
# Specify the number of data generated
BASE = 10
N_PROJECTS = BASE
N_EMPLOYEES = BASE * 10
N_TASKS = BASE * 25


In [3]:
# Connection to neo4j
from py2neo import Graph, Node, Relationship
graph = Graph("http://localhost:7474/db/data/", user="neo4j", password=pwd)

#reset the database
graph.delete_all()

In [4]:
# Setup faker
import random
from faker import Faker
import faker.providers
from faker.providers import BaseProvider

fake_data = Faker()

class Custom(BaseProvider):
    
    #define the list of position in the company
    def position(self):
        position_list = ['developer', 'developer', 'developer', 'developer', 'developer', 'developer', 
                         'tester', 'tester', 'tester', 'project manager']
        return random.choice(position_list)
    
    def stage(self):
        stage_list = ['planning', 'in progress', 'in progress', 'finished']
        return random.choice(stage_list)
    
    def result(self):
        result_list = ['successful', 'successful', 'failed']
        return random.choice(result_list)
    
#add Custom to fake_data object
fake_data.add_provider(Custom)


In [5]:
#Employees properties:  name, position, email address
employee_no = 1
employees = []
managers = []

def makeEmployee():
    global employee_no
    empid = "Employee" + str(employee_no)
    employee_no = employee_no + 1
    pos = fake_data.position()
    emp = Node("EMPLOYEE", id = empid, name=fake_data.name(), position=pos, email=fake_data.safe_email())
    if(pos == 'project manager'):
        managers.append(emp)
    else:
        employees.append(emp)
    graph.create(emp)
    return emp


In [6]:
# Projects properties: title, start_date, due_date, stage, result
project_no = 100
projects = []

def makeProject():
    global project_no
    projid = "Project" + str(project_no)
    project_no = project_no + 1
    state = fake_data.stage()
    if state == 'finished':
        projresult = fake_data.result()
    else:
        projresult = "in progress"
    proj = Node("PROJECT", id = projid, title=fake_data.text(max_nb_chars=80), start_date=fake_data.date(), due_date=fake_data.date(), stage=state, result = projresult)
    projects.append(proj)
    graph.create(proj)
    return proj

In [7]:
#Tasks properties: name, start_date, due_date
task_no = 1000
tasks = []

def makeTask():
    global task_no
    taskid = "Task" + str(task_no)
    task_no = task_no + 1
    task = Node("TASK", id = taskid, name=fake_data.text(max_nb_chars=50), start_date=fake_data.date(), due_date=fake_data.date())
    tasks.append(task)
    graph.create(task)
    return task


In [8]:
# Make the basic nodes for Employees and Projects
# Task will be created along the creation of relationship

for i in range(N_EMPLOYEES):
    makeEmployee()

for i in range(N_PROJECTS):
    makeProject()

print ("Generated {} manager nodes, {} employee nodes, {} project nodes".format(len(managers), len(employees), len(projects)))

Generated 17 manager nodes, 83 employee nodes, 10 project nodes


In [9]:
# create the relationships
rels = []

from random import choices
# Add relationships.
# Build around projects.
for proj in projects:
    # first select a manager.
    manager = random.choice(managers)
    rel = Relationship(manager, 'MANAGES', proj)
    rels.append(rel)
    
    # how many employees?
    count = random.randint(5, 15)
    # get them
    indexes = random.sample(range(0, len(employees) - 1), count)
    peoples = []
    for index in indexes:
        peoples.append(employees[index])
        
    # how many tasks?
    count = random.randint(5, 25)
    # assign each task
    for i in range(count):
        task = makeTask()
        rel = Relationship(manager, "CREATES", task)
        rels.append(rel)
        rel = Relationship(task, "BELONGS_TO", proj)
        rels.append(rel)
        
        worker_count = random.randint(1, 5)
        for j in range(worker_count):
            who = random.choice(peoples)
            rel = Relationship(who, "WORKS_ON", task)
            rels.append(rel)


In [10]:
#Create Relationship into Graph
for rel in rels:
    graph.create(rel)

print("Successful create relationship to the Graph!")

Successful create relationship to the Graph!


In [11]:
#test that something happened
print(employees[0], projects[0], tasks[0])

(:EMPLOYEE {email: 'lsnyder@example.org', id: 'Employee1', name: 'Bryce Hernandez', position: 'developer'}) (:PROJECT {due_date: '1975-08-07', id: 'Project100', result: 'in progress', stage: 'in progress', start_date: '2014-04-04', title: 'Rock onto building early card carry surface.'}) (:TASK {due_date: '1996-10-15', id: 'Task1000', name: 'Federal hotel score add rather.', start_date: '2004-08-25'})
