In [1]:
import time
import warnings   
  
# Settings the warnings to be ignored 
warnings.filterwarnings('ignore')

import sys
import os
from dotenv import load_dotenv
project_dir = os.getenv("PROJECT_PATH")

sys.path.insert(1, project_dir + '/utils/')
sys.path.insert(2, project_dir + '/utils/neo4j/')

from connection import Neo4jConnection

import kb_util


def add_skills(rows, category, batch_size=10000):
    # Adds skill nodes to the Neo4j graph as a batch job.

    parameters = {'cat': category} 
    
    rows = [{'skill': row} for row in rows]

    query = '''UNWIND $rows AS row
    MERGE (s:Skill {name: row.skill, category: $cat})
    RETURN count(*) as total
    '''
    return insert_data(query, rows, batch_size, parameters)


def add_profiles(rows, batch_size=10000):
    # Adds profile nodes to the Neo4j graph as a batch job.

    rows = [{'profile': row} for row in rows]

    query = '''UNWIND $rows AS row
     CREATE (p:Profile {name:row.profile})
    RETURN count(*) as total
    '''
    return insert_data(query, rows, batch_size)


#CANDIDATE for UTILS
def insert_data(query, rows, batch_size = 10000, parameters=None):
    # Function to handle the updating the Neo4j database in batch mode.

    total = 0
    batch = 0
    start = time.time()
    result = None
  
    
    while batch * batch_size < len(rows):

        if parameters is None:
            if type(rows) is dict:
                param = {'rows': rows[batch*batch_size:(batch+1)*batch_size].to_dict('records')}
            else:
                param = {'rows': rows[batch*batch_size:(batch+1)*batch_size]}       
        else:
            if type(rows) is dict:
                param = parameters | {'rows': rows[batch*batch_size:(batch+1)*batch_size].to_dict('records')}
            else:
                param = parameters | {'rows': rows[batch*batch_size:(batch+1)*batch_size]}

        print(param)
        
        res = conn.query(query, parameters=param)
        total += res[0]['total']
        batch += 1
        result = {"total":total, "batches":batch, "time":time.time()-start}
        print(result)

    return result


def populate_db():    
      
    add_skills(kb_util.get_ict_list(), category='ICT')
    add_skills(kb_util.get_skill_languages(), category='Languages')
    add_skills(kb_util.get_skill_tools(), category='Tools')
    add_skills(kb_util.get_skill_databases(), category='Databases')
    add_skills(kb_util.get_skill_cloud(), category='Cloud')
    add_skills(kb_util.get_skill_libraries(), category='Libraries')
    add_skills(kb_util.get_skill_frameworks(), category='Frameworks')
        
    add_profiles(kb_util.get_profiles())   


def configure_db():
    conn.query('CREATE CONSTRAINT profiles IF NOT EXISTS FOR (p:Profile) REQUIRE p.name IS UNIQUE')
    conn.query('CREATE CONSTRAINT skills IF NOT EXISTS FOR (s:Skill) REQUIRE s.name IS UNIQUE')


def delete_skills():       
    conn.query('MATCH (s:Skill) DETACH DELETE s')


def delete_profiles():       
    conn.query('MATCH (p:Profile) DETACH DELETE p')
    

def setup_db():     
    
    delete_skills()
    delete_profiles()
    configure_db()
    populate_db()

In [2]:
conn = Neo4jConnection(uri=os.getenv("DB_URI"), 
                    user=os.getenv("DB_USERNAME"),              
                    pwd=os.getenv("DB_PASSWORD"))

setup_db()

{'cat': 'ICT', 'rows': [{'skill': 'sql server analysis services'}, {'skill': 'sql server integration services'}, {'skill': 'sql server reporting services'}, {'skill': 'critical thinking'}, {'skill': 'dashboarding'}, {'skill': 'use cases'}, {'skill': 'prototyping'}, {'skill': 'functiona requirements'}, {'skill': 'business requirements'}, {'skill': 'user accepting testing'}, {'skill': 'business process modeling'}, {'skill': 'quality assurance'}, {'skill': 'resource allocation'}, {'skill': 'graphic design'}, {'skill': 'prototyping'}, {'skill': 'data-driven decision-making'}, {'skill': 'social media marketing'}, {'skill': 'risk assessment and mitigation'}, {'skill': 'cybersecurity'}, {'skill': 'threat detection measures'}, {'skill': 'adobe creative suite'}, {'skill': 'project planning'}, {'skill': 'research'}, {'skill': 'restful apis'}, {'skill': 'ux/ui'}, {'skill': 'data governance'}, {'skill': 'data warehousing'}, {'skill': 'time series forecasting'}, {'skill': 'sentiment analysis'}, {'s