# Job Recommendation Using Knowledge Graph

In [315]:
import networkx as nx
import pandas as pd
from pyvis.network import Network
from IPython.display import display


In [316]:
# Display the version string of the imported networkx package.
nx.__version__

'2.6.3'

In [317]:
# Load the two raw inputs from JSON files located next to the notebook.
# Job postings:
job_description_dataset = pd.read_json(
    "career_builder_jobs_10501.json"
)
# Candidate resumes:
resume_dataset = pd.read_json(
    "resumes.json"
)

In [318]:
# Preview the fields of interest for the first resume only (the slice keeps one row).
for resumeData in resume_dataset.values[:1]:
    # The resume record is stored in the first column of the row.
    resume_record = resumeData[0]
    print(resume_record['id'])  # id
    print(resume_record['careerjunction_za_skills'])  # skills
    print(resume_record['careerjunction_za_courses'])  # degree
    # Experience is approximated by the number of historical job titles.
    job_title_count = len(resume_record['careerjunction_za_historical_jobtitles'])
    print(f" Experience {job_title_count}")


1
['Emergency Handling', 'Microbiology', 'microsoft powerpoint', 'microsoft office', 'microsoft excel', 'microsoft project management', 'Microsoft word', 'Outlook', 'Internet explorer', 'Marketing/Sales', 'Quality Control', 'Quality Assurance', 'Research and development', 'Problem solving']
['Btech: Food Technology', 'National Diploma: Food Technology', 'Senior Certificate']
 Experience 5


In [319]:
# Preview the fields of interest for the first job posting only (the slice keeps one row).
# Columns are addressed by position, so this depends on the column order of the JSON file.
for jobData in job_description_dataset[:1].values:
    print(jobData[17])  # Id
    print(jobData[0])  # Salary
    print(jobData[6])  # Skills
    print(jobData[2])  # Degree

    # Experience = largest standalone integer token found in column 14;
    # the appended 0 is the fallback when the text contains no such token.
    numbers = [int(token) for token in filter(str.isdigit, jobData[14].split())]
    numbers += [0]
    experience = max(numbers)
    print(experience)  # Experience

    print(jobData[20])  # Job Type


2b387592-8148-5720-a661-a2730061d14c
$17.23 - $22.00 / hour
Emergency Handling
Bachelor's Degree
2
FULL_TIME


In [320]:
# Resume rows that will become graph nodes: only the first two resumes are kept.
# Slicing the array first avoids materialising every row just to discard most of them.
nodes = list(resume_dataset.values[:2])
nodes_id = [n[0]['id'] for n in nodes]

# Every job-description row converted to a plain tuple.
edges = [tuple(e) for e in job_description_dataset.values]

# Preview a single row only: echoing the whole list would write every job posting
# (including the full description text) into the notebook output.
edges[:1]

[('$17.23 - $22.00 / hour',
  'https://www.careerbuilder.com/',
  "Bachelor's Degree",
  Timestamp('2021-05-05 03:01:49'),
  'Job Description\nTo implement the Head Start Performance Standards in the overall management of the classroom, including promoting social, physical, and intellectual growth, providing a safe, healthy environment that is developmentally, linguistically and culturally appropriate. Responsible for oversight of assistant teacher and classroom volunteers.\n\nJob Requirements\n\r    NYS Teacher Certification preferred\r    Bachelor&rsquo;s Degree in Early Childhood Education \r    One to two years experience teaching in an early childhood setting.\r    Requires a working level knowledge of early childhood developmentally appropriate practices.\r    Requires administrative, analytical, evaluative and oral and written communication skills; and an aptitude for training and motivating people.\r    Requires proficiency in the use of a personal computer.\r    Requires the h

In [321]:
G = nx.Graph()

# --- Job side: one (job id, skill, required experience) triple per retained skill ---
jobid = []
req_skills = []
req_experience = []

counter = 0

for job in job_description_dataset[:1].values:
    # Required experience = largest standalone integer token in column 14 of the
    # *current* row, falling back to 0 when there is none. This must read `job`,
    # not `jobData`: the latter is a loop variable left over from an earlier preview
    # cell and always refers to that cell's last row.
    numbers = [int(word) for word in job[14].split() if word.isdigit()]
    numbers.append(0)
    experience = max(numbers)

    _skills = job[6].split(",")
    for eachSkill in _skills:
        jobid.append(job[17])
        req_skills.append(eachSkill)
        req_experience.append(experience)
        counter += 1
        # Stops after the very first skill overall. The test is `== 1`, so it would
        # not trigger again for later jobs if the slice above were widened.
        # NOTE(review): looks like a temporary limit for experimentation - confirm.
        if counter == 1:
            break

# --- Resume side: parallel lists of id, skills and experience ---
# TODO (original side note): build these in a single for loop instead of three passes.

# NOTE(review): `resumes` is not read below; the three lists are built from `nodes`,
# which an earlier cell defines as the first two resumes. Confirm which is intended.
resumes = [n for n in resume_dataset.values][:1]
resumeId = [n[0]['id'] for n in nodes]
resumeSkills = [n[0]['careerjunction_za_skills'] for n in nodes]
# Experience is approximated by the number of historical job titles.
resumeExperience = [
    len(n[0]['careerjunction_za_historical_jobtitles']) for n in nodes]

# Materialise as lists: a bare zip() is a one-shot iterator, so re-running a cell
# that loops over it would silently iterate over nothing the second time.
edge_data = list(zip(jobid, req_skills, req_experience))
node_data = list(zip(resumeId, resumeSkills, resumeExperience))


For Edges (Job Description)

In [322]:
# Upper bound of the "yellow" weight bucket; larger weights are drawn red.
WEIGHT_THRESHOLD = 70000

# Add one job node, one skill node and the edge between them per
# (job id, skill, weight) triple.
for src, dst, w in edge_data:
    print(f"{src} {dst} {w}")

    G.add_node(src, color='#dd4b39', title=src)
    G.add_node(dst, title=dst)

    if str(w).isdigit():
        # Every non-negative integer weight gets an edge. Previously weights 0 and 1
        # matched no branch, leaving the job disconnected from its skill, and the
        # `w is None` branch could never run inside this isdigit() guard.
        edge_color = '#FFFF00' if w <= WEIGHT_THRESHOLD else '#dd4b39'
        G.add_edge(src, dst, value=w, color=edge_color, label=w)
    else:
        # Missing or non-numeric weight (including None): thin dashed edge.
        G.add_edge(src, dst, value=0.1, dashes=True)


he;
2b387592-8148-5720-a661-a2730061d14c Emergency Handling 2


For Resumes

In [323]:
# Add one node per resume and connect it to a node for each skill it lists.
for resume_id, skills, w in node_data:
    src = "ResumeId-" + str(resume_id)
    G.add_node(src, color='#dd4b39', title=src)
    for eachSkill in skills:
        # The title is the skill's own name (not the resume's whole skill list),
        # matching how skill nodes are titled when job descriptions are added.
        # NOTE(review): re-adding an existing node updates its attributes, so a skill
        # shared by several resumes keeps the `experience` of the last one processed.
        G.add_node(eachSkill, title=eachSkill, experience=w)
        G.add_edge(src, eachSkill, color='#00ff1e')

# Render the graph to a standalone HTML file with pyvis.
nt = Network(notebook=False)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('nx.html')


Find job description IDs that have a given skill

In [328]:
# Set of all nodes in the connected component of G that contains the
# "Emergency Handling" node (every node reachable from it, not only direct neighbours).
nx.node_connected_component(G, "Emergency Handling")



{'2b387592-8148-5720-a661-a2730061d14c',
 'BootStrap',
 'C#',
 'Emergency Handling',
 'EntityFrameWork 6',
 'Internet explorer',
 'JQuery',
 'MVC5',
 'Marketing/Sales',
 'Microbiology',
 'Microsoft word',
 'Outlook',
 'Problem solving',
 'Quality Assurance',
 'Quality Control',
 'REST Services',
 'Research and development',
 'ResumeId-1',
 'ResumeId-2',
 'SQL Databse Development',
 'SSIS',
 'SSRS',
 'microsoft excel',
 'microsoft office',
 'microsoft powerpoint',
 'microsoft project management'}