# Job Recommendation Using Knowledge Graph

In [100]:
import networkx as nx
import pandas as pd
from pyvis.network import Network
from operator import itemgetter
from IPython.display import display


In [101]:
nx.__version__

'2.6.3'

In [102]:
job_description_dataset = pd.read_json(
    "Dataset\dataset_job_description_20.json")
resume_dataset = pd.read_json("Dataset\dataset_resume.json")


In [103]:
for resumeData in resume_dataset.values[:1] : # first two resumes
    print(resumeData[0]['id']) #id
    print(resumeData[0]['careerjunction_za_skills']) # skills
    print(resumeData[0]['careerjunction_za_courses']) # Degree 
    print(f" Experience {len(resumeData[0]['careerjunction_za_historical_jobtitles'])}") # Experience


1
['Emergency Handling', 'Microbiology', 'microsoft powerpoint', 'microsoft office', 'microsoft excel', 'microsoft project management', 'Microsoft word', 'Outlook', 'BootStrap', 'Internet explorer', 'Marketing/Sales', 'Quality Control', 'Quality Assurance', 'Research and development', 'Problem solving']
['Btech: Food Technology', 'National Diploma: Food Technology', 'Senior Certificate']
 Experience 5


In [104]:
for jobData in job_description_dataset[:1].values:  # first two job description
    print(jobData[18])  # Id
    print(jobData[0])  # Salary
    print(jobData[6])  # Skills
    print(jobData[2])  # Degree
    numbers = [int(word) for word in jobData[14].split() if word.isdigit()]
    numbers.append(0)
    experience = max(numbers) 
    print(experience) # Experience
    print(jobData[20])  # Job Type


2b387592-8148-5720-a661-a2730061d14c
$17.23 - $22.00 / hour
Emergency Handling,BootStrap, REST Services,EntityFrameWork 6 
Bachelor's Degree
2
NY


In [143]:
nodes = [n for n in resume_dataset.values][:2]
nodes_id=[n[0]['id'] for n in nodes]
# nodes_id

edges = [tuple(e) for e in job_description_dataset.values]
# edges

In [106]:
G = nx.Graph()

jobid = []
req_skills = []
req_experience = []

counter = 0

for job in job_description_dataset[:3].values:
    _skills = job[6].split(",")
    jobid.append(job[18])
    req_skills.append(_skills)
    numbers = [int(word) for word in job[14].split() if word.isdigit()]
    numbers.append(0)
    experience = max(numbers)
    req_experience.append(experience)
    counter += 1
    if(counter == 3):
        break

print(jobid)
print(req_skills)
print(req_experience)

resumeId = []
resumeSkills = []
resumeExperience = []
# Side note: use a for loop instead of individual loops

resumes = [n for n in resume_dataset.values][:3]
resumeId = [n[0]['id'] for n in resumes]
resumeSkills = [n[0]['careerjunction_za_skills'] for n in resumes]
resumeExperience = [
    len(n[0]['careerjunction_za_historical_jobtitles']) for n in resumes]
resumeExperience

edge_data = zip(jobid, req_skills, req_experience)
node_data = zip(resumeId, resumeSkills, resumeExperience)

print(len(resumeId))
print(len(jobid))


['2b387592-8148-5720-a661-a2730061d14c', 'cfc728ee-e7f8-5538-b1ee-0f6a2d12e1d1', '8b6b33e1-8dfc-5274-b3e6-cda37f549fdc']
[['Emergency Handling', 'BootStrap', ' REST Services', 'EntityFrameWork 6 '], ['Vocabularies', 'MYSQL', 'PHP', 'javascript', 'BootStrap', ' REST Services', ' Learning', ' Progress Reports'], ['MYSQL', 'PHP', 'javascript', ' Self Disciplin ', 'C++ Developer', 'JAVA Developer', ' Finance', ' Extroverted', ' Social Skills', ' Active Listening']]
[2, 4, 1]
3
3


For Edges (Job Description)

In [107]:
for j, e in enumerate(edge_data):
    src = e[0]
    dst = e[1]
    w = e[2]
    print(dst)
    G.add_node(src, color='#dd4b39', title=src,type='jobDescription')
    for eachSkill in dst:
        eachSkill=eachSkill.strip()
        G.add_node(eachSkill, title=dst, type='skill', experience=w)
        # if str(w).isdigit():
        if w is None:
            G.add_edge(src, eachSkill, value=w, color='#00ff1e', label=w)
        if 1 < w <= 70000:
            G.add_edge(src, eachSkill, value=w, color='#FFFF00', label=w)
        if w > 70000:
            G.add_edge(src, eachSkill, value=w, color='#dd4b39', label=w)

        else:
            G.add_edge(src, eachSkill) 


['Emergency Handling', 'BootStrap', ' REST Services', 'EntityFrameWork 6 ']
['Vocabularies', 'MYSQL', 'PHP', 'javascript', 'BootStrap', ' REST Services', ' Learning', ' Progress Reports']
['MYSQL', 'PHP', 'javascript', ' Self Disciplin ', 'C++ Developer', 'JAVA Developer', ' Finance', ' Extroverted', ' Social Skills', ' Active Listening']


For Resumes

In [108]:
for j, e in enumerate(node_data):
    src = "ResumeId-" + str(e[0])
    dst = e[1]
    w = e[2]
    G.add_node(src, color='#dd4b39', type='resume', title=src)
    for eachSkill in dst:
        eachSkill = eachSkill.strip()
        G.add_node(eachSkill, title=eachSkill, type='skill', experience=w)
        G.add_edge(src, eachSkill, color='#00ff1e')

nt = Network('1080px', '1960px', notebook=False)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('nx.html')


## Analysis of Network
#### Find Skills ,Job Description , Resume having highest degree. (which means highest connections)

In [109]:
degree_dict=dict(G.degree(G.nodes()))
nx.set_node_attributes(G,degree_dict,'degree')

sorted_degree= sorted(degree_dict.items(),key=itemgetter(1),reverse=True)

def popularNodeByType(type):
  for d in sorted_degree[:20]:
    if(G.nodes[d[0]]['type'] == type):
        print(f"{d}\n")
        break

print("Popular Skill:")
popularNodeByType('skill')

print("Popular Resume:")
popularNodeByType('resume')

print("Popular Description:")
popularNodeByType('jobDescription')



Popular Skill:
('BootStrap', 4)

Popular Resume:
('ResumeId-1', 15)

Popular Description:
('8b6b33e1-8dfc-5274-b3e6-cda37f549fdc', 10)



#### Eigen vector centrality take an account of nodes , edges as well as connections of that nodes

In [110]:
betweenness_dict = nx.betweenness_centrality(G,weight=True)  # Run betweenness centrality
eigenvector_dict = nx.eigenvector_centrality(G,max_iter=600)  # Run eigenvector centrality

# Assign each to an attribute in your network
nx.set_node_attributes(G, betweenness_dict, 'betweenness')
nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

sorted_betweenness = sorted(
    eigenvector_dict.items(), key=itemgetter(0), reverse=True)

print("Top 20 nodes by eigenvector centrality:")
count=0;
for b in sorted_betweenness:
    if(G.nodes[b[0]]['type']=='resume'):
        print(b)
        count+=1
    if count==20:
        break


Top 20 nodes by eigenvector centrality:
('ResumeId-3', 0.40047143341765357)
('ResumeId-2', 0.19814480654447283)
('ResumeId-1', 0.27468540689937954)


**Input**: skill
**Output:** recommend (description,
        resumes)

In [111]:
allConnectedNodes=nx.node_connected_component(G, 'BootStrap')
for node in allConnectedNodes:
    print(node)

MVC5
C# Developer
HTML5
2b387592-8148-5720-a661-a2730061d14c
wordpress
Research and development
Marketing/Sales
JQuery
ResumeId-1
8b6b33e1-8dfc-5274-b3e6-cda37f549fdc
cfc728ee-e7f8-5538-b1ee-0f6a2d12e1d1
Problem solving
Quality Assurance
Finance
Vocabularies
SQL Databse Development
Learning
AJAX
Progress Reports
SSRS
javascript
microsoft office
RDBMS
REST Services
microsoft powerpoint
JAVA Developer
Social Skills
Active Listening
PHP
microsoft project management
Microsoft word
Emergency Handling
microsoft excel
Quality Control
CSS3
Magento
Internet explorer
BootStrap
Outlook
ResumeId-2
Self Disciplin
Extroverted
Microbiology
C#
C++ Developer
EntityFrameWork 6
SSIS
MYSQL
ResumeId-3


#### Recommend Users to learn skills
[Bipertite graph concept applies here]

In [112]:
resume1_nbrs=G.neighbors('ResumeId-2')

resume3_nbrs=G.neighbors('ResumeId-3')

differentSkills= set(resume1_nbrs).difference(resume3_nbrs)
skillNodes=[]

for skill in differentSkills:
    skillNodes.append(G.nodes[skill])
    
sortedSkillsAccrToEigen = sorted(skillNodes, key=lambda i: i['eigenvector'],reverse=True)

print("Top Skills to be learned")
for skill in sortedSkillsAccrToEigen:
    print(f"{skill['title']} {round(skill['eigenvector'],3)}")


Top Skills to be learned
BootStrap 0.211
REST Services 0.147
Emergency Handling 0.141
EntityFrameWork 6 0.077
MVC5 0.046
SQL Databse Development 0.046
SSRS 0.046
JQuery 0.046
C# 0.046
SSIS 0.046


#### input: Resume 

#### output: List of recommended Job Descriptions

In [142]:
def get_all_adj_job_description_nodes(resume3_nbrs):
    related_job_description_nodes_dict = {}

    for skill in resume3_nbrs:
        skill_nabhour = G.neighbors(skill)
        for each_node in skill_nabhour:
            if(G.nodes[each_node]['type'] == 'jobDescription'):
                if G.nodes[each_node]['title'] in related_job_description_nodes_dict:
                    related_job_description_nodes_dict[G.nodes[each_node]
                                                       ['title']] += 1
                else:
                    related_job_description_nodes_dict[G.nodes[each_node]
                                                       ['title']] = 1

    return related_job_description_nodes_dict


resume3_nbrs = G.neighbors('ResumeId-3')
jobDescriptionNodes = get_all_adj_job_description_nodes(resume3_nbrs)

sortedJobDescriAccToConNodes = dict(sorted(
    jobDescriptionNodes.items(), key=lambda i: i[1], reverse=True))

print("Recomended Job Descriptions")
for job in sortedJobDescriAccToConNodes:
    print(f"{job} {sortedJobDescriAccToConNodes[job]}")


Recomended Job Descriptions
8b6b33e1-8dfc-5274-b3e6-cda37f549fdc 5
cfc728ee-e7f8-5538-b1ee-0f6a2d12e1d1 3
