In [10]:
'''
This script gives possible paths from present occupation to target occupation.
Input:
    1. Database of jobs
    2. Present occupation
    3. Target occupation
Output: Paths
'''

'\nThis script gives possible paths from present occupation to target occupation.\nInput:\n    1. Database of jobs\n    2. Present occupation\n    3. Target occupation\nOutput: Paths\n'

In [11]:
import pandas as pd
import networkx as nx

In [12]:
# TODO: modify input instead of demo input below
# Demo endpoints of the search: the present occupation and the target occupation.
source, target = 'software-development', 'photography-video'

In [13]:
# TODO: insert real data instead of demo data below
# Load the job database into a DataFrame. The cells that follow read each
# row's 'tier-2_type' value and the 'hard_skills' entries nested under
# its 'requirements' value.
jobs = pd.read_json("support/data.json")

In [14]:
# read data
# Tally how often each occupation type, each hard skill, and each
# (skill, occupation type) pair occurs across the job records.
type_vocab, skill_vocab, pair_vocab = {}, {}, {}
# Walk the two columns in lockstep instead of looking rows up with
# `jobs[col][i]`: that form is a lookup by index *label*, so it only works
# while the index is exactly 0..n-1 and breaks as soon as the frame is
# filtered or re-indexed (e.g. when the demo data is swapped for real data).
for t, requirements in zip(jobs['tier-2_type'], jobs['requirements']):
    type_vocab[t] = type_vocab.get(t, 0) + 1
    for s in requirements['hard_skills']:
        skill_vocab[s] = skill_vocab.get(s, 0) + 1
        p = (s, t)  # key order is (skill, occupation type)
        pair_vocab[p] = pair_vocab.get(p, 0) + 1

In [15]:
# build occupation-skill graph
# Undirected graph whose nodes are the occupation types and the hard skills;
# an edge joins a skill to every occupation type it was counted with.
G1 = nx.Graph()
G1.add_nodes_from(type_vocab)   # occupation-type nodes first
G1.add_nodes_from(skill_vocab)  # then skill nodes
G1.add_edges_from(pair_vocab)   # keys are (skill, occupation type) 2-tuples

In [16]:
# calculate jaccard distance
# For every unordered pair of occupation types, compute the Jaccard distance
# between their skill sets: 1 - |A & B| / |A | B|. Keys are (t1, t2) tuples
# where t1 precedes t2 in type_vocab's insertion order; each pair appears once.

# Collect each occupation's neighbouring skills once instead of once per pair.
skill_sets = {t: set(G1.neighbors(t)) for t in type_vocab}
occupations = list(skill_sets)

jaccard_dict = {}
for idx, t1 in enumerate(occupations):
    s1 = skill_sets[t1]
    # Only pair t1 with the occupations after it, so no (t2, t1) duplicate
    # or self-pair is ever generated and no membership check is needed.
    for t2 in occupations[idx + 1:]:
        s2 = skill_sets[t2]
        union = s1 | s2
        if union:
            jaccard_dict[(t1, t2)] = 1 - len(s1 & s2) / len(union)
        else:
            # Neither occupation has any skill: the ratio is 0/0. With no
            # evidence of overlap, treat the pair as maximally distant
            # rather than raising ZeroDivisionError.
            jaccard_dict[(t1, t2)] = 1.0

In [17]:
# build job transition graph
# Every occupation type is a node; two occupations are linked, with their
# Jaccard distance as the edge weight, only when that distance is at most 0.6.
G2 = nx.Graph()
G2.add_nodes_from(type_vocab)
G2.add_weighted_edges_from(
    (t1, t2, dist)
    for (t1, t2), dist in jaccard_dict.items()
    if dist <= 0.6
)

In [18]:
# TODO: take output here
# Report the minimum-total-weight path between the two occupations and its
# length. Only sufficiently close occupations are linked in G2, so the graph
# may be disconnected, and `source`/`target` are user-supplied: report those
# cases with a message instead of a traceback.
try:
    path = nx.shortest_path(G2, source, target, weight='weight')
    length = nx.shortest_path_length(G2, source, target, weight='weight')
except nx.NodeNotFound:
    # Raised when an endpoint is not a node of G2.
    print("No path: %r or %r is not an occupation in the job data" % (source, target))
except nx.NetworkXNoPath:
    # Both endpoints exist but lie in different connected components.
    print("No path from %r to %r in the job transition graph" % (source, target))
else:
    print("Shortest path: %s" % path)
    print("Length: %s" % length)

Shortest path: ['software-development', 'ui-ux-designer', 'creative-design', 'photography-video']
Length: 1.7320430295658946


In [19]:
pair_vocab

{('project management', 'building-construction-qs'): 10,
 ('autocad', 'building-construction-qs'): 22,
 ('computer skills', 'building-construction-qs'): 6,
 ('microsoft office', 'building-construction-qs'): 6,
 ('make', 'building-construction-qs'): 3,
 ('problem solving skills', 'building-construction-qs'): 11,
 ('sketchup', 'building-construction-qs'): 2,
 ('photoshop', 'building-construction-qs'): 4,
 ('revit', 'building-construction-qs'): 4,
 ('bim', 'building-construction-qs'): 5,
 ('hkis', 'building-construction-qs'): 3,
 ('pc applications', 'building-construction-qs'): 1,
 ('certificate of registration of electrical worker',
  'building-construction-qs'): 1,
 ('maintenance', 'building-construction-qs'): 14,
 ('green card', 'building-construction-qs'): 3,
 ('no', 'building-construction-qs'): 2,
 ('word', 'building-construction-qs'): 7,
 ('analysis', 'building-construction-qs'): 3,
 ('documentation', 'building-construction-qs'): 3,
 ('rapid', 'building-construction-qs'): 1,
 ('micr