In [1]:
'''
This script is the main body of Job Recommendation function.
Input:
    1. Database of jobs
    2. Profile of one candidate
    3. Recommendation options
Components:
    1. Hard skill matching
    2. Soft skill matching
    3. Language matching
    4. Location matching
    5. Integration and ranking
Output: Job ranking
'''

'\nThis script is the main body of Job Recommendation function.\nInput:\n    1. Database of jobs\n    2. Profile of one candidate\n    3. Recommendation options\nComponents:\n    1. Hard skill matching\n    2. Soft skill matching\n    3. Language matching\n    4. Location matching\n    5. Integration and ranking\nOutput: Job ranking\n'

In [2]:
import json
import math
import numpy as np
import pandas as pd

In [3]:
def angle(x):
    """Convert an angle from degrees to radians."""
    scaled_by_pi = x * math.pi
    return scaled_by_pi / 180

In [4]:
# modify options
# Each flag switches one matching component on or off. A component set to False
# is given zero weight when the integrated score is computed.
HARD = True  # hard skill matching
SOFT = True  # soft skill matching
LANG = True  # language matching
LOC = True  # location matching

In [5]:
# modify weights as needed
# Multiplier applied to each component score when the scores are summed into
# the integrated score.
HARD_WEIGHT = 1
SOFT_WEIGHT = 0.25
LANG_WEIGHT = 0.5
LOC_WEIGHT = 0.25
BETA = 1  # parameter in F-beta score: (1 + BETA^2) * p * r / (BETA^2 * p + r)

In [6]:
# insert data
# Job database, read into a DataFrame. Later cells read its 'requirements' and
# 'locations' columns.
jobs = pd.read_json("support/data.json")
# Profile of the candidate to recommend jobs for. The four keys below are the
# ones the matching cells look up.
candidate = {'hard skills': ['python', 'linux', 'c'],
             'soft skills': ['analytical skill'],
             'languages': ['cantonese', 'english'],
             'location': 'Sha Tin'
             }

In [7]:
# match hard skills
# Score every job by the F-beta overlap between the candidate's hard skills and
# the hard skills the job requires, then rescale so the best job scores 1.
candidate_hard = set(candidate['hard skills'])
hard_score = []
for r in jobs['requirements']:
    job_hard = set(r['hard_skills'])
    tp = len(candidate_hard & job_hard)  # skills present on both sides
    if tp != 0:
        # precision: share of the candidate's skills that the job asks for
        # recall: share of the job's skills that the candidate has
        precision = tp / len(candidate_hard)
        recall = tp / len(job_hard)
        hard_score.append(
            (1 + BETA ** 2) * precision * recall / (BETA ** 2 * precision + recall)
        )
    else:
        hard_score.append(0.0)
# An explicit float dtype keeps the in-place division below valid even when
# every score is 0 (an all-int array cannot hold a true-division result).
hard_score = np.array(hard_score, dtype=float)
# Rescale only when some job actually matched; with no jobs, or no overlap at
# all, the scores stay as they are instead of raising.
best_hard = hard_score.max() if hard_score.size else 0.0
if best_hard > 0:
    hard_score /= best_hard

In [8]:
# match soft skills
# Score every job by the F-beta overlap between the candidate's soft skills and
# the soft skills the job requires, then rescale so the best job scores 1.
candidate_soft = set(candidate['soft skills'])
soft_score = []
for r in jobs['requirements']:
    job_soft = set(r['soft_skills'])
    tp = len(candidate_soft & job_soft)  # skills present on both sides
    if tp != 0:
        # precision: share of the candidate's skills that the job asks for
        # recall: share of the job's skills that the candidate has
        precision = tp / len(candidate_soft)
        recall = tp / len(job_soft)
        soft_score.append(
            (1 + BETA ** 2) * precision * recall / (BETA ** 2 * precision + recall)
        )
    else:
        soft_score.append(0.0)
# An explicit float dtype keeps the in-place division below valid even when
# every score is 0 (an all-int array cannot hold a true-division result).
soft_score = np.array(soft_score, dtype=float)
# Rescale only when some job actually matched; with no jobs, or no overlap at
# all, the scores stay as they are instead of raising.
best_soft = soft_score.max() if soft_score.size else 0.0
if best_soft > 0:
    soft_score /= best_soft

In [9]:
# match languages
# Each entry of the language table provides a 'language' name, its 'aliases'
# and the languages it implies ('implications').
with open("support/language_info.json") as lang_file:  # closed even on error
    lang_info = json.load(lang_file)
lang_map = {}  # alias -> language name
impl_map = {}  # language name -> list of implied languages
for d in lang_info:
    for a in d['aliases']:
        lang_map[a] = d['language']
    if d['implications']:
        impl_map.setdefault(d['language'], []).extend(d['implications'])

# Build the candidate's language set without touching candidate['languages'],
# so re-running this cell always starts from the original profile.
candidate_langs = {lang_map.get(l, l) for l in candidate['languages']}
# Implications are looked up with the resolved language name (impl_map is keyed
# by it), so a candidate who gives an alias still gets the implied languages.
# Only languages the candidate listed are expanded, one level deep.
for l in list(candidate_langs):
    candidate_langs.update(impl_map.get(l, []))

# NOTE(review): job languages are compared as-is; presumably they already use
# the names from the language table -- confirm against the data.
lang_score = []
for r in jobs['requirements']:
    job_langs = set(r['languages'])
    tp = len(candidate_langs & job_langs)  # languages present on both sides
    if tp != 0:
        # precision: share of the candidate's languages that the job asks for
        # recall: share of the job's languages that the candidate has
        precision = tp / len(candidate_langs)
        recall = tp / len(job_langs)
        lang_score.append(
            (1 + BETA ** 2) * precision * recall / (BETA ** 2 * precision + recall)
        )
    else:
        lang_score.append(0.0)
# An explicit float dtype keeps the in-place division below valid even when
# every score is 0 (an all-int array cannot hold a true-division result).
lang_score = np.array(lang_score, dtype=float)
# Rescale only when some job actually matched; with no jobs, or no overlap at
# all, the scores stay as they are instead of raising.
best_lang = lang_score.max() if lang_score.size else 0.0
if best_lang > 0:
    lang_score /= best_lang

In [10]:
# match location
# Each entry of the location table provides a 'district', its 'coordinate' and
# the 'locations' that belong to it.
with open("support/location_info.json") as loc_file:  # closed even on error
    loc_info = json.load(loc_file)
loc_map = {}  # location name -> district
for d in loc_info:
    for l in d['locations']:
        loc_map[l] = d['district']
coordinate = {d['district']: d['coordinate'] for d in loc_info}


def _unit_vector(coord):
    """Return the 3-D unit vector for coord, reading coord[0] as latitude and
    coord[1] as longitude, both in degrees."""
    lat, lon = angle(coord[0]), angle(coord[1])
    return (math.cos(lat) * math.cos(lon),
            math.cos(lat) * math.sin(lon),
            math.sin(lat))


# Resolve the candidate's location to a district; unknown places become 'Others'.
cl = candidate['location']
if cl not in coordinate:
    cl = loc_map.get(cl, 'Others')

if cl == 'Others':
    # Candidate district unknown: location contributes nothing to any job.
    loc_score = [0 for _ in range(len(jobs))]
else:
    v1 = _unit_vector(coordinate[cl])
    # 1 - cos(angle between the two districts) grows with their separation.
    # Clamp at 0 because rounding can make the value for identical points
    # slightly negative.
    dist_map = {k: max(0.0, 1 - np.dot(v1, _unit_vector(v)))
                for k, v in coordinate.items()}

    # One entry per job; None marks a job whose district is unknown ('Others',
    # or a name found neither among the districts nor in loc_map).
    distances = []
    for jl in jobs['locations']:
        district = jl if jl in dist_map else loc_map.get(jl, 'Others')
        distances.append(None if district == 'Others' else dist_map.get(district))

    # Scale by the farthest known job so scores fall in [0, 1]: nearest -> 1,
    # farthest -> 0, unknown -> 0.
    scale = max((d for d in distances if d is not None), default=0.0)
    if scale > 1e-12:
        loc_score = [0.0 if d is None else float(1 - d / scale) for d in distances]
    else:
        # Every known job is at the candidate's own district (or there are no
        # known jobs): dividing by ~0 would give NaN or 0, so score them 1.
        loc_score = [0.0 if d is None else 1.0 for d in distances]

In [11]:
# integrate and rank
# Effective weights: a disabled component contributes nothing. They are held in
# separate names so the configured *_WEIGHT constants are never overwritten and
# re-enabling an option only requires re-running this cell.
hard_w = HARD_WEIGHT if HARD else 0
soft_w = SOFT_WEIGHT if SOFT else 0
lang_w = LANG_WEIGHT if LANG else 0
loc_w = LOC_WEIGHT if LOC else 0

# loc_score is a plain list; convert it so all four terms are arrays.
intg_score = (hard_w * hard_score
              + soft_w * soft_score
              + lang_w * lang_score
              + loc_w * np.asarray(loc_score, dtype=float))

# Attach every component score and the weighted sum to the job table, then
# order the jobs from best to worst match.
jobs['hard_skill_score'] = hard_score
jobs['soft_skill_score'] = soft_score
jobs['language_score'] = lang_score
jobs['location_score'] = loc_score
jobs['integrated_score'] = intg_score
jobs = jobs.sort_values('integrated_score', ascending=False)

In [12]:
# output
# Show the job table, which the previous cell extended with the score columns
# and sorted by integrated_score, highest first.
jobs

Unnamed: 0,job_id,location_code,company,job_title,description,employment_types,selling_points,locations,categories,post_time,...,responsibilities,requirements,experience_required,tier-1_type,tier-2_type,hard_skill_score,soft_skill_score,language_score,location_score,integrated_score
10748,100003009055333,hk,Computime Ltd,Senior Engineer - Software,"Major Responsibilities\n\nDesign, code, and de...","[Full Time, Permanent]","[5 days work, double pay, and free shuttle coa...",Sha Tin,"[Information Technology (IT), Application Spec...",2022-01-18 02:12:49,...,"[software, implementation, design, communicati...","{'degrees': ['degree'], 'majors': ['computer e...","{'duration': ['5 years'], 'field': ['product d...",information-technology,application-specialist-software,1.000,0.0,0.857143,1.000000,1.678571
10731,100003009054494,hk,Open Creative Limited,Full Stack Web Developers,"Major Responsibilities\n\nDesign, code, and de...","[Full Time, Permanent]","[HTML5, CSS, JavaScript, React.js, Bootstrap, ...",Central-Western,"[Information Technology (IT), Application Spec...",2022-01-17 09:46:43,...,"[software, implementation, design, communicati...","{'degrees': ['degree'], 'majors': ['computer e...","{'duration': ['5 years'], 'field': ['product d...",information-technology,application-specialist-software,1.000,0.0,0.857143,0.822337,1.634156
11195,100003009031583,hk,Gientech Technology (Hong Kong) Limited,System Specialist (Linux / Windows),About us\n\nGientech is a global company offer...,"[Full Time, Permanent]","[Attractive Salary Package depends on ability,...",Wan Chai,"[Information Technology (IT), Hardware, Suppor...",2022-01-07 09:31:21,...,"[administration, installation, cost, software,...","{'degrees': [], 'majors': [], 'languages': ['e...","{'duration': [], 'field': []}",information-technology,hardware,1.000,0.0,0.800000,0.816406,1.604102
13721,100003009047669,hk,The University of Hong Kong,Part-time Student Research Assistant (509080),Part-time Student Research Assistant in the Sc...,[Part Time],"[assist in the research project, ecruit partic...",Southern,"[Admin & HR, Clerical / Admin Staff, HR Suppor...",2022-01-14 03:41:33,...,"[communication, review, administrative tasks, ...","{'degrees': [], 'majors': [], 'languages': ['c...","{'duration': [], 'field': []}",sciences-lab-research-development,research-development,0.875,0.0,1.000000,0.661062,1.540265
17225,100003009056020,hk,VTech Telecommunications Limited,Senior Software Engineer – IoT Product Develop...,We are expanding our research and development ...,"[Full Time, Permanent]","[Formulate cloud computing platform strategy, ...",Tai Po,"[Engineering, Engineering Project Management, ...",2022-01-18 03:11:52,...,"[commissioning, software, information security...","{'degrees': ['bachelor degree holder', 'master...","{'duration': ['at least 2 years', 'at least 5 ...",engineering,engineering-project-management,0.875,0.0,0.857143,0.930763,1.536262
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5309,100003009051592,hk,RecruitFirst Limited,德國知名相機品牌售貨員 Senior/Sales Advisor (首三個月包薪/23K+/...,主要職責:\n\n推廣及銷售各種高級相機，以達致店鋪銷售目標\n提供專業意見並教授客人攝影技...,[Full Time],[],Others,"[Sales, CS & Business Devpt, Retail Sales, Who...",2022-01-16 13:30:37,...,[],"{'degrees': [], 'majors': [], 'languages': [],...","{'duration': [], 'field': []}",sales-cs-business-devpt,wholesale,0.000,0.0,0.000000,0.000000,0.000000
5314,100003009045774,hk,Randstad Hong Kong Limited,Merchandise Planner,about the company.\nOur client is a leading fa...,"[Full Time, Permanent]","[Market leader in the industry, Regional offic...",Others,"[Transportation & Logistics, Supply Chain, Sal...",2022-01-13 08:38:54,...,"[management, work under pressure, sales target...","{'degrees': [], 'majors': [], 'languages': [],...","{'duration': [], 'field': []}",sales-cs-business-devpt,wholesale,0.000,0.0,0.000000,0.000000,0.000000
17519,100003009055593,hk,Robert Walters (HK),"Site Supervisor (ELV, 30K, 5-day, Client side)",We are searching for a Site Supervisor for our...,"[Full Time, Temporary]","[Client side representative, , ]",Others,"[Engineering, Electrical / Electronics, Engine...",2022-01-18 02:46:26,...,"[commissioning, quality, monitor, testing, sit...","{'degrees': ['certificate', 'diploma or above'...","{'duration': ['at least 3 years'], 'field': ['...",engineering,telecommunication-wireless-radio,0.000,0.0,0.000000,0.000000,0.000000
5330,100003009043841,hk,CODALAB INTERNATIONAL CO LTD,Sales Associates 售貨員,"人氣飾物品牌, 現邀請充滿活力的你加入我們的工作團隊。\n\nwww.codalab.co\...",[Full Time],[],Others,"[Sales, CS & Business Devpt, Retail Sales, Who...",2022-01-13 02:08:35,...,[facebook],"{'degrees': [], 'majors': [], 'languages': [],...","{'duration': [], 'field': []}",sales-cs-business-devpt,wholesale,0.000,0.0,0.000000,0.000000,0.000000
