### Develop an application that recommends similar job titles based on their skills

In [1]:
import pandas as pd
from gensim import corpora, models, similarities

In [2]:
def load_data(df_input):
    """Collect the skills listed for each job title.

    Rows of ``df_input`` are grouped by the ``job_title`` column and the
    ``recommended_skills_tfidf`` values of every group are gathered into
    a list.

    Returns:
        A ``(titles, skills)`` pair of equal-length lists in which
        ``skills[i]`` holds the values gathered for ``titles[i]``. The
        order is the order of the grouped index.
    """
    skills_by_title = (
        df_input.groupby("job_title")["recommended_skills_tfidf"].apply(list)
    )
    pairs = list(skills_by_title.items())
    titles = [title for title, _ in pairs]
    skills = [skill_list for _, skill_list in pairs]
    return titles, skills

In [3]:
## top_skills.csv holds the top skills for each standard job title.
## load_data groups its rows by job_title, so titles[i] is a job title and
## skillss[i] is the list of recommended_skills_tfidf values for that title.

df_title_skills = pd.read_csv("top_skills.csv")
titles, skillss = load_data(df_title_skills)

#### Make similarity matrix

In [4]:
# Positional id <-> title lookups; ids follow the order of `titles`.
title_dict = dict(enumerate(titles))
inv_title_dict = {title: idx for idx, title in title_dict.items()}

# Assign an integer id to every distinct skill, then encode each title's
# skill list as a bag-of-words vector.
skill_dict = corpora.Dictionary(skillss)
vectors = list(map(skill_dict.doc2bow, skillss))

# Re-weight the bag-of-words vectors with TF-IDF, then project them into a
# 500-topic LSI space.
tfidf = models.TfidfModel(vectors)
vectors_tfidf = tfidf[vectors]
lsi_model = models.LsiModel(
    corpus=vectors_tfidf,
    id2word=skill_dict,
    num_topics=500,
)
vectors_lsi = lsi_model[vectors_tfidf]

# Similarity index over the LSI vectors; indexing it with a query vector
# yields one score per title, in `titles` order.
index = similarities.MatrixSimilarity(vectors_lsi)

#### Predict similar titles

In [5]:
def find_similar_titles(input_title, top_n=10):
    """Return the titles whose skill profiles are most similar to a title.

    The skills stored for ``input_title`` are run through the same
    bag-of-words -> TF-IDF -> LSI pipeline that was used to build ``index``,
    and the resulting vector is scored against every indexed title.

    Args:
        input_title: A title present in ``inv_title_dict``.
        top_n: Maximum number of similar titles to return. Values of zero
            or below yield an empty list.

    Returns:
        A list of ``(title, similarity)`` tuples in descending similarity
        order. ``input_title`` itself is never included.

    Raises:
        KeyError: If ``input_title`` is not a known title.
    """
    id_ex = inv_title_dict[input_title]
    skill_vector = skill_dict.doc2bow(skillss[id_ex])
    tfidf_vector = tfidf[skill_vector]
    lsi_vector = lsi_model[tfidf_vector]
    sims = sorted(enumerate(index[lsi_vector]), key=lambda item: -item[1])
    # Exclude the query title by id, not by rank. Another title with the same
    # skill set (or float rounding) can sort ahead of the query itself, in
    # which case dropping rank 0 would discard a real match and leave the
    # query title in the results.
    others = [(titles[i], s) for i, s in sims if i != id_ex]
    return others[:max(top_n, 0)]

In [6]:
# Example query: titles most similar to this one (top_n defaults to 10).
job_title = "machine learning engineer"
find_similar_titles(job_title)

[('ai engineer', 0.93368685),
 ('ml engineer', 0.9050194),
 ('machine learning scientist', 0.9023429),
 ('artificial intelligence engineer', 0.9012699),
 ('learning software engineer', 0.8630173),
 ('machine learning specialist', 0.8509244),
 ('ai developer', 0.8335743),
 ('learning scientist', 0.81373686),
 ('machine learning developer', 0.79833734),
 ('machine learning expert', 0.7898615)]

In [7]:
# Second example query, again using the default top_n of 10.
job_title = "python developer"
find_similar_titles(job_title)

[('python django developer', 0.8588372),
 ('python engineer', 0.85697204),
 ('backend engineer', 0.8015604),
 ('go developer', 0.774391),
 ('api engineer', 0.76543325),
 ('stack python developer', 0.75738883),
 ('django developer', 0.7535784),
 ('back end engineer', 0.742126),
 ('ruby developer', 0.7142695),
 ('staff software engineer', 0.7108178)]