In [1]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import sys
sys.path.insert(0, "../src/")
import util as util

%autosave 5

Autosaving every 5 seconds


In [2]:
# Free-text role descriptions; every candidate's job title is ranked against each one.
queries = [
    "full-stack software engineer",
    "engineering manager",
    "aspiring human resources",
]

# How many of the top-ranked candidates are printed per query.
maxVisibleCandidates = 15

In [3]:
# Load the raw candidate table (path is relative to the notebook's working directory).
originalCandidates = pd.read_csv('../data/raw/potential-talents-AspiringHumanResources-SeekingHumanResources.csv')

# 'connection' is converted to integers below; the capped value appears as "500+ ".
# Strip the "+" marker and surrounding whitespace from every value instead of
# matching the exact string '500+ ', so variants such as '500+' or ' 500+ ' do not
# survive the clean-up and break the integer conversion.
connectionText = (
    originalCandidates['connection']
    .astype(str)
    .str.replace('+', '', regex=False)
    .str.strip()
)
originalCandidates['connection'] = pd.to_numeric(connectionText).astype(int)

# spaCy

In [4]:
# Rank all candidates against each query using util.scoreSpacySimilary
# (defined in ../src/util; presumably a spaCy similarity score -- confirm there).
for query in queries:
    print(query)
    # Work on a copy so originalCandidates never gains a 'fit' column.
    candidates = originalCandidates.copy()
    candidates['fit'] = candidates['job_title'].apply(lambda x: util.scoreSpacySimilary(query,x))
    # Highest fit first; equal fits are ordered by the larger connection count.
    candidates = candidates.sort_values(by=['fit','connection'],ascending=False)
    candidates = candidates.reset_index(drop=True)
    # head() yields at most the rows that exist, so a table with fewer than
    # maxVisibleCandidates rows no longer raises KeyError on a missing label.
    topCandidates = candidates.head(maxVisibleCandidates)
    for i in topCandidates.index:
        print(f"{i}: {topCandidates.loc[i,'job_title']} {topCandidates.loc[i,'fit']}")
    print('---')

full-stack software engineer
0: Information Systems Specialist and Programmer with a love for data and organization. 0.563996787628674
1: Student at Indiana University Kokomo - Business Management - 
Retail Manager at Delphi Hardware and Paint 0.5228140441359616
2: Human Resources professional for the world leader in GIS software 0.4853645607821708
3: Nortia Staffing is seeking Human Resources, Payroll & Administrative Professionals!!  (408) 709-2621 0.4392605632997097
4: Aspiring Human Resources Professional | Passionate about helping to create an inclusive and engaging work environment 0.4355657572661699
5: Junior MES Engineer| Information Systems 0.4243483225610306
6: Experienced Retail Manager and aspiring Human Resources Professional 0.41077131418696483
7: Human Resources, Staffing and Recruiting Professional 0.4046824288211493
8: Retired Army National Guard Recruiter, office manager,  seeking a position in Human Resources. 0.40041681086893033
9: Aspiring Human Resources Managemen

# Bag Of Words

In [5]:
# Learn the bag-of-words vocabulary from the job titles and vectorize them once;
# each query is then projected into that same vocabulary.
vectorizer = CountVectorizer()
job_titles = originalCandidates['job_title'].to_numpy()
candidateVectors = vectorizer.fit_transform(job_titles)

for query in queries:
    print(query)
    # Work on a copy so originalCandidates never gains a 'fit' column.
    candidates = originalCandidates.copy()
    queryVector = vectorizer.transform([query])
    # cosine_similarity returns one column per query row, i.e. an (n_candidates, 1)
    # array here; flatten it so a plain 1-D column is assigned.
    candidates['fit'] = cosine_similarity(candidateVectors,queryVector).ravel()
    # Highest fit first; equal fits are ordered by the larger connection count.
    candidates = candidates.sort_values(by=['fit','connection'],ascending=False)
    candidates = candidates.reset_index(drop=True)
    # head() yields at most the rows that exist, so a table with fewer than
    # maxVisibleCandidates rows no longer raises KeyError on a missing label.
    topCandidates = candidates.head(maxVisibleCandidates)
    for i in topCandidates.index:
        print(f"{i}: {topCandidates.loc[i,'job_title']} {topCandidates.loc[i,'fit']}")
    print('---')

full-stack software engineer
0: Junior MES Engineer| Information Systems 0.3162277660168379
1: Human Resources professional for the world leader in GIS software 0.22360679774997896
2: Native English Teacher at EPIK (English Program in Korea) 0.0
3: People Development Coordinator at Ryan 0.0
4: Advisory Board Member at Celal Bayar University 0.0
5: HR Senior Specialist 0.0
6: Seeking Human Resources HRIS and Generalist Positions 0.0
7: SVP, CHRO, Marketing & Communications, CSR Officer | ENGIE | Houston | The Woodlands | Energy | GPHR | SPHR 0.0
8: Human Resources Coordinator at InterContinental Buckhead Atlanta 0.0
9: Native English Teacher at EPIK (English Program in Korea) 0.0
10: People Development Coordinator at Ryan 0.0
11: Native English Teacher at EPIK (English Program in Korea) 0.0
12: People Development Coordinator at Ryan 0.0
13: Advisory Board Member at Celal Bayar University 0.0
14: HR Senior Specialist 0.0
---
engineering manager
0: HR Manager at Endemol Shine North Americ

# TF-IDF (12/27)

# Word2Vec (12/27)

# GloVe (12/28)

# FastText (12/29)

# BERT (12/30)

# SBERT (12/31)

# Conclusion