In [1]:
import json
import random

from search import Search

Load all the talent profiles and job profiles.

In [2]:
# Load the pre-processed talent and job profiles from their JSON files.
# Forward slashes are used so the relative paths resolve on Windows, Linux and
# macOS alike, and the encoding is pinned so decoding does not depend on the
# platform's default locale (assumes the files are UTF-8 — TODO confirm).
with open('data/processed/talent_data.json', 'r', encoding='utf-8') as f:
    talent_data = json.load(f)

with open('data/processed/job_data.json', 'r', encoding='utf-8') as f:
    job_data = json.load(f)

Initializing the model paths of the trained models.

NOTE: Only the Decision Tree and Logistic Regression models are used here. The rest (commented out) can also be used if needed.

In [3]:
# Map a display name to the pickle file of each trained model.
# Paths use forward slashes so they resolve on Windows, Linux and macOS alike.
# NOTE(review): the 'DecisionTree' entry points at the RandomForest pickle; the
# file name is kept unchanged here — confirm whether a dedicated DecisionTree
# pickle was intended.
models = {
    'DecisionTree': 'models/2024-02-28_00-16-16_RandomForest.pkl',
    'LogisticRegression': 'models/2024-02-28_00-16-16_LogisticRegression.pkl',
}

# Further trained models that can be added to the dict above if needed:
#   'RandomForest': 'models/2024-02-28_00-16-16_RandomForest.pkl',
#   'SupportVectorMachine': 'models/2024-02-28_00-16-16_SVC.pkl',
#   'XGBoost': 'models/2024-02-28_00-16-16_XGBoost.pkl',

# PART 1: One talent-job pair

### Known pair from data

Using the first talent profile and the first job profile from the data.

In [24]:
# Take the first entry of each dataset as the known talent/job pair.
talent_profile, job_profile = talent_data[0], job_data[0]

In [25]:
# Display the selected talent profile as the cell output.
talent_profile

{'languages': [{'rating': 'C2', 'title': 'German'},
  {'rating': 'C2', 'title': 'English'},
  {'rating': 'B2', 'title': 'French'},
  {'rating': 'A2', 'title': 'Turkish'}],
 'job_roles': ['frontend-developer',
  'backend-developer',
  'full-stack-developer',
  'java-developer',
  'mobile-developer'],
 'seniority': 'junior',
 'salary_expectation': 48000,
 'degree': 'bachelor'}

In [26]:
# Display the selected job profile as the cell output.
job_profile

{'languages': [{'title': 'German', 'rating': 'C1', 'must_have': True},
  {'title': 'English', 'rating': 'B2', 'must_have': True}],
 'job_roles': ['frontend-developer'],
 'seniorities': ['junior', 'midlevel'],
 'max_salary': 70000,
 'min_degree': 'none'}

Using the match() function of the Search class from the search.py script to match one pair of talent and job profiles.

In [27]:
# Score the current talent/job pair once per configured model.
for name, model_path in models.items():
    # Build a Search instance from this model's file path and match the pair.
    search = Search(model_path)
    result = search.match(talent=talent_profile, job=job_profile)

    # Emit the whole report in one call; sep='\n' puts each part on its own line.
    print(
        f'\nModel: {name}',
        '='*30,
        f'Result: {result}',
        f'\nLabel: {result["label"]}',
        f'Score: {result["score"]}',
        sep='\n',
    )


Model: DecisionTree
Result: {'talent': {'languages': [{'rating': 'C2', 'title': 'German'}, {'rating': 'C2', 'title': 'English'}, {'rating': 'B2', 'title': 'French'}, {'rating': 'A2', 'title': 'Turkish'}], 'job_roles': ['frontend-developer', 'backend-developer', 'full-stack-developer', 'java-developer', 'mobile-developer'], 'seniority': 'junior', 'salary_expectation': 48000, 'degree': 'bachelor'}, 'job': {'languages': [{'title': 'German', 'rating': 'C1', 'must_have': True}, {'title': 'English', 'rating': 'B2', 'must_have': True}], 'job_roles': ['frontend-developer'], 'seniorities': ['junior', 'midlevel'], 'max_salary': 70000, 'min_degree': 'none'}, 'label': True, 'score': 1.0}

Label: True
Score: 1.0

Model: LogisticRegression
Result: {'talent': {'languages': [{'rating': 'C2', 'title': 'German'}, {'rating': 'C2', 'title': 'English'}, {'rating': 'B2', 'title': 'French'}, {'rating': 'A2', 'title': 'Turkish'}], 'job_roles': ['frontend-developer', 'backend-developer', 'full-stack-developer

### Unknown pair (randomly chosen from data)

The talent and job profiles are chosen randomly here.

In [8]:
# Draw one random index per list. randrange(len(...)) derives the bound from
# the data itself instead of assuming exactly 2000 entries, so the draw can
# neither raise IndexError on a shorter list nor ignore the tail of a longer one.
# Call random.seed(<int>) beforehand if a repeatable pair is needed.
random_integer_for_talent = random.randrange(len(talent_data))
random_integer_for_job = random.randrange(len(job_data))

talent_profile = talent_data[random_integer_for_talent]
job_profile = job_data[random_integer_for_job]

In [9]:
# Display the randomly drawn talent profile as the cell output.
talent_profile

{'languages': [{'rating': 'C2', 'title': 'German'},
  {'rating': 'B2', 'title': 'English'}],
 'job_roles': ['sales-manager',
  'key-account-manager',
  'customer-success-manager',
  'business-development-manager'],
 'seniority': 'junior',
 'salary_expectation': 46000,
 'degree': 'bachelor'}

In [10]:
# Display the randomly drawn job profile as the cell output.
job_profile

{'languages': [{'title': 'German', 'rating': 'C1', 'must_have': True},
  {'title': 'English', 'rating': 'C1', 'must_have': True}],
 'job_roles': ['sales-manager'],
 'seniorities': ['none', 'junior', 'midlevel'],
 'max_salary': 70000,
 'min_degree': 'bachelor'}

In [11]:
# Score the randomly drawn talent/job pair once per configured model.
for name, model_path in models.items():
    # Build a Search instance from this model's file path and match the pair.
    search = Search(model_path)
    result = search.match(talent=talent_profile, job=job_profile)

    # Emit the whole report in one call; sep='\n' puts each part on its own line.
    print(
        f'\nModel: {name}',
        '='*30,
        f'Result: {result}',
        f'\nLabel: {result["label"]}',
        f'Score: {result["score"]}',
        sep='\n',
    )


Model: DecisionTree
Result: {'talent': {'languages': [{'rating': 'C2', 'title': 'German'}, {'rating': 'B2', 'title': 'English'}], 'job_roles': ['sales-manager', 'key-account-manager', 'customer-success-manager', 'business-development-manager'], 'seniority': 'junior', 'salary_expectation': 46000, 'degree': 'bachelor'}, 'job': {'languages': [{'title': 'German', 'rating': 'C1', 'must_have': True}, {'title': 'English', 'rating': 'C1', 'must_have': True}], 'job_roles': ['sales-manager'], 'seniorities': ['none', 'junior', 'midlevel'], 'max_salary': 70000, 'min_degree': 'bachelor'}, 'label': False, 'score': 0.02}

Label: False
Score: 0.02

Model: LogisticRegression
Result: {'talent': {'languages': [{'rating': 'C2', 'title': 'German'}, {'rating': 'B2', 'title': 'English'}], 'job_roles': ['sales-manager', 'key-account-manager', 'customer-success-manager', 'business-development-manager'], 'seniority': 'junior', 'salary_expectation': 46000, 'degree': 'bachelor'}, 'job': {'languages': [{'title': 

# PART 2: List of talent profiles and list of job profiles

NOTE: Please be careful with very large lists here, as the computation may take a long time. In this example, only the first 10 talent profiles and the first 10 job profiles are used. Computing the resulting 100 combinations took 1.33 seconds.

In [13]:
%%time

model_path = models['DecisionTree']

search = Search(model_path)
results = search.match_bulk(talents=talent_data[:10],
                            jobs=job_data[:10])

CPU times: total: 1.34 s
Wall time: 1.33 s


In [14]:
# Report how many entries match_bulk returned.
print(f'Total number of results: {len(results)}')

Total number of results: 100


An arbitrary range of results from the middle of the list is displayed here to show that the results are ranked in descending order of score.

In [22]:
# Print the label and score of entries 20 through 34 of the results list.
for result in results[20:35]:
    print(f'Label: {result["label"]}, Score: {result["score"]}')

Label: True, Score: 1.0
Label: True, Score: 1.0
Label: True, Score: 1.0
Label: True, Score: 1.0
Label: True, Score: 1.0
Label: True, Score: 1.0
Label: True, Score: 0.99
Label: True, Score: 0.87
Label: False, Score: 0.08
Label: False, Score: 0.05
Label: False, Score: 0.04
Label: False, Score: 0.04
Label: False, Score: 0.02
Label: False, Score: 0.01
Label: False, Score: 0.01
