In [3]:
import json
import re
from pprint import pprint
from typing import Any, Dict

from sentence_transformers import SentenceTransformer, util

In [4]:
# Read the job description text. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's locale default.
with open("sample-data/job-desc.txt", "r", encoding="utf-8") as file:
    job_desc = file.read()

In [5]:
# Load the sample resumes from JSON. The encoding is pinned to UTF-8 so
# decoding does not depend on the platform's locale default.
with open("sample-data/resumes.json", "r", encoding="utf-8") as file:
    resumes = json.load(file)

### Compute job description to resume similarity scores

In [6]:
def compute_sim_scores(job_desc: str, resumes: list[str]) -> list[float]:
    """Score each resume by semantic similarity to the job description.

    The job description and every resume are embedded with the
    ``all-MiniLM-L6-v2`` sentence-transformers model and compared with
    cosine similarity.

    Args:
        job_desc: Text of the job description.
        resumes: Resume texts to score.

    Returns:
        One cosine-similarity score per resume, in the order of ``resumes``.
        The result is always a list, including for zero or one resume.
    """
    # Nothing to score: return early instead of loading the model and
    # failing on an empty embedding batch.
    if not resumes:
        return []

    # TODO: Experiment with different models:
    # - sentence-BERT?
    # - models fine-tuned for resume scanning tasks?
    model = SentenceTransformer("all-MiniLM-L6-v2")

    job_embedding = model.encode(job_desc, convert_to_tensor=True)
    resume_embeddings = model.encode(resumes, convert_to_tensor=True)

    # cos_sim yields a (1, len(resumes)) matrix. Take row 0 rather than
    # squeeze(): squeeze() would also drop the resume axis when there is a
    # single resume, making tolist() return a bare float instead of a list.
    return util.cos_sim(job_embedding, resume_embeddings)[0].tolist()

In [7]:
# Display the similarity scores of the loaded resumes against the job description.
compute_sim_scores(job_desc, resumes)

[0.5131857991218567,
 0.3369634449481964,
 0.38393259048461914,
 0.40572261810302734,
 0.3709218502044678]

### Compute resume keyword scores

In [6]:
def compute_keyword_scores(resumes: list[str]) -> list[float]:
    """Score each resume by the weighted key skills it mentions.

    A skill counts once per resume, however often it appears. Matching is
    case-insensitive and requires the skill to stand alone as a term: it must
    not be directly preceded or followed by another letter, so "JavaScript"
    does not count as "Java". Digits and punctuation are allowed next to a
    skill, so "Java8" and "C++17" still match.

    Args:
        resumes: Resume texts to score.

    Returns:
        For each resume, in input order, the sum of the weights of the key
        skills found in it (``0`` when none are found).
    """
    # TODO: add logic to extract keywords from job description
    key_skills = {
        "Python": 0.2,
        "Java": 0.5,
        "C++": 0.6,
        "Agile": 0.3,
        "Software Development": 0.6,
    }

    def skill_pattern(skill):
        # Only guard an edge that is itself a letter; "C++" ends in "+", so a
        # letter boundary on its right-hand side would be meaningless.
        left = r"(?<![A-Za-z])" if skill[:1].isalpha() else ""
        right = r"(?![A-Za-z])" if skill[-1:].isalpha() else ""
        return re.compile(left + re.escape(skill) + right, re.IGNORECASE)

    # Compile once per call instead of once per (resume, skill) pair.
    weighted_patterns = [
        (skill_pattern(skill), weight) for skill, weight in key_skills.items()
    ]

    # TODO: add logic to extract keywords from resumes
    def calculate_weighted_score(resume):
        skill_score = 0
        for pattern, weight in weighted_patterns:
            if pattern.search(resume):
                skill_score += weight
        return skill_score

    return [calculate_weighted_score(resume) for resume in resumes]

In [7]:
# Display the weighted keyword score of each loaded resume.
compute_keyword_scores(resumes)

[0.7, 0.2, 0.7, 0.2, 0.5]

### Putting it all together

In [8]:
def score_resumes(job_desc: str, resumes: list[str]) -> list[dict[str, Any]]:
    """Rank resumes by their combined similarity and keyword scores.

    Args:
        job_desc: Text of the job description.
        resumes: Resume texts to rank.

    Returns:
        One ``{"resume": ..., "score": ...}`` dict per resume, where the score
        is the similarity score plus the keyword score, ordered from highest
        to lowest score.
    """
    similarity = compute_sim_scores(job_desc, resumes)
    keyword = compute_keyword_scores(resumes)

    ranked = []
    for idx, resume_text in enumerate(resumes):
        combined = similarity[idx] + keyword[idx]
        ranked.append({"resume": resume_text, "score": combined})

    # Best match first; the sort is stable, so ties keep their input order.
    ranked.sort(key=lambda entry: entry["score"], reverse=True)
    return ranked

In [9]:
# Pretty-print the ranked resumes with their combined scores, highest first.
pprint(score_resumes(job_desc, resumes))

[{'resume': 'John Doe\n'
            'Software Engineer\n'
            'john.doe@example.com | (123) 456-7890 | linkedin.com/in/johndoe\n'
            '\n'
            'Summary:\n'
            'Experienced software engineer with expertise in developing '
            'scalable web applications, strong knowledge of Python and '
            'JavaScript, and a passion for solving complex problems.\n'
            '\n'
            'Skills:\n'
            '- Programming Languages: Python, JavaScript, Java\n'
            '- Frameworks: Django, React, Spring Boot\n'
            '- Tools: Git, Docker, Kubernetes\n'
            '- Databases: PostgreSQL, MongoDB\n'
            '\n'
            'Experience:\n'
            'Software Engineer | ABC Tech | June 2020 - Present\n'
            '- Built and maintained scalable APIs to support high-traffic '
            'e-commerce platforms.\n'
            '- Led migration of a monolithic application to a microservices '
            'architecture, reducin