In [1]:
from sentence_transformers import SentenceTransformer, util
from pprint import pprint
from typing import Dict, Any

import json

In [2]:
job_desc = """
Job Title: Software Engineer

Job Description:  
We are seeking a passionate and talented Software Engineer to join our dynamic team. In this role, you will design, develop, and maintain scalable software solutions to solve complex problems. You will collaborate with cross-functional teams to deliver high-quality applications and services.

Responsibilities:  
- Develop, test, and deploy software applications using modern programming languages and frameworks.  
- Work closely with product managers and designers to gather and refine requirements.  
- Write clean, efficient, and maintainable code, adhering to best practices.  
- Debug, troubleshoot, and optimize applications for performance and scalability.  
- Collaborate with team members in an Agile environment.

Qualifications:  
- Bachelor's degree in Computer Science, Software Engineering, or a related field, or equivalent experience.  
- Proficiency in one or more programming languages (e.g., Python, Java, C++).  
- Experience with software development tools, frameworks, and methodologies.  
- Strong problem-solving skills and attention to detail.  
- Excellent communication and teamwork abilities.

Join us and be part of a team that values innovation, growth, and collaboration!
"""

In [3]:
resumes = [
    "John Doe\nSoftware Engineer\njohn.doe@example.com | (123) 456-7890 | linkedin.com/in/johndoe\n\nSummary:\nExperienced software engineer with expertise in developing scalable web applications, strong knowledge of Python and JavaScript, and a passion for solving complex problems.\n\nSkills:\n- Programming Languages: Python, JavaScript, Java\n- Frameworks: Django, React, Spring Boot\n- Tools: Git, Docker, Kubernetes\n- Databases: PostgreSQL, MongoDB\n\nExperience:\nSoftware Engineer | ABC Tech | June 2020 - Present\n- Built and maintained scalable APIs to support high-traffic e-commerce platforms.\n- Led migration of a monolithic application to a microservices architecture, reducing downtime by 30%.\n\nEducation:\nB.S. in Computer Science | University of XYZ | May 2020",
    
    "Jane Smith\nData Scientist\njane.smith@example.com | (987) 654-3210 | github.com/janesmith\n\nSummary:\nData scientist with a strong background in machine learning, statistical modeling, and data visualization. Skilled in Python, R, and SQL with experience in predictive analytics.\n\nSkills:\n- Machine Learning: Scikit-learn, TensorFlow, PyTorch\n- Data Visualization: Tableau, Matplotlib, Seaborn\n- Databases: MySQL, PostgreSQL\n- Tools: Jupyter, Excel, Git\n\nExperience:\nData Scientist | DataCorp | March 2018 - Present\n- Developed machine learning models to predict customer churn, improving retention by 20%.\n- Automated ETL pipelines to streamline data processing, saving 15 hours of manual work weekly.\n\nEducation:\nM.S. in Data Science | University of ABC | December 2017",
    
    "Michael Brown\nFull-Stack Developer\nmichael.brown@example.com | (555) 123-4567 | michaelbrown.dev\n\nSummary:\nFull-stack developer with 5+ years of experience building responsive web applications and services. Proficient in JavaScript, TypeScript, and modern frameworks like React and Node.js.\n\nSkills:\n- Frontend: HTML, CSS, JavaScript, React\n- Backend: Node.js, Express, Python\n- Databases: MongoDB, PostgreSQL\n- Tools: Docker, AWS, Webpack\n\nExperience:\nFull-Stack Developer | XYZ Solutions | August 2019 - Present\n- Designed and implemented a customer management system used by over 10,000 users.\n- Improved application load times by 40% through optimized code and caching strategies.\n\nEducation:\nB.S. in Software Engineering | State University | May 2017",
    
    "Emily Johnson\nDevOps Engineer\nemily.johnson@example.com | (444) 789-0123 | emilyjohnson.dev\n\nSummary:\nDevOps engineer with 4+ years of experience in cloud infrastructure, CI/CD pipelines, and container orchestration. Skilled in AWS, Kubernetes, and Terraform.\n\nSkills:\n- Cloud Platforms: AWS, Azure\n- Tools: Docker, Kubernetes, Terraform, Jenkins\n- Scripting: Bash, Python\n- Monitoring: Prometheus, Grafana\n\nExperience:\nDevOps Engineer | CloudTech | July 2020 - Present\n- Automated infrastructure deployment using Terraform, reducing setup time by 50%.\n- Implemented CI/CD pipelines for microservices, accelerating deployments by 70%.\n\nEducation:\nB.S. in Information Technology | Tech University | May 2016",
    
    "Sophia Williams\nUX Designer\nsophia.williams@example.com | (333) 456-7890 | behance.net/sophiawilliams\n\nSummary:\nUX designer with a passion for creating user-centered designs and improving user experiences. Proficient in Figma, Adobe XD, and usability testing.\n\nSkills:\n- Design Tools: Figma, Adobe XD, Sketch\n- Research: Usability Testing, A/B Testing\n- Prototyping: InVision, Axure\n- Frontend: HTML, CSS, JavaScript\n\nExperience:\nUX Designer | Creative Studio | April 2019 - Present\n- Redesigned mobile app interfaces, resulting in a 25% increase in user engagement.\n- Conducted user research sessions to identify pain points and improve workflows.\n\nEducation:\nB.A. in Graphic Design | Design Institute | May 2018"
]

### Compute job descrption to resume similarity scores

In [4]:
def compute_sim_scores(job_desc: str, resumes: list[str]) -> list[float]:
    # TODO: Experiment with different models:
    # - sentence-BERT?
    # - models fine-tuned for resume scanning tasks?
    model = SentenceTransformer("all-MiniLM-L6-v2")
    
    job_embedding = model.encode(job_desc, convert_to_tensor=True)
    resume_embeddings = model.encode(resumes, convert_to_tensor=True)
    
    return util.cos_sim(job_embedding, resume_embeddings).squeeze().tolist()

In [5]:
compute_sim_scores(job_desc, resumes)

[0.5131856799125671,
 0.33696335554122925,
 0.383932888507843,
 0.4057227373123169,
 0.3709218502044678]

### Compute resume keyword scores

In [6]:
def compute_keyword_scores(resumes: list[str]) -> list[float]:
    # TODO: add logic to extract keywords from job description
    key_skills = {
        "Python": 0.2,
        "Java": 0.5,
        "C++": 0.6,
        "Agile": 0.3,
        "Software Development": 0.6,
    }
    
    # TODO: add logic to extract keywords from resumes
    def calculate_weighted_score(resume):
        skill_score = 0
        for skill, weight in key_skills.items():
            if skill.lower() in resume.lower():
                skill_score += weight
        return skill_score
    
    return [calculate_weighted_score(resume) for resume in resumes]

In [7]:
compute_keyword_scores(resumes)

[0.7, 0.2, 0.7, 0.2, 0.5]

### Putting it all together

In [8]:
def score_resumes(job_desc: str, resumes: list[str]) -> list[dict[str, Any]]:
    sim_scores = compute_sim_scores(job_desc, resumes)
    keyword_scores = compute_keyword_scores(resumes)
    
    final_scores = [
        {"resume": resumes[i], "score": sim_scores[i] + keyword_scores[i]}
        for i in range(len(resumes))
    ]
    
    final_scores_sorted = sorted(final_scores, key=lambda x: x["score"], reverse=True)
    
    return final_scores_sorted

In [9]:
pprint(score_resumes(job_desc, resumes))

[{'resume': 'John Doe\n'
            'Software Engineer\n'
            'john.doe@example.com | (123) 456-7890 | linkedin.com/in/johndoe\n'
            '\n'
            'Summary:\n'
            'Experienced software engineer with expertise in developing '
            'scalable web applications, strong knowledge of Python and '
            'JavaScript, and a passion for solving complex problems.\n'
            '\n'
            'Skills:\n'
            '- Programming Languages: Python, JavaScript, Java\n'
            '- Frameworks: Django, React, Spring Boot\n'
            '- Tools: Git, Docker, Kubernetes\n'
            '- Databases: PostgreSQL, MongoDB\n'
            '\n'
            'Experience:\n'
            'Software Engineer | ABC Tech | June 2020 - Present\n'
            '- Built and maintained scalable APIs to support high-traffic '
            'e-commerce platforms.\n'
            '- Led migration of a monolithic application to a microservices '
            'architecture, reducin