In [1]:
import pandas as pd
import numpy as np
import cma  # Covariance Matrix Adaptation Evolution Strategy

# Load the candidate dataset; later cells read its 'Name', 'Skills' and
# 'Years of Experience' columns.
jobs_df = pd.read_excel('jobs.xlsx')

# HR provides a dictionary mapping each required skill to the minimum years of
# experience. Skill names are matched against the dataset's 'Skills' values by
# exact, case-sensitive string comparison, so each key must be spelled exactly
# as in the data (the data uses 'TensorFlow', not 'Tensorflow').
hr_requirements = {
    'React': 5,        # 5+ years experience in React
    'Python': 2,       # 2+ years experience in Python
    'TensorFlow': 1,   # 1+ year experience in TensorFlow
}

# Preview the first rows to check which columns were loaded
jobs_df.head()



Unnamed: 0,Name,Skills,Years of Experience
0,Alice Johnson,Python,5
1,Bob Smith,Python,3
2,Charlie Brown,Python,1
3,David Wilson,JavaScript,10
4,Eva White,JavaScript,2


In [2]:
def fitness_function(candidate_years_exp, skills, hr_requirements):
    """Return the total experience shortfall against the requirements (lower is better).

    Args:
        candidate_years_exp: Indexable of years of experience, one entry per
            position in ``skills``.
        skills: Iterable of skill names; position ``i`` pairs with
            ``candidate_years_exp[i]``.
        hr_requirements: Mapping of skill name to required years; a skill that
            is not in the mapping requires 0 years.

    Returns:
        The sum over all skills of ``max(0, required - actual)``. Surplus
        experience in one skill never offsets a deficit in another.
    """
    return sum(
        max(0, hr_requirements.get(skill_name, 0) - candidate_years_exp[position])
        for position, skill_name in enumerate(skills)
    )

In [3]:
skills_list = list(hr_requirements.keys())  # Skills HR wants; fixes the vector order
experience_data = []   # one experience vector per retained candidate
candidate_names = []   # names aligned index-for-index with experience_data
for _, row in jobs_df.iterrows():
    # Split on bare commas and strip whitespace so "Python, React" and
    # "Python,React" parse to the same list. str() keeps a missing (NaN) cell
    # from raising; such a row simply matches no required skill.
    candidate_skills = [skill.strip() for skill in str(row['Skills']).split(',')]
    candidate_experience = row['Years of Experience']

    # Keep only candidates who list every required skill.
    # NOTE(review): the previewed rows each list a single skill, so with several
    # required skills this filter retains nobody -- confirm whether matching
    # any one skill is the intended rule.
    if all(skill in candidate_skills for skill in skills_list):
        # Each row carries one overall experience figure, and every required
        # skill is present here, so that figure fills every vector slot.
        experience_data.append([candidate_experience] * len(skills_list))
        candidate_names.append(row['Name'])

In [4]:
if len(experience_data) == 0 or len(candidate_names) == 0:
    print("No candidates found with the required skills.")
else:
    experience_data = np.array(experience_data)

    # CMA-ES searches the continuous space of experience vectors for one with a
    # minimal shortfall, starting from 5 years per skill with step size 0.5.
    # NOTE(review): the objective never reads experience_data, so this run says
    # nothing about which real candidate is best; its population indices must
    # not be used to index the candidate arrays.
    initial_guess = [5] * len(skills_list)
    es = cma.CMAEvolutionStrategy(initial_guess, 0.5)

    # Run the optimization process
    for _ in range(100):
        solutions = es.ask()
        fitness_values = [fitness_function(solution, skills_list, hr_requirements) for solution in solutions]
        es.tell(solutions, fitness_values)

    # Score every real candidate and pick the one with the smallest shortfall.
    # The index now refers to experience_data / candidate_names, which share
    # the same ordering.
    candidate_fitness = [
        fitness_function(candidate_exp, skills_list, hr_requirements)
        for candidate_exp in experience_data
    ]
    best_candidate_index = int(np.argmin(candidate_fitness))
    best_candidate = experience_data[best_candidate_index]
    best_candidate_name = candidate_names[best_candidate_index]
    best_fitness = candidate_fitness[best_candidate_index]

    # Output the best match candidate's name, skills, experience level, and fitness score
    best_candidate_skills = {skills_list[i]: best_candidate[i] for i in range(len(skills_list))}
    print(f"Best candidate found: {best_candidate_name}")
    print("Skills and experience level:")
    for skill, exp in best_candidate_skills.items():
        print(f"{skill}: {exp:.2f} years")
    print(f"Fitness score (lower is better): {best_fitness:.2f}")

No candidates found with the required skills.


In [5]:
# Display the loaded frame to inspect the skill names present in the data
jobs_df

Unnamed: 0,Name,Skills,Years of Experience
0,Alice Johnson,Python,5
1,Bob Smith,Python,3
2,Charlie Brown,Python,1
3,David Wilson,JavaScript,10
4,Eva White,JavaScript,2
5,Frank Black,Python,8
6,Grace Green,React,4
7,Henry Grey,TensorFlow,6
8,Ian Blue,React,7
9,Julia Red,Python,5


In [13]:
import pandas as pd
import numpy as np
import cma  # Covariance Matrix Adaptation Evolution Strategy

# Load the candidate dataset; the code below reads its 'Name', 'Skills' and
# 'Years of Experience' columns.
jobs_df = pd.read_excel('jobs.xlsx')

# Minimum years of experience HR requires, keyed by skill name.
# Insertion order defines the order of the per-candidate experience vectors.
hr_requirements = dict(
    React=5,          # 5+ years experience in React
    Python=3,         # 3+ years experience in Python
    JavaScript=10,    # 10+ years experience in JavaScript
)

def fitness_function(candidate_years_exp, skills, hr_requirements):
    """Total years by which a candidate falls short of the requirements.

    The skill at position ``i`` of ``skills`` is paired with
    ``candidate_years_exp[i]``. A skill missing from ``hr_requirements`` is
    treated as requiring 0 years. Only deficits are counted, so a score of 0
    means every requirement is met and lower scores are better.
    """
    shortfalls = [
        max(0, hr_requirements.get(name, 0) - candidate_years_exp[idx])
        for idx, name in enumerate(skills)
    ]
    return sum(shortfalls)

skills_list = list(hr_requirements.keys())  # Skills HR wants; fixes the vector order
experience_data = []   # one experience vector per retained candidate
candidate_names = []   # names aligned index-for-index with experience_data

# Populate experience_data and candidate_names
for _, row in jobs_df.iterrows():
    # Split on commas and strip surrounding whitespace so "Python,React" and
    # "Python, React" are parsed identically. str() keeps a missing (NaN) cell
    # from raising; such a row simply matches no required skill.
    candidate_skills = [skill.strip() for skill in str(row['Skills']).split(',')]
    candidate_experience = row['Years of Experience']

    # Skip candidates who have none of the required skills
    if not any(skill in candidate_skills for skill in skills_list):
        continue

    # The row carries one overall experience figure: credit it to each required
    # skill the candidate lists, and 0 to the ones they do not.
    experience_data.append([
        candidate_experience if skill in candidate_skills else 0
        for skill in skills_list
    ])
    candidate_names.append(row['Name'])

if len(experience_data) == 0 or len(candidate_names) == 0:
    print("No candidates found with the required skills.")
else:
    experience_data = np.array(experience_data)

    # CMA-ES searches the continuous space of experience vectors for one with a
    # minimal shortfall, starting from all zeros with step size 0.5.
    # NOTE(review): the objective never reads experience_data, so this run does
    # not rank real candidates; its population indices must not be used to
    # index the candidate arrays.
    initial_guess = np.zeros(len(skills_list))  # Starting point for optimization
    es = cma.CMAEvolutionStrategy(initial_guess, 0.5)

    # Run the optimization process
    for _ in range(100):  # Run 100 iterations
        solutions = es.ask()
        fitness_values = [fitness_function(solution, skills_list, hr_requirements) for solution in solutions]
        es.tell(solutions, fitness_values)

    # Score every real candidate once; both the best pick and the ranking use
    # these scores, so they can never disagree.
    fitness_scores = [fitness_function(exp, skills_list, hr_requirements) for exp in experience_data]

    # Best candidate = smallest shortfall among the real candidates. The index
    # refers to experience_data / candidate_names, which share one ordering.
    best_candidate_index = int(np.argmin(fitness_scores))
    best_candidate = experience_data[best_candidate_index]
    best_candidate_name = candidate_names[best_candidate_index]
    best_fitness = fitness_scores[best_candidate_index]

    # Prepare the ranking list of all candidates based on fitness scores
    # (sorted() is stable, so ties keep their dataset order)
    ranked_candidates = sorted(zip(candidate_names, experience_data, fitness_scores), key=lambda x: x[2])

    # Output the ranked list
    print("Ranking of candidates:")
    for rank, (name, skills_exp, score) in enumerate(ranked_candidates, start=1):
        skills_output = ', '.join(f"{skills_list[i]}: {skills_exp[i]:.2f} years" for i in range(len(skills_list)))
        print(f"{rank}. {name} | {skills_output} | Fitness score: {score:.2f}")


(3_w,7)-aCMA-ES (mu_w=2.3,w_1=58%) in dimension 3 (seed=634238, Mon Oct 21 13:02:37 2024)
Ranking of candidates:
1. David Wilson | React: 0.00 years, Python: 0.00 years, JavaScript: 10.00 years | Fitness score: 8.00
2. Ethan Cream | React: 0.00 years, Python: 0.00 years, JavaScript: 10.00 years | Fitness score: 8.00
3. Tina Walnut | React: 0.00 years, Python: 0.00 years, JavaScript: 10.00 years | Fitness score: 8.00
4. Quinn Bronze | React: 0.00 years, Python: 0.00 years, JavaScript: 8.00 years | Fitness score: 10.00
5. Steve Teal | React: 0.00 years, Python: 0.00 years, JavaScript: 7.00 years | Fitness score: 11.00
6. Charlie Lime | React: 0.00 years, Python: 0.00 years, JavaScript: 7.00 years | Fitness score: 11.00
7. Nina Walnut | React: 0.00 years, Python: 0.00 years, JavaScript: 7.00 years | Fitness score: 11.00
8. Oscar Gold | React: 0.00 years, Python: 0.00 years, JavaScript: 6.00 years | Fitness score: 12.00
9. Zachary Mint | React: 0.00 years, Python: 0.00 years, JavaScript: 6