In [None]:
import pandas as pd

def jaccard_similarity(set1, set2):
    """Return the Jaccard index |set1 ∩ set2| / |set1 ∪ set2| of two sets.

    Args:
        set1: A set (anything exposing ``union`` and ``intersection``).
        set2: The collection to compare against ``set1``.

    Returns:
        The ratio of shared elements to total distinct elements, or ``0``
        when both inputs are empty (the union has no elements).
    """
    union_size = len(set1.union(set2))
    # Guard against 0/0 when neither input contains anything.
    if union_size == 0:
        return 0
    shared_size = len(set1.intersection(set2))
    return shared_size / union_size

def _parse_csv_set(value):
    """Split a comma-separated string into a set of stripped, non-empty tokens.

    Non-string values (for example NaN produced by a missing CSV cell) yield
    an empty set instead of raising.
    """
    if not isinstance(value, str):
        return set()
    return {token.strip() for token in value.split(',') if token.strip()}


def recommend_employers(worker_skills_str, worker_location, employers_df, max_rating=200):
    """Rank employers for a worker by a summed relevance score.

    Only employers whose ``Job_Categories`` share at least one entry with the
    worker's skills are kept. Each kept employer is scored as the sum of:

    * Jaccard similarity between the worker's skills and the employer's
      job categories,
    * 1 if the employer's ``Location`` equals ``worker_location``, else 0,
    * ``Offered_Wage`` divided by the maximum ``Offered_Wage`` in
      ``employers_df`` (the whole input, not just the matches),
    * ``Employer_Rating`` divided by ``max_rating``.

    Args:
        worker_skills_str: Comma-separated skills. Surrounding whitespace and
            empty entries are ignored; a non-string value (e.g. NaN) is
            treated as "no skills".
        worker_location: Value compared for equality against ``Location``.
        employers_df: DataFrame with the columns ``Job_Categories``,
            ``Location``, ``Offered_Wage`` and ``Employer_Rating``. It is not
            modified.
        max_rating: Divisor used to normalize ``Employer_Rating``. Defaults to
            200, the value that was previously hard-coded.

    Returns:
        A new DataFrame with the columns of ``employers_df`` plus
        ``relevance_score``, sorted by that score in descending order. When
        no employer matches, an empty DataFrame with the same columns
        (including ``relevance_score``) is returned.
    """
    worker_skills = _parse_csv_set(worker_skills_str)

    # Parse every employer's categories once; the sets are reused for both the
    # overlap filter and the Jaccard computation.
    category_sets = employers_df['Job_Categories'].apply(_parse_csv_set)

    # astype(bool) keeps the mask a valid boolean indexer even when
    # employers_df has no rows (apply on an empty Series yields object dtype).
    has_overlap = category_sets.apply(
        lambda cats: not worker_skills.isdisjoint(cats)
    ).astype(bool)

    matching_employers = employers_df[has_overlap].copy()

    if matching_employers.empty:
        # Keep the schema identical to the non-empty result so callers can
        # always rely on the 'relevance_score' column being present.
        result_columns = list(employers_df.columns)
        if 'relevance_score' not in result_columns:
            result_columns.append('relevance_score')
        return pd.DataFrame(columns=result_columns)

    # Jaccard similarity between worker's skills and employer's job categories
    jaccard = category_sets[has_overlap].apply(
        lambda cats: jaccard_similarity(worker_skills, cats)
    )

    # Binary location match
    location_match = (matching_employers['Location'] == worker_location).astype(int)

    # Offered wage normalized by the maximum wage in the full dataset. If that
    # maximum is zero, negative or NaN the ratio is undefined, so the wage
    # term contributes nothing rather than poisoning the score with inf/NaN.
    max_wage = employers_df['Offered_Wage'].max()
    if max_wage > 0:
        norm_wage = matching_employers['Offered_Wage'] / max_wage
    else:
        norm_wage = 0.0

    # Normalized employer rating
    norm_rating = matching_employers['Employer_Rating'] / max_rating

    # Total relevance score is the plain sum of the four components. Only the
    # final score is stored, so no helper columns need to be dropped later.
    matching_employers['relevance_score'] = (
        jaccard + location_match + norm_wage + norm_rating
    )

    # Sort employers by relevance score in descending order
    return matching_employers.sort_values(by='relevance_score', ascending=False)

Recommended Employers for the Worker:
   Employer_ID     Job_Categories Location  Offered_Wage  Employer_Rating  \
1            2       Electricians  Jakarta        180000              150   
0            1  Plumbers, Welders  Jakarta        200000              120   

   relevance_score  
1         3.150000  
0         2.933333  


In [None]:
# Load the sample worker and employer tables from the working directory.
workers_df = pd.read_csv("Sample Dataset - workers.csv")
employers_df = pd.read_csv("Sample Dataset - employer.csv")

worker_id = input("Enter Worker ID: ")

# Worker_ID is compared as an integer, so reject input that is not a whole
# number instead of letting int() abort the cell with a ValueError traceback.
try:
    worker_id_number = int(worker_id)
except ValueError:
    worker_id_number = None
    print("Invalid Worker ID: please enter a whole number.")

if worker_id_number is not None:
    worker_row = workers_df[workers_df["Worker_ID"] == worker_id_number]
    if worker_row.empty:
        print("Worker ID not found.")
    else:
        # Use the first matching row's location and comma-separated skills.
        worker_location = worker_row["Location"].values[0]
        worker_skills = worker_row["Skills"].values[0]

        recommended_employers = recommend_employers(worker_skills, worker_location, employers_df)

        print(recommended_employers)

Enter Worker ID: 1
    Job_ID  Employer_Rating      Type_of_Work  \
28      29             85.0  Waste collection   
25      26             74.4  Waste collection   
97      98             65.1  Waste collection   
47      48            120.1  Waste collection   
55      56            147.0  Waste collection   
63      64             96.4  Waste collection   
14      15             99.9  Waste collection   
79      80             95.8  Waste collection   
15      16             79.0  Waste collection   
13      14             98.3  Waste collection   
92      93            126.9  Waste collection   
39      40            116.0  Waste collection   
34      35            100.8  Waste collection   
85      86            134.3  Waste collection   
94      95            120.5  Waste collection   
67      68             67.5  Waste collection   
51      52             95.8  Waste collection   

                              Job_Categories       Location  Offered_Wage  \
28                   