# Match user-entered skills to job offers

In [9]:
import ast
import json

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer

# Read the job-offer table from disk. Rows that pandas cannot parse are
# skipped (on_bad_lines='skip') instead of aborting the load.
jobs_df = pd.read_csv(
    'it-analysis-main/DataStore/jobData.csv',
    on_bad_lines='skip',
)

# Preview the first rows as the cell output
jobs_df.head()

Unnamed: 0,Position,Company,Experience,Salary,Used Technologies,Optional Technologies
0,Embedded Software Engineer,Fluke Corportaion,mid,13000 18000,"{'C': 'regular', 'C++': 'regular', 'Linux': 'r...",-
1,Azure Platform Engineer (AI Department),Procter & Gamble,mid,-,"{'Microsoft Azure': 'advanced', 'Python': 'adv...",-
2,.NET Developer,UN7,mid,20000 28000,{'.Net': 'advanced'},-
3,Spec. Projektant Robotyzacji Procesów,Credit Agricole Bank Polska S.A.,mid,-,"{'C#': 'regular', 'VB.Net': 'regular', 'Java':...",-
4,HT Functional Consultant with ERP Industry Sol...,Accenture,mid,-,"{'English': 'master', 'ERP': 'advanced', 'Prob...",-


In [21]:
# Function to parse the 'Used Technologies' column
def parse_technologies(tech_entry):
    """Extract a list of skill names from one 'Used Technologies' cell.

    Two textual formats are handled:

    * a dict literal such as ``"{'C': 'regular', 'C++': 'regular'}"`` --
      the keys are taken as the skill names;
    * a comma-separated string such as ``"Linux, Google Cloud Platform"``.

    Args:
        tech_entry: Raw cell value. Anything that is not a ``str``
            (e.g. ``None`` or a float NaN) yields an empty list.

    Returns:
        list[str]: Skill names with surrounding whitespace removed and
        empty names dropped. ``[]`` is returned when a dict-style entry
        cannot be parsed.
    """
    if not isinstance(tech_entry, str):
        return []

    entry = tech_entry.strip()

    if entry.startswith('{'):
        # Handling the dictionary format.
        try:
            # The cell holds a Python dict repr, so parse it as a Python
            # literal: this copes with apostrophes inside names, which the
            # quote-swapping JSON approach cannot.
            parsed = ast.literal_eval(entry)
        except (ValueError, SyntaxError, TypeError):
            # Fall back to the JSON route for entries that are JSON rather
            # than Python literals (e.g. containing null/true/false).
            try:
                parsed = json.loads(entry.replace("'", "\""))
            except json.JSONDecodeError:
                return []
        if not isinstance(parsed, dict):
            return []
        names = (str(key).strip() for key in parsed)
    else:
        # Handling the comma-separated string format. Stripping matters:
        # ' Linux' and 'Linux' would otherwise become two distinct skills
        # once the column is one-hot encoded.
        names = (part.strip() for part in entry.split(','))

    return [name for name in names if name]

# Build a 'Skills' column holding, for every job, the list of skill names
# parsed out of its 'Used Technologies' text
jobs_df['Skills'] = jobs_df['Used Technologies'].apply(parse_technologies)

# Turn the skill lists into a binary indicator matrix: one column per
# distinct skill, one row per job. The fitted binarizer is kept in `mlb`
# so other skill lists can be encoded against the same columns.
mlb = MultiLabelBinarizer()
skills_encoded = mlb.fit_transform(jobs_df['Skills'])
skills_encoded_df = pd.DataFrame(
    data=skills_encoded,
    columns=mlb.classes_,
)

# Function to calculate similarity and classify matches
def classify_match(user_skills_vector, job_skills_vector, thresholds=(0.75, 0.5)):
    """Label how well a job's skills match the user's skills.

    The cosine similarity between the two vectors is compared against the
    cut-offs in ``thresholds``, highest first.

    Args:
        user_skills_vector: 1-D encoded skill vector of the user.
        job_skills_vector: 1-D encoded skill vector of the job.
        thresholds: ``(good, moderate)`` inclusive lower bounds on the
            similarity score.

    Returns:
        str: ``'good match'``, ``'moderate match'`` or ``'poor match'``.
    """
    # cosine_similarity works on 2-D inputs, so wrap each vector as a single
    # row and read the only cell of the resulting 1x1 matrix.
    score = cosine_similarity([user_skills_vector], [job_skills_vector])[0][0]

    # Walk the labels from best to worst; the first cut-off met wins.
    for position, label in enumerate(('good match', 'moderate match')):
        if score >= thresholds[position]:
            return label
    return 'poor match'

# Example usage: encode a sample user's skills with the already-fitted binarizer
user_skills = ['C++', 'C#']  # Example user skills
user_skills_vector = mlb.transform([user_skills])[0]

# Label every job by comparing its encoded skill row with the user's vector
jobs_df['Match Classification'] = [
    classify_match(user_skills_vector, job_vector)
    for job_vector in skills_encoded_df.to_numpy()
]

# Display the results: for each class, print its name and preview the first
# matching jobs, from best to worst
for match_label in ('good match', 'moderate match', 'poor match'):
    print(match_label)
    display(jobs_df[jobs_df['Match Classification'] == match_label].head())
# Column subset noted for a narrower view: ['Position', 'Match Classification']

good match


Unnamed: 0,Position,Company,Experience,Salary,Used Technologies,Optional Technologies,Skills,Match Classification


moderate match


Unnamed: 0,Position,Company,Experience,Salary,Used Technologies,Optional Technologies,Skills,Match Classification
69,Software Developer,Luxoft Poland,senior,-,"{'C#': 'advanced', 'Python': 'advanced', 'SQL'...",-,"[C#, Python, SQL, C++, Fixed Income space]",moderate match
152,Programista C++ (karty mikroprocesorowe/ krypt...,ENIGMA Systemy Ochrony Informacji Sp. z o.o.,Mid,-,C++,"C, Python, Yocto, UNIX/Linux, RTOS, ARM, Xilin...",[C++],moderate match
158,Senior C++ Software Engineer,Samsung R&D Institute Poland,Senior,-,C++,,[C++],moderate match
289,Specjalista IT help desk,Mecalux Sp. z o.o.,Mid,-,C#,,[C#],moderate match
379,C++ Developer,ITFS sp. z o.o.,Mid,16000 20800,C++,"Linux, Google Cloud Platform",[C++],moderate match


poor match


Unnamed: 0,Position,Company,Experience,Salary,Used Technologies,Optional Technologies,Skills,Match Classification
0,Embedded Software Engineer,Fluke Corportaion,mid,13000 18000,"{'C': 'regular', 'C++': 'regular', 'Linux': 'r...",-,"[C, C++, Linux, SVN, Yocto, Qt]",poor match
1,Azure Platform Engineer (AI Department),Procter & Gamble,mid,-,"{'Microsoft Azure': 'advanced', 'Python': 'adv...",-,"[Microsoft Azure, Python, DevOps, CI/CD, GitHub]",poor match
2,.NET Developer,UN7,mid,20000 28000,{'.Net': 'advanced'},-,[.Net],poor match
3,Spec. Projektant Robotyzacji Procesów,Credit Agricole Bank Polska S.A.,mid,-,"{'C#': 'regular', 'VB.Net': 'regular', 'Java':...",-,"[C#, VB.Net, Java, VBA]",poor match
4,HT Functional Consultant with ERP Industry Sol...,Accenture,mid,-,"{'English': 'master', 'ERP': 'advanced', 'Prob...",-,"[English, ERP, Problem Solving, Communication ...",poor match
