In [None]:
import pandas as pd

In [None]:
# Read the job postings from disk and preview the first five rows.
jobs = pd.read_csv(filepath_or_buffer='Jobs.csv')
jobs.head(n=5)

Unnamed: 0,Job Position,Job Skills,Job Location,Experience
0,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years
1,Data Scientist,"Machine Learning, Statistics, Python, SQL","San Francisco, USA",1-2 years
2,Sales Executive,"Communication, Negotiation, CRM, Lead Generation","London, UK",0-3 years
3,Security Guard,"Surveillance, Patrolling, Emergency Response","Mumbai, India",0-2 years
4,Graphic Designer,"Photoshop, Illustrator, UI/UX, Creativity","Berlin, Germany",3-5 years


In [None]:
# Read the candidate resumes from disk and preview the first five rows.
resume = pd.read_csv(filepath_or_buffer='Resume.csv')
resume.head(n=5)

Unnamed: 0,Name,Position,Skills,Location,Experience
0,John Smith,Software Engineer,"Python, Java, React, SQL, Git","Boston, USA",2 years
1,Emily Johnson,Software Engineer,"Python, C++, JavaScript, AWS, Git","Los Angeles, USA",1 years
2,William Carter,Software Engineer,"C++, Kubernetes, Docker, Microservices","Dallas, USA",0 years
3,Rahul Sharma,Data Scientist,"Machine Learning, Statistics, Python, SQL","Chicago, USA",0 years
4,Aisha Khan,Data Scientist,"Deep Learning, NLP, Python, R","Toronto, Canada",3 years


In [None]:
# Print the column names, non-null counts and dtypes of the resume table.
resume.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131 entries, 0 to 130
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        131 non-null    object
 1   Position    131 non-null    object
 2   Skills      131 non-null    object
 3   Location    131 non-null    object
 4   Experience  131 non-null    object
dtypes: object(5)
memory usage: 5.2+ KB


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
import re

def extract_years(exp_str):
    """Return the first integer found in an experience string.

    Args:
        exp_str: Value such as '2 years' or '1 year'. It is converted with
            ``str()`` before searching, so non-string input is accepted.

    Returns:
        int: The first run of digits as an integer, or 0 when the text
        contains no digits.
    """
    match = re.search(r'\d+', str(exp_str))
    return int(match.group()) if match else 0


def calculate_experience_score(job_exp_range, candidate_exp):
    """Score a candidate's experience against a job's required experience.

    Args:
        job_exp_range: Requirement such as '0-2 years' (a range) or
            '3 years' (a single value, used as both minimum and maximum).
        candidate_exp: Candidate experience such as '2 years'.

    Returns:
        int: One of
            * 22-25 when the candidate meets or exceeds the upper bound
              (22 plus one point per extra year, capped at 3 bonus points);
            * 20 when the candidate is inside the range but below the
              upper bound;
            * 0-8 when the candidate is below the lower bound
              (10 minus 2 points per missing year, floored at 0).
    """
    bounds = [extract_years(part) for part in str(job_exp_range).split('-')]
    # A single value yields a one-element list, so first and last coincide.
    min_exp, max_exp = bounds[0], bounds[-1]

    years = extract_years(candidate_exp)

    if years >= max_exp:
        return 22 + min(3, years - max_exp)
    if years >= min_exp:
        return 20

    # Below the minimum. The previous expression,
    # 10 + max(0, (years - min_exp) * 2), always evaluated to 10 because the
    # inner term is negative here, so the penalty was never applied.
    missing_years = min_exp - years
    return max(0, 10 - 2 * missing_years)


In [None]:
def _location_parts(location):
    """Split a 'City, Country' value into trimmed, case-folded parts."""
    if not isinstance(location, str):
        return []  # missing values (None / NaN) have no usable parts
    return [part.strip().casefold() for part in location.split(',') if part.strip()]


def calculate_location_score(job_location, candidate_location):
    """Score how close a candidate's location is to the job's location.

    Locations are comma-separated strings whose last part is compared as the
    country (e.g. 'New York, USA'). Comparison ignores letter case and
    surrounding whitespace, so 'Boston,usa' and 'Boston, USA' are equal.

    Args:
        job_location: Location of the job.
        candidate_location: Location of the candidate.

    Returns:
        int: 30 when every part matches, 20 when only the last part
        (country) matches, 10 otherwise. A missing or blank location on
        either side is treated as no match and scores 10.
    """
    job_parts = _location_parts(job_location)
    candidate_parts = _location_parts(candidate_location)

    # An unknown location cannot be a match; this also avoids the
    # AttributeError that calling .split() on NaN/None used to raise.
    if not job_parts or not candidate_parts:
        return 10
    if job_parts == candidate_parts:
        return 30
    if job_parts[-1] == candidate_parts[-1]:
        return 20
    return 10

In [None]:
def calculate_skill_similarity(job_skills, candidate_skills):
    """Score how closely a candidate's skills match a job's skills.

    Both skill strings are vectorised with TF-IDF (fitted on just these two
    texts) and compared with cosine similarity; the result is scaled to 50.

    Args:
        job_skills: Comma-separated skills required by the job.
        candidate_skills: Comma-separated skills listed by the candidate.

    Returns:
        float: Cosine similarity multiplied by 50. Returns 0.0 when either
        side is missing or contains no skill tokens.
    """
    documents = [
        text if isinstance(text, str) else ""
        for text in (job_skills, candidate_skills)
    ]

    # With no token on one side there is nothing to compare, and fitting on
    # all-empty text raises scikit-learn's "empty vocabulary" ValueError.
    if not all(doc.replace(',', ' ').split() for doc in documents):
        return 0.0

    # The default token_pattern keeps only runs of two or more word
    # characters, which silently drops skills such as "R", "C" and "C++".
    # Treat every run of non-space, non-comma characters as a token instead.
    vectorizer = TfidfVectorizer(token_pattern=r"(?u)[^\s,]+")
    tfidf_matrix = vectorizer.fit_transform(documents)
    similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
    return float(similarity) * 50  # Scale to 50


In [None]:
# Pair each job with the candidates whose 'Position' equals the job's
# 'Job Position'. The join is an inner join, so rows without a counterpart
# on the other side are dropped; clashing column names receive the
# '_job' / '_candidate' suffixes.
merged_df = jobs.merge(
    resume,
    how='inner',
    left_on='Job Position',
    right_on='Position',
    suffixes=('_job', '_candidate'),
)

In [None]:
# Skill component: compare the job's skills with the candidate's, row by row.
merged_df['Skill Score'] = merged_df.apply(
    lambda row: calculate_skill_similarity(row['Job Skills'], row['Skills']),
    axis=1,
)

In [None]:
# Location component: compare the job's location with the candidate's, row by row.
merged_df['Location Score'] = merged_df.apply(
    lambda row: calculate_location_score(row['Job Location'], row['Location']),
    axis=1,
)

In [None]:
# Preview the first five rows with the skill and location scores attached.
merged_df.head(n=5)

Unnamed: 0,Job Position,Job Skills,Job Location,Experience_job,Name,Position,Skills,Location,Experience_candidate,Skill Score,Location Score
0,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,John Smith,Software Engineer,"Python, Java, React, SQL, Git","Boston, USA",2 years,50.0,20
1,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Emily Johnson,Software Engineer,"Python, C++, JavaScript, AWS, Git","Los Angeles, USA",1 years,14.560971,20
2,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,William Carter,Software Engineer,"C++, Kubernetes, Docker, Microservices","Dallas, USA",0 years,0.0,20
3,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Marco Romano,Software Engineer,"JavaScript, Node.js, GraphQL, TypeScript","Milan, Italy",5 years,0.0,10
4,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Amelia Collins,Software Engineer,"Swift, Kotlin, Mobile App Development","Melbourne, Australia",1 years,0.0,10


In [None]:
# Experience component: compare the job's required experience with the
# candidate's, row by row.
merged_df['Experience Score'] = merged_df.apply(
    lambda row: calculate_experience_score(
        row['Experience_job'], row['Experience_candidate']
    ),
    axis=1,
)

In [None]:
# Preview the first five rows with the experience score attached.
merged_df.head(n=5)

Unnamed: 0,Job Position,Job Skills,Job Location,Experience_job,Name,Position,Skills,Location,Experience_candidate,Skill Score,Location Score,Experience Score
0,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,John Smith,Software Engineer,"Python, Java, React, SQL, Git","Boston, USA",2 years,50.0,20,22
1,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Emily Johnson,Software Engineer,"Python, C++, JavaScript, AWS, Git","Los Angeles, USA",1 years,14.560971,20,20
2,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,William Carter,Software Engineer,"C++, Kubernetes, Docker, Microservices","Dallas, USA",0 years,0.0,20,20
3,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Marco Romano,Software Engineer,"JavaScript, Node.js, GraphQL, TypeScript","Milan, Italy",5 years,0.0,10,25
4,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Amelia Collins,Software Engineer,"Swift, Kotlin, Mobile App Development","Melbourne, Australia",1 years,0.0,10,20


In [None]:
# Final score = skill + location + experience components.
merged_df['Final Score'] = (
    merged_df['Skill Score']
    .add(merged_df['Location Score'])
    .add(merged_df['Experience Score'])
)

In [None]:
# Clamp the final score into the 1-100 range. Values are capped, not
# rescaled: anything below 1 becomes 1 and anything above 100 becomes 100.
merged_df['Final Score'] = merged_df['Final Score'].clip(lower=1, upper=100)

In [None]:
# Compact view of the scored pairs. `final_df` is only created in a later
# cell, so referencing it here fails on a fresh kernel; build the preview
# directly from merged_df instead.
merged_df[['Position', 'Job Skills', 'Name', 'Skills', 'Final Score']].head()

Unnamed: 0,Position,Job Skills,Name,Skills,Final Score
0,Software Engineer,"Python, Java, React, SQL, Git",John Smith,"Python, Java, React, SQL, Git",92.0
1,Software Engineer,"Python, Java, React, SQL, Git",Emily Johnson,"Python, C++, JavaScript, AWS, Git",54.560971
2,Software Engineer,"Python, Java, React, SQL, Git",William Carter,"C++, Kubernetes, Docker, Microservices",40.0
3,Software Engineer,"Python, Java, React, SQL, Git",Marco Romano,"JavaScript, Node.js, GraphQL, TypeScript",35.0
4,Software Engineer,"Python, Java, React, SQL, Git",Amelia Collins,"Swift, Kotlin, Mobile App Development",30.0


In [None]:
# Preview the first five rows with the final score attached.
merged_df.head(n=5)

Unnamed: 0,Job Position,Job Skills,Job Location,Experience_job,Name,Position,Skills,Location,Experience_candidate,Skill Score,Location Score,Experience Score,Final Score
0,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,John Smith,Software Engineer,"Python, Java, React, SQL, Git","Boston, USA",2 years,50.0,20,22,92.0
1,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Emily Johnson,Software Engineer,"Python, C++, JavaScript, AWS, Git","Los Angeles, USA",1 years,14.560971,20,20,54.560971
2,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,William Carter,Software Engineer,"C++, Kubernetes, Docker, Microservices","Dallas, USA",0 years,0.0,20,20,40.0
3,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Marco Romano,Software Engineer,"JavaScript, Node.js, GraphQL, TypeScript","Milan, Italy",5 years,0.0,10,25,35.0
4,Software Engineer,"Python, Java, React, SQL, Git","New York, USA",0-2 years,Amelia Collins,Software Engineer,"Swift, Kotlin, Mobile App Development","Melbourne, Australia",1 years,0.0,10,20,30.0


In [None]:
# (rows, columns) of the scored job/candidate table.
merged_df.shape

(128, 13)

In [None]:
# Print the column names, non-null counts and dtypes of the scored table.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Job Position          128 non-null    object 
 1   Job Skills            128 non-null    object 
 2   Job Location          128 non-null    object 
 3   Experience_job        128 non-null    object 
 4   Name                  128 non-null    object 
 5   Position              128 non-null    object 
 6   Skills                128 non-null    object 
 7   Location              128 non-null    object 
 8   Experience_candidate  128 non-null    object 
 9   Skill Score           128 non-null    float64
 10  Location Score        128 non-null    int64  
 11  Experience Score      128 non-null    int64  
 12  Final Score           128 non-null    float64
dtypes: float64(2), int64(2), object(9)
memory usage: 13.1+ KB


In [None]:
# Keep only the columns that go into the exported result, in output order.
final_columns = [
    'Position',
    'Job Skills',
    'Experience_job',
    'Job Location',
    'Name',
    'Skills',
    'Experience_candidate',
    'Location',
    'Final Score',
]
final_df = merged_df[final_columns]


In [None]:
# Preview the first five rows of the result that will be exported.
final_df.head(n=5)

Unnamed: 0,Position,Job Skills,Experience_job,Job Location,Name,Skills,Experience_candidate,Location,Final Score
0,Software Engineer,"Python, Java, React, SQL, Git",0-2 years,"New York, USA",John Smith,"Python, Java, React, SQL, Git",2 years,"Boston, USA",92.0
1,Software Engineer,"Python, Java, React, SQL, Git",0-2 years,"New York, USA",Emily Johnson,"Python, C++, JavaScript, AWS, Git",1 years,"Los Angeles, USA",54.560971
2,Software Engineer,"Python, Java, React, SQL, Git",0-2 years,"New York, USA",William Carter,"C++, Kubernetes, Docker, Microservices",0 years,"Dallas, USA",40.0
3,Software Engineer,"Python, Java, React, SQL, Git",0-2 years,"New York, USA",Marco Romano,"JavaScript, Node.js, GraphQL, TypeScript",5 years,"Milan, Italy",35.0
4,Software Engineer,"Python, Java, React, SQL, Git",0-2 years,"New York, USA",Amelia Collins,"Swift, Kotlin, Mobile App Development",1 years,"Melbourne, Australia",30.0


In [None]:
# Write the result to CSV without the DataFrame index column.
final_df.to_csv(path_or_buf="UseFul_Dataset.csv", index=False)