In [1]:
# import libraries
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the data
# Both CSV files are read via relative paths, i.e. from the notebook's
# current working directory.
# NOTE(review): the previews of both frames show an "Unnamed: 0" column, which
# looks like an index that was written out when the CSVs were saved --
# consider passing index_col=0 if nothing downstream relies on that column.
# NOTE(review): list-looking columns (e.g. `skills`, `requisite_skill`) are
# presumably plain strings after read_csv, since no converters are given --
# confirm before treating them as Python lists.
job_data = pd.read_csv("JobsFE.csv")
resume_data = pd.read_csv("Resume Data Cleaned.csv")

In [3]:
# Preview the first rows of the resume table loaded from CSV.
resume_data.head()

Unnamed: 0,skills,institution,degree_names,graduation_year,field_of_study,experience_company_names,experience_start_dates,experience_end_dates,experience_related_skills,experience_positions,experience_responsibilities,languages,langauge_proficiency_levels
0,"['Big Data', 'Hadoop', 'Hive', 'Python', 'Mapr...",the amity school of engineering & technology (...,b.tech,2019,electronics,['coca-cola'],['Nov 2019'],['Till Date'],[['Big Data']],['Big Data Analyst'],Technical Support\nTroubleshooting\nCollaborat...,,
1,"['Software Development', 'Machine Learning', '...","birla institute of technology (bit), ranchi",b.tech,2018,electronics/telecommunication,['axis bank limited'],['June 2018'],['Till Date'],"[['Unified Payment Interface', 'Risk Predictio...",['Software Developer (Machine Learning Enginee...,"Trade Marketing Executive\nBrand Visibility, S...",,
2,"['Machine Learning', 'Linear Regression', 'Rid...",djr college and university,b.tech,2020,it,['remiro amio'],['Jan 2019'],['Sep 2019'],[None],['Intern'],iOS Lifecycle\nRequirement Analysis\nNative Fr...,,
3,"['Python', 'Machine Learning', 'MySQL', 'Data ...",nagpur university,b.tech/b.e.,2019,electronics/telecommunication,['amz loans and mortgages erc analytics'],['Jun 2019'],['till date'],"[['Data Analysis', 'Employee Satisfaction', 'H...",['Associate Analyst'],Machinery Maintenance\nTroubleshooting\nReport...,,
4,"['Data Analytics', 'Linear Regression', 'Logis...",raja kashwant singh general college,b.a,2020,economics,"['d&d consultancy', 'tanisha quromba brokerage']","['Jul 2020', 'Dec 2019']","['till date', 'May 2020']","[None, None]","['Associate Consultant', 'Junior Analyst Intern']",Machine Learning Leadership\nCross-Functional ...,,


In [4]:
# Preview the first rows of the job-postings table loaded from CSV.
job_data.head()

Unnamed: 0,Job Id,workplace,working_mode,salary,position,job_role_and_duties,requisite_skill,offer_details
0,1017340707950150,panama city panama,contract,69500.0,procurement manager,"['promote', 'supplier', 'diversity', 'initiati...","['supplier', 'diversity', 'programs', 'diversi...","['transportation', 'benefits', 'professional',..."
1,2421048253959975,tunis tunisia,part-time,84500.0,architectural designer,"['prepare', 'detailed', 'architectural', 'draw...","['architectural', 'drafting', 'autocad', '2d',...","['employee', 'assistance', 'programs', 'eap', ..."
2,1822636506606589,harare zimbabwe,full-time,69500.0,art teacher,"['coordinate', 'art', 'education', 'programs',...","['art', 'education', 'curriculum', 'program', ...","['employee', 'referral', 'programs', 'financia..."
3,3068000579894602,tirana albania,temporary,75500.0,environmental consultant,"['assess', 'environmental', 'impact', 'project...","['environmental', 'impact', 'analysis', 'data'...","['transportation', 'benefits', 'professional',..."
4,1747904829392680,city of baghdad iraq,temporary,90000.0,art teacher,"['coordinate', 'art', 'education', 'programs',...","['art', 'education', 'curriculum', 'program', ...","['employee', 'referral', 'programs', 'financia..."


# **TF-IDF Vectorizer, Cosine Similarity, and Top-N Recommendations**

In [5]:
def recommend_jobs(resume_df, jobs_df, top_n=5):
    """Recommend the jobs most similar to a resume using TF-IDF.

    Selected text columns of every resume and every job posting are joined
    into one document per row. All documents are vectorised together with a
    single ``TfidfVectorizer`` (English stop words removed) so that resumes and
    jobs share one vocabulary, and jobs are ranked by cosine similarity.

    Only the FIRST row of ``resume_df`` is ranked; any further resume rows
    contribute to the TF-IDF vocabulary/weights but get no recommendations.

    Parameters
    ----------
    resume_df : pandas.DataFrame
        Needs the columns ``skills``, ``institution``, ``degree_names``,
        ``field_of_study``, ``experience_related_skills``,
        ``experience_positions`` and ``experience_responsibilities``.
    jobs_df : pandas.DataFrame
        Needs the columns ``position``, ``job_role_and_duties``,
        ``requisite_skill`` and ``offer_details``.
    top_n : int, default 5
        Maximum number of jobs to return. Values <= 0 yield an empty result.

    Returns
    -------
    pandas.DataFrame
        Rows of ``jobs_df`` ordered from most to least similar (ties keep
        their original ``jobs_df`` order). When there is nothing to rank, an
        empty frame that still carries ``jobs_df``'s columns is returned.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty input frame.
    ValueError
        Propagated from ``TfidfVectorizer`` when no document contains a
        usable token (e.g. every document consists only of stop words).
    """
    resume_columns = (
        'skills',
        'institution',
        'degree_names',
        'field_of_study',
        'experience_related_skills',
        'experience_positions',
        'experience_responsibilities',
    )
    job_columns = (
        'position',
        'job_role_and_duties',
        'requisite_skill',
        'offer_details',
    )

    def is_missing(item):
        """Return True for None, pd.NA and float NaN (pandas' empty-cell markers)."""
        if item is None or item is pd.NA:
            return True
        # NaN is the only float that is not equal to itself.
        return isinstance(item, float) and item != item

    def process_column(value):
        """Flatten one level of list nesting and return a space-joined string.

        Missing values become an empty string instead of the literal tokens
        'nan' / 'None', which would otherwise enter the vocabulary and create
        artificial matches between resumes and jobs that merely share gaps.
        """
        if isinstance(value, list):
            flat_list = []
            for item in value:
                if isinstance(item, list):
                    flat_list.extend(item)  # Unpack sublist
                else:
                    flat_list.append(item)  # Directly add non-list items
            return ' '.join(str(item) for item in flat_list if not is_missing(item))
        return '' if is_missing(value) else str(value)

    def build_documents(df, columns):
        """Join the given columns of each row into one text document."""
        return [
            ' '.join(process_column(cell) for cell in cells)
            for cells in zip(*(df[column] for column in columns))
        ]

    # Nothing to rank: bail out before vectorising, because cosine_similarity
    # rejects matrices with zero rows and would raise instead of reaching a
    # "no results" branch.
    if len(resume_df) == 0 or len(jobs_df) == 0:
        print("No similar jobs found.")
        return jobs_df.iloc[0:0]

    # A non-positive top_n means "no recommendations"; without this guard a
    # negative value would slice off the *end* of the ranking instead.
    if top_n <= 0:
        return jobs_df.iloc[0:0]

    # 1-2. Build one text document per resume and per job posting
    resume_text = build_documents(resume_df, resume_columns)
    jobs_text = build_documents(jobs_df, job_columns)

    # 3-4. Fit TF-IDF on resumes and jobs together so they share a vocabulary
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(resume_text + jobs_text)

    n_resumes = len(resume_text)
    first_resume_tfidf = tfidf_matrix[:1]
    jobs_tfidf = tfidf_matrix[n_resumes:]

    # 5. Only the first resume is ranked, so compare just that row rather than
    #    materialising the full (resumes x jobs) similarity matrix.
    similarities = cosine_similarity(first_resume_tfidf, jobs_tfidf)[0]

    # 6. Highest similarity first; the stable sort keeps equal scores in
    #    jobs_df order so results are reproducible.
    ranked_job_positions = (-similarities).argsort(kind='stable')[:top_n]

    return jobs_df.iloc[ranked_job_positions.tolist()]

# Sample job data: a single posting. Every value is a one-element list (one
# row), so the list-valued columns hold a flat list of tokens per cell.
jobs_data = {
    'Job Id': [1017340707950150],
    'workplace': ['panama city panama'],
    'working_mode': ['contract'],
    'salary': [69500.0],
    'position': ['procurement manager'],
    'job_role_and_duties': [['promote', 'supplier', 'diversity', 'initiatives']],  # one row: flat token list
    'requisite_skill': [['supplier', 'diversity', 'assessment']],  # one row: flat token list
    'offer_details': [['transportation', 'benefits']]  # one row: flat token list
}
jobs_df = pd.DataFrame(jobs_data)

# Sample resume data: a single record passed as a one-element list of dicts,
# so the bracketed values below stay nested lists inside their cells.
# It is deliberately NOT named `resume_data`: that name already refers to the
# DataFrame loaded from "Resume Data Cleaned.csv", and rebinding it to a dict
# would break any cell that is (re-)run afterwards expecting the DataFrame.
sample_resume_data = {
    'skills': [['Big Data', 'Hadoop', 'Hive', 'Python']],  # nested list (list of lists)
    'institution': 'the amity school of engineering & technology (aset), noida',
    'degree_names': 'b.tech',
    'graduation_year': 2019,
    'field_of_study': 'electronics',
    'experience_related_skills': [['Big Data']],  # nested list (list of lists)
    'experience_positions': [['Big Data Analyst']],  # nested list (list of lists)
    'experience_responsibilities': [['Technical Support', 'Troubleshooting']]  # nested list (list of lists)
}
resume_df = pd.DataFrame([sample_resume_data])

# Call Function
recommendations = recommend_jobs(resume_df, jobs_df)
print(recommendations)

             Job Id           workplace working_mode   salary  \
0  1017340707950150  panama city panama     contract  69500.0   

              position                          job_role_and_duties  \
0  procurement manager  [promote, supplier, diversity, initiatives]   

                     requisite_skill               offer_details  
0  [supplier, diversity, assessment]  [transportation, benefits]  


# **Measure the execution time**

In [6]:
import time

# Start timing.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock, which can be adjusted
# while the cell runs and may be too coarse for a millisecond-scale call.
start_time = time.perf_counter()

# Call the function
recommendations = recommend_jobs(resume_df, jobs_df)

# End timing
end_time = time.perf_counter()

# Calculate and print the execution time (a single run, so expect some noise;
# use %timeit for a more robust estimate)
execution_time = end_time - start_time
print(f"Time taken to recommend jobs: {execution_time:.4f} seconds")

# Print recommendations
print(recommendations)

Time taken to recommend jobs: 0.0066 seconds
             Job Id           workplace working_mode   salary  \
0  1017340707950150  panama city panama     contract  69500.0   

              position                          job_role_and_duties  \
0  procurement manager  [promote, supplier, diversity, initiatives]   

                     requisite_skill               offer_details  
0  [supplier, diversity, assessment]  [transportation, benefits]  
