In [6]:
# import libraries
import json
import time
from typing import List

import pandas as pd
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# FastAPI application instance; the /recommend endpoint is registered on it
# through the @app.post decorator.
app = FastAPI()

def load_json_file(file: UploadFile):
    """Parse an uploaded JSON document and wrap it in a Pandas DataFrame.

    Args:
        file: Upload whose underlying file-like object is exposed as
            ``file.file``; its entire content is read and decoded as JSON.

    Returns:
        The DataFrame produced by passing the decoded JSON value straight to
        ``pd.DataFrame``.
    """
    raw_payload = file.file.read()
    parsed = json.loads(raw_payload)
    return pd.DataFrame(parsed)

def process_column(value):
    """Flatten arbitrarily nested lists and render the result as one string.

    Scalars are converted with ``str``. Lists (and tuples) are walked
    depth-first, at any nesting depth, and their leaf values are joined with
    single spaces in their original order.

    Missing values (``None`` and float NaN) contribute no text, so that
    absent fields do not inject the literal tokens ``None``/``nan`` into the
    text handed to the vectorizer.

    Args:
        value: A scalar, or a possibly nested list/tuple of scalars.

    Returns:
        str: The space-joined text; ``''`` when there is nothing to render.
    """
    def _leaves(item):
        # Depth-first walk yielding non-missing scalars, left to right.
        if isinstance(item, (list, tuple)):
            for child in item:
                yield from _leaves(child)
        # `item != item` is true only for NaN; skip it together with None.
        elif item is not None and not (isinstance(item, float) and item != item):
            yield item

    return ' '.join(str(leaf) for leaf in _leaves(value))

# **TF-IDF Vectorizer, Cosine Similarity, and Top-N**

In [8]:
def recommend_jobs(resume_df, jobs_df, top_n=5):
    """Recommend the jobs most similar to a resume using TF-IDF cosine similarity.

    Every resume row and every job row is collapsed into one text document by
    joining selected columns through ``process_column``. All documents are
    vectorized together by a single ``TfidfVectorizer`` (English stop words
    removed) so resumes and jobs share one vocabulary, and jobs are then
    ranked by cosine similarity.

    Only the FIRST row of ``resume_df`` is used for ranking; additional resume
    rows still take part in fitting the TF-IDF weights.

    Args:
        resume_df: DataFrame with columns ``skills``, ``institution``,
            ``degree_names``, ``field_of_study``,
            ``experience_related_skills``, ``experience_positions`` and
            ``experience_responsibilities``.
        jobs_df: DataFrame with columns ``position``,
            ``job_role_and_duties``, ``requisite_skill`` and
            ``offer_details``.
        top_n: Maximum number of jobs to return. Zero or negative values
            yield no recommendations.

    Returns:
        list[dict]: Up to ``top_n`` rows of ``jobs_df`` as records, most
        similar first. Empty when either input has no rows.

    Raises:
        KeyError: If a required column is missing from a non-empty input.
    """
    resume_columns = [
        'skills',
        'institution',
        'degree_names',
        'field_of_study',
        'experience_related_skills',
        'experience_positions',
        'experience_responsibilities',
    ]
    job_columns = [
        'position',
        'job_role_and_duties',
        'requisite_skill',
        'offer_details',
    ]

    # Nothing to rank. This must be checked before vectorizing: scikit-learn
    # rejects zero-row input, so a guard placed after the similarity call
    # would never be reached.
    if len(resume_df) == 0 or len(jobs_df) == 0:
        return []

    def row_to_text(row, columns):
        # One document per row: the chosen columns, flattened and space-joined.
        return ' '.join(process_column(row[column]) for column in columns)

    resume_text = resume_df.apply(lambda row: row_to_text(row, resume_columns), axis=1).tolist()
    jobs_text = jobs_df.apply(lambda row: row_to_text(row, job_columns), axis=1).tolist()

    # Fit on resumes and jobs together so both sides share the same feature space.
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(resume_text + jobs_text)
    resume_tfidf = tfidf_matrix[:len(resume_df)]
    jobs_tfidf = tfidf_matrix[len(resume_df):]

    cosine_similarities = cosine_similarity(resume_tfidf, jobs_tfidf)

    # Job positions for the first resume, ordered from most to least similar.
    ranked_job_indices = cosine_similarities.argsort(axis=1)[:, ::-1][0]

    # Clamp at zero: a negative count would otherwise slice "all but the last k".
    num_recommendations = max(0, min(top_n, len(ranked_job_indices)))
    top_job_indices = ranked_job_indices[:num_recommendations].tolist()

    return jobs_df.iloc[top_job_indices].to_dict(orient='records')

@app.post("/recommend")
async def get_recommendations(resume_file: UploadFile = File(...), job_file: UploadFile = File(...), top_n: int = 5):
    """Recommend jobs for an uploaded resume.

    Both uploads are parsed as JSON into DataFrames and passed to
    ``recommend_jobs``.

    Args:
        resume_file: Uploaded JSON file with the resume data.
        job_file: Uploaded JSON file with the job postings.
        top_n: Maximum number of recommendations to return.

    Returns:
        dict: ``{"recommendations": [...]}`` holding the job records returned
        by ``recommend_jobs``.

    Raises:
        HTTPException: Status 400 when an upload cannot be decoded/loaded or
            when a field required by ``recommend_jobs`` is missing.
    """
    def _build_recommendations():
        # JSON decoding errors (and UnicodeDecodeError) are ValueError
        # subclasses; report them as a client error instead of a 500.
        try:
            resume_df = load_json_file(resume_file)
            jobs_df = load_json_file(job_file)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=f"Invalid JSON upload: {exc}") from exc

        # recommend_jobs indexes rows by column name, so an absent column
        # surfaces as KeyError.
        try:
            return recommend_jobs(resume_df, jobs_df, top_n)
        except KeyError as exc:
            raise HTTPException(status_code=400, detail=f"Missing required field: {exc}") from exc

    # Parsing and TF-IDF scoring are synchronous; run them in a worker thread
    # so this coroutine does not block the event loop while they execute.
    recommendations = await run_in_threadpool(_build_recommendations)
    return {"recommendations": recommendations}

In [9]:
# Sample data for a quick manual check of recommend_jobs.
# The job posting is column-oriented (one list per field); the resume is
# record-oriented (a list with one dict). pd.DataFrame accepts both layouts.
jobs_json = {
    "Job Id": [1_017_340_707_950_150],
    "workplace": ["panama city panama"],
    "working_mode": ["contract"],
    "salary": [69_500.0],
    "position": ["procurement manager"],
    "job_role_and_duties": [
        ["promote", "supplier", "diversity", "initiatives"],
    ],
    "requisite_skill": [
        ["supplier", "diversity", "assessment"],
    ],
    "offer_details": [
        ["transportation", "benefits"],
    ],
}
jobs_df = pd.DataFrame(data=jobs_json)

resume_json = [
    {
        "skills": [["Big Data", "Hadoop", "Hive", "Python"]],
        "institution": "the amity school of engineering & technology (aset), noida",
        "degree_names": "b.tech",
        "graduation_year": 2019,
        "field_of_study": "electronics",
        "experience_related_skills": [["Big Data"]],
        "experience_positions": [["Big Data Analyst"]],
        "experience_responsibilities": [["Technical Support", "Troubleshooting"]],
    },
]
resume_df = pd.DataFrame(data=resume_json)

# Run the recommender with the default top_n and show the raw records.
recommendations = recommend_jobs(resume_df=resume_df, jobs_df=jobs_df)
print(recommendations)

[{'Job Id': 1017340707950150, 'workplace': 'panama city panama', 'working_mode': 'contract', 'salary': 69500.0, 'position': 'procurement manager', 'job_role_and_duties': ['promote', 'supplier', 'diversity', 'initiatives'], 'requisite_skill': ['supplier', 'diversity', 'assessment'], 'offer_details': ['transportation', 'benefits']}]


# **Measure the execution time:** 

In [10]:
# Time a single recommend_jobs call on the sample data.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed intervals; time.time() follows the wall clock, which can
# be adjusted while the measurement is running.
start_time = time.perf_counter()
recommendations = recommend_jobs(resume_df, jobs_df)
end_time = time.perf_counter()

execution_time = end_time - start_time
print(f"Time taken to recommend jobs: {execution_time:.4f} seconds")
print(recommendations)

Time taken to recommend jobs: 0.0167 seconds
[{'Job Id': 1017340707950150, 'workplace': 'panama city panama', 'working_mode': 'contract', 'salary': 69500.0, 'position': 'procurement manager', 'job_role_and_duties': ['promote', 'supplier', 'diversity', 'initiatives'], 'requisite_skill': ['supplier', 'diversity', 'assessment'], 'offer_details': ['transportation', 'benefits']}]
