### Importing Libraries

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### Loading dataset

In [2]:
df=pd.read_csv('job_final.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,url,Position,Company,Location,Job_Description
0,0,https://www.glassdoor.co.in/partner/jobListing...,Software Testing Internship,Smart Food Safe Solutions Inc,– Bengaluru,About the company:\nSmart Food Safe Solutions ...
1,1,https://www.glassdoor.co.in/partner/jobListing...,Embedded Software Testing,Mobiveil,– Bengaluru,Location : Bangalore\nExperience : 4+ Years\n\...
2,2,https://www.glassdoor.co.in/partner/jobListing...,Senior Engineer - Software Testing (Bangalore ...,Open Systems International,– Bengaluru,"Open Systems International, Inc. (OSI) www.osi..."
3,3,https://www.glassdoor.co.in/partner/jobListing...,Software Testing Engineer,Bloom Solutions,– Bengaluru,About the Job\n\nSoftware Testing Engineer\n\n...
4,4,https://www.glassdoor.co.in/partner/jobListing...,CIEL/SEL/1888: Software testing Engineer,CIEL HR Services,– Bengaluru,Location: Bangalore\nExperience: 3 to 6Years\n...


### Handling missing values

In [37]:
import pandas as pd

# Ensure all Job_Description entries are strings and fill NaN values
df['Job_Description'] = df['Job_Description'].fillna('').astype(str)

# Create a unique job_id column using the DataFrame index
df['job_id'] = df.index

# Display the first few rows of the dataframe to confirm the changes
df.head()


Unnamed: 0.1,Unnamed: 0,url,Position,Company,Location,Job_Description,job_id
0,0,https://www.glassdoor.co.in/partner/jobListing...,Software Testing Internship,Smart Food Safe Solutions Inc,– Bengaluru,About the company:\nSmart Food Safe Solutions ...,0
1,1,https://www.glassdoor.co.in/partner/jobListing...,Embedded Software Testing,Mobiveil,– Bengaluru,Location : Bangalore\nExperience : 4+ Years\n\...,1
2,2,https://www.glassdoor.co.in/partner/jobListing...,Senior Engineer - Software Testing (Bangalore ...,Open Systems International,– Bengaluru,"Open Systems International, Inc. (OSI) www.osi...",2
3,3,https://www.glassdoor.co.in/partner/jobListing...,Software Testing Engineer,Bloom Solutions,– Bengaluru,About the Job\n\nSoftware Testing Engineer\n\n...,3
4,4,https://www.glassdoor.co.in/partner/jobListing...,CIEL/SEL/1888: Software testing Engineer,CIEL HR Services,– Bengaluru,Location: Bangalore\nExperience: 3 to 6Years\n...,4


### Skills Extraction fromjob descriptions

In [38]:
# Function to extract skills from job descriptions
def extract_skills(job_description):
    skills = []
    keywords = ['Python', 'Machine learning', 'Artificial Intelligence', 'Blockchain', 'IoT', 'Cloud Computing', 'Software Testing', 'Embedded']
    for word in keywords:
        if word.lower() in job_description.lower():
            skills.append(word)
    return skills

df['Skills'] = df['Job_Description'].apply(extract_skills)

# Function to extract experience from job descriptions
def extract_experience(job_description):
    import re
    match = re.search(r'(\d+)\+? Years?', job_description)
    if match:
        return int(match.group(1))
    return 0

df['Experience'] = df['Job_Description'].apply(extract_experience)

# Display the dataframe with the new columns
df[['Position', 'Skills', 'Experience', 'Location', 'job_id']].head()


Unnamed: 0,Position,Skills,Experience,Location,job_id
0,Software Testing Internship,"[Machine learning, Artificial Intelligence, Bl...",0,– Bengaluru,0
1,Embedded Software Testing,"[Python, Embedded]",4,– Bengaluru,1
2,Senior Engineer - Software Testing (Bangalore ...,"[Python, Software Testing]",0,– Bengaluru,2
3,Software Testing Engineer,[Software Testing],0,– Bengaluru,3
4,CIEL/SEL/1888: Software testing Engineer,[],0,– Bengaluru,4


### Content-based filtering is a method of making recommendations based on the content of items rather than user interactions or ratings. In this case, the content is the job descriptions. The primary approach involves using the textual data to understand the similarity between items and recommend similar items to the user.
### Methods:
1. TF-IDF Vectorization: TF-IDF (Term Frequency-Inverse Document Frequency): This transforms textual job descriptions into numerical representations.
2. Cosine Similarity: Measures the cosine of the angle between two vectors. In the context of TF-IDF vectors, it measures the similarity between two job descriptions.
3. Content-Based Job Recommendation: get_recommendations(title, cosine_sim=cosine_sim): This function finds jobs similar to a specified job title using cosine similarity
4. Enhanced Recommendations with User Feedback (continued): compute_similarity_with_feedback(job_id, user_feedback): Enhances content-based filtering by incorporating user feedback.

In [41]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Vectorize job descriptions
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['Job_Description'])

# Compute cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to compute similarity with feedback
def compute_similarity_with_feedback(job_id, user_feedback):
    job_index = df[df['job_id'] == job_id].index[0]
    job_vector = tfidf_matrix[job_index]

    feedback_indices = [df[df['job_id'] == feedback['job_id']].index[0] for feedback in user_feedback]
    feedback_vectors = tfidf_matrix[feedback_indices]
    feedback_ratings = [feedback['rating'] for feedback in user_feedback]

    similarity_scores = cosine_similarity(job_vector, feedback_vectors).flatten()
    weighted_similarity = sum(similarity * rating for similarity, rating in zip(similarity_scores, feedback_ratings))
    
    return weighted_similarity

# Function to get job recommendations based on job title
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = df[df['Position'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]  # Get top 5 similar jobs
    job_indices = [i[0] for i in sim_scores]
    return df.iloc[job_indices]

# Function to recommend jobs based on user inputs and feedback
def recommend_jobs(user_skills, user_experience, user_location, user_feedback=None):
    # Filter jobs based on user location
    location_filtered_df = df[df['Location'].str.contains(user_location, case=False, na=False)]
    
    # Score jobs based on skills match, experience, and user feedback
    def score_job(row):
        skill_score = len(set(user_skills) & set(row['Skills']))
        experience_score = max(0, user_experience - row['Experience'])
        feedback_score = compute_similarity_with_feedback(row['job_id'], user_feedback) if user_feedback else 0
        return skill_score * 2 - experience_score + feedback_score
    
    location_filtered_df['Score'] = location_filtered_df.apply(score_job, axis=1)
    top_jobs = location_filtered_df.sort_values(by='Score', ascending=False).head(5)
    return top_jobs

# Example usage for job recommendations based on title
print(get_recommendations('Software Testing Internship'))

# Example usage for user-based job recommendations
user_skills = ['Python', 'Machine learning']
user_experience = 2
user_location = 'Bengaluru'
user_feedback = [{'job_id': 0, 'rating': 5}, {'job_id': 1, 'rating': 3}]

print(recommend_jobs(user_skills, user_experience, user_location, user_feedback))


     Unnamed: 0                                                url  \
139         143  https://www.glassdoor.co.in/partner/jobListing...   
79           80  https://www.glassdoor.co.in/partner/jobListing...   
46           46  https://www.glassdoor.co.in/partner/jobListing...   
111         115  https://www.glassdoor.co.in/partner/jobListing...   
11           11  https://www.glassdoor.co.in/partner/jobListing...   

                                              Position  \
139              Data Science and Analytics Internship   
79   Machine Learning part time job/internship at B...   
46        Machine Learning And Data Science Internship   
111                        Software Testing Internship   
11                         Software Testing Internship   

                                       Company      Location  \
139     TECHCEPTRON TECHNOLOGY PRIVATE LIMITED        – Pune   
79              AIMonk Labs Technology Limited   – Bengaluru   
46   Tangerine Innovation Labs Private

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location_filtered_df['Score'] = location_filtered_df.apply(score_job, axis=1)
