In [17]:

import pandas as pd
import numpy as np
from textblob import TextBlob
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/bikash/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/bikash/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [20]:
# Pre-processed inputs for the recommender: model_implementation() looks up
# 'student_id'/'tags' in student_data and 'job_tags'/'title' in job_data.
# Paths are relative to the notebook's working directory.
student_data = pd.read_csv(filepath_or_buffer='pre_processed_data.csv')
job_data = pd.read_csv(filepath_or_buffer='pre_processed_jobdata.csv')

def model_implementation(student_id, top_n=4):
    """Recommend jobs for one student by TF-IDF similarity of tag text.

    A TF-IDF vocabulary is fitted on ``job_data['job_tags']``, the student's
    ``tags`` string is projected into that space, and every job is scored by
    cosine similarity against it. A second ranking multiplies each similarity
    by the TextBlob sentiment polarity of the same job tag text.

    Parameters
    ----------
    student_id
        Value looked up in ``student_data['student_id']``. If several rows
        match, the first one is used.
    top_n : int, default 4
        Number of jobs returned in each ranking. Values larger than the number
        of jobs return every job; ``0`` returns empty frames.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``recommended_jobs`` with columns ``title`` and ``score`` (cosine
        similarity), and ``recommended_jobs_with_sentiment`` with columns
        ``title``, ``score`` (similarity * polarity) and ``sentiment_score``.
        Both are sorted best-first and keep the row labels of ``job_data``.

    Raises
    ------
    IndexError
        If no row of ``student_data`` has the given ``student_id``.
    ValueError
        If ``top_n`` is negative or the matched student has no tags.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    matching_tags = student_data.loc[student_data['student_id'] == student_id, 'tags']
    if matching_tags.empty:
        # IndexError is what the bare `.values[0]` lookup raised before; keep
        # the type for existing callers but say which id was missing.
        raise IndexError(f"student_id {student_id!r} not found in student_data")
    student_tags = matching_tags.iloc[0]
    if pd.isna(student_tags):
        raise ValueError(f"student {student_id!r} has no tags to match on")

    # read_csv turns empty fields into NaN, which neither TfidfVectorizer nor
    # TextBlob accepts; treat a missing tag string as an empty document.
    job_tags = job_data['job_tags'].fillna('').astype(str)

    # NOTE: the vectorizer is refitted on every call, which is slow for a large
    # job table; fit once outside and reuse if this is called in a loop.
    tfidf = TfidfVectorizer(stop_words='english')
    job_vector = tfidf.fit_transform(job_tags)
    student_vector = tfidf.transform([student_tags])

    # cosine_similarity returns shape (1, n_jobs); keep the single row.
    similarity = cosine_similarity(student_vector, job_vector)[0]

    sentiment_scores = np.array(
        [TextBlob(text).sentiment.polarity for text in job_tags], dtype=float
    )
    # NOTE(review): multiplying by polarity zeroes out neutral jobs and pushes
    # negative-polarity ones below unrelated jobs - confirm this is intended.
    weighted_score = similarity * sentiment_scores

    # Reverse first, then slice: `[-top_n:]` would return everything for 0.
    top_indices = similarity.argsort()[::-1][:top_n]
    top_indices_with_sentiment = weighted_score.argsort()[::-1][:top_n]

    recommended_jobs = pd.DataFrame({
        'title': job_data['title'].iloc[top_indices],
        'score': similarity[top_indices],
    })
    recommended_jobs_with_sentiment = pd.DataFrame({
        'title': job_data['title'].iloc[top_indices_with_sentiment],
        'score': weighted_score[top_indices_with_sentiment],
        'sentiment_score': sentiment_scores[top_indices_with_sentiment],
    })
    return recommended_jobs, recommended_jobs_with_sentiment



# Demo run for a single student: show the plain similarity ranking, then the
# sentiment-weighted one, each under its own heading.
recommended_jobs, recommended_jobs_with_sentiment = model_implementation('stu_2442')

print("Recommendation without sentiment analysis:", recommended_jobs, sep="\n")
print("\nRecommendation with sentiment analysis:", recommended_jobs_with_sentiment, sep="\n")

Recommendation without sentiment analysis:
                                             title     score
20042             Digital Banking Strategy Manager  0.304980
23578            SENIOR CORPORATE BANKING DIRECTOR  0.294060
23668                             Universal Banker  0.283753
14613  AVP Banking Center Manager (Branch Manager)  0.282938

Recommendation with sentiment analysis:
                                                   title     score  \
24550  HOA Banking Team Leads, Relationship Managers,...  0.072118   
14469  Cons Bnkng Relationship Spec (Relationship Ban...  0.068209   
29809                 Client Service Specialist (Teller)  0.065063   
32893                          Data Analytics Consultant  0.062568   

       sentiment_score  
24550         0.288038  
14469         0.249122  
29809         0.239591  
32893         0.357143  
