<a href="https://colab.research.google.com/github/SankalpC10/LLM/blob/main/Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## TFIDF

In [23]:
import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import string
import nltk

# Load datasets
user_details = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/job_recommendation_data/user_details.csv')
job_details = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/job_recommendation_data/job_details.csv')

# Download stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Function to preprocess text
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

# Apply preprocessing to the user and job descriptions
user_details['processed_description'] = user_details['user_description'].apply(preprocess_text)
job_details['processed_description'] = job_details['description'].apply(preprocess_text)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [24]:
# Use TF-IDF Vectorizer to convert text to numerical features
tfidf_vectorizer = TfidfVectorizer(max_features=5000)

user_tfidf_matrix = tfidf_vectorizer.fit_transform(user_details['processed_description'])
job_tfidf_matrix = tfidf_vectorizer.transform(job_details['processed_description'])

In [25]:
# Calculate cosine similarity between job descriptions and user descriptions
similarity_matrix = cosine_similarity(job_tfidf_matrix, user_tfidf_matrix)

In [26]:
def get_top_candidates_for_job(similarity_matrix, top_n=15):
    top_candidates = {}
    for job_idx in range(similarity_matrix.shape[0]):
        similar_candidates_idx = similarity_matrix[job_idx].argsort()[-top_n:][::-1]
        top_candidates[job_idx] = similar_candidates_idx
    return top_candidates

top_candidates = get_top_candidates_for_job(similarity_matrix)

# Mapping job indices to job_ids and candidate indices to user_ids
job_ids = job_details['job_id'].values
user_ids = user_details['user_id'].values

tfidf_recommendations = {}
for job_idx, candidate_indices in top_candidates.items():
    tfidf_recommendations[job_ids[job_idx]] = user_ids[candidate_indices]

# Output recommendations
for job_id, candidate_ids in tfidf_recommendations.items():
    print(f"Job ID {job_id} - Recommended Candidates: {candidate_ids}")

Job ID e4b85acc-096a-4b04-8b82-31195964276a - Recommended Candidates: ['acca9718-2707-47ab-987c-6bda0a3a9085'
 '70c8e1c5-03dd-4880-a90c-0bac1217d94c'
 'a243fc2b-9511-40c5-a472-bc94291cd92e'
 '47985075-ff0c-49c5-8b2a-8a8f378fbcc2'
 '4991c9d0-2b58-471f-bb0e-ff022d5fe636'
 '57f51bc4-809f-4097-8ce5-3dcf5fc2bff8'
 'f76eefb9-12fb-4bdf-8a1c-30617d8f1114'
 '46594dc6-de08-4afd-825d-bb02ae98882e'
 'd1d69ea4-d26d-4844-902f-5d1a7301729a'
 '807b0154-47d2-4bce-a1ac-49bd66f640a6'
 '6b2e7abf-89c2-40b6-adfa-2054753cd011'
 '41942e30-a9cc-47c9-ab13-9f2b11f45bd0'
 '89bb0eaf-5524-4db2-ba39-de0517b366c9'
 '8a30350c-7ba5-472b-a72f-97bde0ae07b0'
 'fc35c01c-bac3-4398-a8b7-8d0dbc2f4e7a']
Job ID cc2c7f99-fe0b-4ab0-9ea5-a30a773b025e - Recommended Candidates: ['68d88f31-35ce-4961-86f1-deaf19d006ae'
 'd1d69ea4-d26d-4844-902f-5d1a7301729a'
 'cdf45022-066a-47c3-8cc6-5fc286ebb90a'
 '65a4d386-54eb-471d-8a26-b01f64b80d4e'
 '82f3c35e-1ab8-468d-9106-4ec736d9b611'
 '1d4e043e-b34e-4702-9733-d286ec521086'
 '11709cf9-6975-4d9

In [27]:
## Jaccard

In [28]:
# Load datasets
user_details = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/job_recommendation_data/user_details.csv')
job_details = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/job_recommendation_data/job_details.csv')

# Function to preprocess skills
def preprocess_skills(skills_str):
    return set(skills_str.lower().split(','))

# Apply preprocessing to user_skills and requirement_skills
user_details['user_skills'] = user_details['user_skills'].apply(preprocess_skills)
job_details['requirement_skills'] = job_details['requirement_skills'].apply(preprocess_skills)

In [29]:
def jaccard_similarity(set1, set2):
    intersection = set1.intersection(set2)
    union = set1.union(set2)
    return len(intersection) / len(union)

# Calculate Jaccard similarity for each job and each user
def calculate_jaccard_similarity(user_skills, job_skills):
    return jaccard_similarity(user_skills, job_skills)

similarity_scores = []
for job_idx, job_row in job_details.iterrows():
    for user_idx, user_row in user_details.iterrows():
        score = calculate_jaccard_similarity(user_row['user_skills'], job_row['requirement_skills'])
        similarity_scores.append({
            'job_id': job_row['job_id'],
            'user_id': user_row['user_id'],
            'similarity_score': score
        })

similarity_df = pd.DataFrame(similarity_scores)

In [30]:
# Sort the similarity scores in descending order
similarity_df = similarity_df.sort_values(by='similarity_score', ascending=False)

# Get the top candidates for each job
top_n = 15
jaccard_recommendations = similarity_df.groupby('job_id').head(top_n)

#Display recommendations
for job_id, group in jaccard_recommendations.groupby('job_id'):
    print(f"Job ID {job_id} - Recommended Candidates:")
    for _, row in group.iterrows():
        print(f"User ID: {row['user_id']} - Similarity Score: {row['similarity_score']}")

Job ID 01a33345-c1c8-4f67-a031-348867a1d032 - Recommended Candidates:
User ID: fc28d0f3-b22d-4676-8ae8-41c0d623b0da - Similarity Score: 0.10714285714285714
User ID: fbc5e303-66b5-43ce-bbf8-f55108cf8d70 - Similarity Score: 0.1
User ID: cdf45022-066a-47c3-8cc6-5fc286ebb90a - Similarity Score: 0.08695652173913043
User ID: 93662381-a931-4e08-a528-a414e37f3845 - Similarity Score: 0.07142857142857142
User ID: 724e21d5-92e1-469f-acf1-19957d8208dc - Similarity Score: 0.06896551724137931
User ID: a4c5fdda-30a9-4af8-9a4e-ef462eadfac9 - Similarity Score: 0.06666666666666667
User ID: f4cacb81-d1dc-4a20-9303-f23762a6070e - Similarity Score: 0.0625
User ID: dd813746-b436-48c7-9075-f7a3c0f49ef9 - Similarity Score: 0.05555555555555555
User ID: 1d4e043e-b34e-4702-9733-d286ec521086 - Similarity Score: 0.05555555555555555
User ID: a7f7fd9c-4606-4fae-9b5b-72e076a01529 - Similarity Score: 0.05263157894736842
User ID: 42ae542f-f5f5-4c4e-a961-32ff8f8baefe - Similarity Score: 0.05263157894736842
User ID: 1bc2

In [31]:
## BERT

In [32]:
import torch
from transformers import BertTokenizer, BertModel

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Function to encode text using BERT
def encode_text(text_list):
    encoded_input = tokenizer(text_list, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = model(**encoded_input)
    # Use the [CLS] token representation
    embeddings = model_output.last_hidden_state[:, 0, :].numpy()
    return embeddings

# Encode user and job descriptions
user_embeddings = encode_text(user_details['user_description'].tolist())
job_embeddings = encode_text(job_details['description'].tolist())

In [33]:
from sklearn.metrics.pairwise import cosine_similarity

# Calculate cosine similarity between job descriptions and user descriptions
similarity_matrix = cosine_similarity(job_embeddings, user_embeddings)

In [34]:
def get_top_candidates_for_job(similarity_matrix, top_n=15):
    top_candidates = {}
    for job_idx in range(similarity_matrix.shape[0]):
        similar_candidates_idx = similarity_matrix[job_idx].argsort()[-top_n:][::-1]
        top_candidates[job_idx] = similar_candidates_idx
    return top_candidates

top_candidates = get_top_candidates_for_job(similarity_matrix)

# Mapping job indices to job_ids and candidate indices to user_ids
job_ids = job_details['job_id'].values
user_ids = user_details['user_id'].values

bert_recommendations = {}
for job_idx, candidate_indices in top_candidates.items():
    bert_recommendations[job_ids[job_idx]] = user_ids[candidate_indices]

# Output recommendations
for job_id, candidate_ids in bert_recommendations.items():
    print(f"Job ID {job_id} - Recommended Candidates: {candidate_ids}")

Job ID e4b85acc-096a-4b04-8b82-31195964276a - Recommended Candidates: ['68d88f31-35ce-4961-86f1-deaf19d006ae'
 '6b2e7abf-89c2-40b6-adfa-2054753cd011'
 '8688ad2a-d6cb-4ee9-8e0c-6647771a573f'
 '5639af50-0ae8-4434-87d2-ce6d1f7b15a9'
 'ab220f82-77d6-422f-9eda-6d49ff1a04c1'
 '4d51d36c-c61f-45ae-9811-87d29b5aab90'
 '07cd2cef-4afc-4ce5-8179-dbccf1f58a8c'
 'fc28d0f3-b22d-4676-8ae8-41c0d623b0da'
 'acca9718-2707-47ab-987c-6bda0a3a9085'
 '105b2f91-0423-4bee-adbc-488ffe871004'
 'f76eefb9-12fb-4bdf-8a1c-30617d8f1114'
 '0f20ec1f-6f02-415a-8661-e9b8c632df79'
 '24175946-860e-4cb7-8ec8-6fc1a3cccbe5'
 '724e21d5-92e1-469f-acf1-19957d8208dc'
 'aa02e7bd-4b9f-4036-ae38-0ca684451a6b']
Job ID cc2c7f99-fe0b-4ab0-9ea5-a30a773b025e - Recommended Candidates: ['376f324a-51a0-48c3-b185-49f89686dd90'
 '47985075-ff0c-49c5-8b2a-8a8f378fbcc2'
 '91789982-702a-40b1-93f6-0564fd654c89'
 '70c8e1c5-03dd-4880-a90c-0bac1217d94c'
 'acca9718-2707-47ab-987c-6bda0a3a9085'
 '0f20ec1f-6f02-415a-8661-e9b8c632df79'
 '55ccb384-e0b9-4f1

In [35]:
# Sentence Transformers

In [36]:
# !pip install sentence_transformers

In [57]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
user_details_path = '/content/drive/MyDrive/Colab Notebooks/job_recommendation_data/user_details.csv'
job_details_path = '/content/drive/MyDrive/Colab Notebooks/job_recommendation_data/job_details.csv'

user_details = pd.read_csv(user_details_path)
job_details = pd.read_csv(job_details_path)

# Extract relevant columns
user_descriptions = user_details['user_description'].fillna('')
job_descriptions = job_details['description'].fillna('')

# Load pre-trained Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode user and job descriptions
user_embeddings = model.encode(user_descriptions.tolist(), convert_to_tensor=True)
job_embeddings = model.encode(job_descriptions.tolist(), convert_to_tensor=True)

# Compute cosine similarities
similarity_matrix = util.pytorch_cos_sim(job_embeddings, user_embeddings).cpu().numpy()

# Function to get top candidates for each job
def get_top_candidates_for_job(similarity_matrix, top_n=15):
    top_candidates = {}
    for job_idx in range(similarity_matrix.shape[0]):
        similar_candidates_idx = similarity_matrix[job_idx].argsort()[-top_n:][::-1]
        top_candidates[job_idx] = similar_candidates_idx
    return top_candidates

top_candidates = get_top_candidates_for_job(similarity_matrix)

# Mapping job indices to job_ids and candidate indices to user_ids
job_ids = job_details['job_id'].values
user_ids = user_details['user_id'].values

st_recommendations = {}
for job_idx, candidate_indices in top_candidates.items():
    st_recommendations[job_ids[job_idx]] = user_ids[candidate_indices]

# Output recommendations
for job_id, candidate_ids in st_recommendations.items():
    print(f"Job ID {job_id} - Recommended Candidates: {candidate_ids}")



Job ID e4b85acc-096a-4b04-8b82-31195964276a - Recommended Candidates: ['acca9718-2707-47ab-987c-6bda0a3a9085'
 '47985075-ff0c-49c5-8b2a-8a8f378fbcc2'
 '105b2f91-0423-4bee-adbc-488ffe871004'
 '6b2e7abf-89c2-40b6-adfa-2054753cd011'
 '57f51bc4-809f-4097-8ce5-3dcf5fc2bff8'
 '89bb0eaf-5524-4db2-ba39-de0517b366c9'
 '0db38a4b-211d-4c0c-b09e-26e0145ecd9e'
 '507d1663-8625-4f71-8ea6-c0e1775bdf40'
 'f0ca594d-56b2-4dd4-8270-5d73aab31b03'
 '4991c9d0-2b58-471f-bb0e-ff022d5fe636'
 '8a332bbf-a00c-4598-8deb-47c98102f638'
 'a243fc2b-9511-40c5-a472-bc94291cd92e'
 'f5c33d30-2c08-4f32-9fdf-2c7df185f1b0'
 '91789982-702a-40b1-93f6-0564fd654c89'
 '8688ad2a-d6cb-4ee9-8e0c-6647771a573f']
Job ID cc2c7f99-fe0b-4ab0-9ea5-a30a773b025e - Recommended Candidates: ['105b2f91-0423-4bee-adbc-488ffe871004'
 '9e084716-2dd9-4109-a130-f379ab7c6bf4'
 '814cce10-9419-4a84-a04d-82dd1e245119'
 '333612a0-bbc1-46a2-ac8d-583a40da6218'
 '880894bd-44c3-46c0-a253-4500db61e946'
 '507d1663-8625-4f71-8ea6-c0e1775bdf40'
 '4991c9d0-2b58-471

In [45]:
jc_recommendation = jaccard_recommendations[jaccard_recommendations['job_id']=='f055ead4-10ff-44cb-acdb-9f31daf9509c']['user_id'].to_list()

In [51]:
tf_recommendation = list(tfidf_recommendations['f055ead4-10ff-44cb-acdb-9f31daf9509c'])

In [61]:
bt_recommendation = list(bert_recommendations['f055ead4-10ff-44cb-acdb-9f31daf9509c'])

In [62]:
stc_recommendation = list(st_recommendations['f055ead4-10ff-44cb-acdb-9f31daf9509c'])

In [129]:
oai_recommendation = [
    "4b2e814d-177a-469c-b058-4894620e78a1",  # Ashish Kumar
    "083482d8-a772-45f3-b302-ff64161e2604",  # Suresh Ramavath
    "68d88f31-35ce-4961-86f1-deaf19d006ae",  # Ashish Sharma
    "0c08b3e0-b698-4225-9bd3-e0dc51835f03",  # Roushan Kumar Jha
    "03962db4-0a3c-458d-a241-044c8b8262cd",  # Pranay M. Fulzele
    "5255a5fd-9420-49e6-9984-93d26c339b7f",  # Narendra Kumar Meena
    "069d1f7c-ff9e-4d04-b464-1411b07b9449",  # Azhar Mahebub Shaikh
    "0f20ec1f-6f02-415a-8661-e9b8c632df79",  # Suchandra Siripuram
    "8112ee99-e618-4e2d-a368-c524cfc0d531",  # Aman Gupta
    "44338869-243d-4582-a0bf-2341e62f6865",  # Kumari Ankita
    "376f324a-51a0-48c3-b185-49f89686dd90",  # Mayank Dhabarde
    "d47e1dd3-1c4c-459b-a863-f4c6b02f511d",  # Akash Patel
    "d73e5c6f-e28d-4c68-be63-9d267c62252d",  # Rahul Jain
    "15bc0596-db65-497b-aaf1-60558b2e8a56",  # Angothu Dheeraj Venkata Sai
    "89bb0eaf-5524-4db2-ba39-de0517b366c9",  # Anirudha Rajodiya
]

In [153]:
# counter
# Combine all lists into one
combined_list = jc_recommendation + tf_recommendation + bt_recommendation + stc_recommendation + oai_recommendation

# Import Counter from collections
from collections import Counter

# Count the frequency of each element
counter = Counter(combined_list)

# Sort the counts in descending order
sorted_counter = counter.most_common()

# Print the results
# for element, count in sorted_counter:
#     print(f"Element: {element}, Count: {count}")

In [131]:
len(sorted_counter)

62

In [132]:
def skill_analysis(user_list):
  for i in user_list:
        try:
            user_info = user_details[user_details['user_id'] == i][['full_name', 'user_skills']].values[0]
            full_name, user_skills = user_info
            print(f"Full Name: {full_name}, Skills: {user_skills}")
        except IndexError:
            # Handle case where user_id is not found
            pass

In [133]:
skill_analysis(tf_recommendation)

Full Name: Jay Dutonde, Skills: {'NumPy', 'Data Science', 'Chemical Engineering', 'Machine Learning', 'Deep Learning', 'CSS', 'Android', 'Chemical', 'ML', 'Data Analysis', 'Python', 'MySQL', 'AI', 'Pandas', 'HTML', 'Statistics'}
Full Name: Ishan Singh, Skills: {'Chemical Engineering', 'React', 'AI', 'Flask', 'NumPy', 'Python', 'HTML', 'Statistics', 'TypeScript', 'Data Analytics', 'PyTorch', 'Machine Learning', 'CSS', 'TensorFlow', 'Sales', 'ML', 'Pandas', 'JavaScript', 'Git', 'Excel', 'Review', 'Market Research', 'Chemical', 'Process Engineering'}
Full Name: Nandini Gurram, Skills: {'Deep Learning', 'AutoCAD', 'Design', 'NLP', 'Web', 'AI', 'Flask', 'NumPy', 'Management', 'Operations', 'Python', 'Content', 'Statistics', 'Machine Learning', 'TensorFlow', 'Pandas', 'Electronics', 'Git', 'SQL', 'Keras'}
Full Name: Satyam Patel, Skills: {'Chemical Engineering', 'React', 'Web', 'Marketing', 'MongoDB', 'Software', 'Python', 'HTML', 'Social Media Marketing', 'Frontend', 'CSS', 'Facebook', 'Sof

In [134]:
skill_analysis(jc_recommendation)

Full Name: Nenavath Rahul , Skills: {'Data Science', 'CAD', 'Thermodynamics', 'Management', 'CSS', 'NLP', 'Electrical', 'Data Analysis', 'ML', 'Python', 'Signal Processing', 'Marketing', 'HTML', 'Git'}
Full Name: Avendra Singh Chandrawacar, Skills: {'NumPy', 'Data Science', 'Data Analytics', 'Machine Learning', 'Excel', 'Management', 'Review', 'Power BI', 'Leadership', 'ML', 'Python', 'Team Management', 'Marketing', 'Pandas', 'SQL', 'AWS', 'Statistics'}
Full Name: Adesh Ashok Shivane, Skills: {'Data Science', 'Machine Learning', 'Software', 'ML', 'Python', 'MySQL', 'HTML'}
Full Name: Pinipe Sree Snigdha Rishitha, Skills: {'Machine Learning', 'AutoCAD', 'automobile', 'Design', 'Software', 'Management', 'NLP', 'Operations', 'Relationship', 'ML', 'Data Analysis', 'Python', 'Marketing', 'AI', 'Pandas', 'Statistics', 'Project Management', 'Data Modeling'}
Full Name: Amit Gupta , Skills: {'Data Science', 'Design', 'Marketing', 'AI', 'automobile', 'Management', 'Power BI', 'Python', 'Tableau'

In [135]:
skill_analysis(bt_recommendation)

Full Name: Vibhusha Sontakke, Skills: {'React Native', 'React', 'AutoCAD', 'Docker', 'GitHub', 'CRM', 'Web', 'AI', 'iOS', 'Management', 'Software', 'Android', 'Blogging', 'Node', 'Python', 'Tableau', 'Bootstrap', 'HTML', 'Frontend', 'CSS', 'Software Development', 'JavaScript', 'Django', 'Git', 'Developer', 'Excel', 'UI', 'Backend', 'MySQL', 'SolidWorks', 'Node.js'}
Full Name: Abhishek Kumar, Skills: {'Data Science', 'Linux', 'Deep Learning', 'React', 'Design', 'GitHub', 'Data Analysis', 'Web', 'Management', 'Software', 'Power BI', 'Python', 'Content', 'Machine Learning', 'Physical', 'CSS', 'Quality Assurance', 'Leadership', 'Software Development', 'SQL', 'JavaScript', 'Git', 'SolidWorks'}
Full Name: Sriramyogi Challamalla, Skills: {'Data Science', 'Linux', 'React', 'GitHub', 'Web', 'AI', 'NumPy', 'Fluid Mechanics', 'Thermodynamics', 'Management', 'Google Cloud', 'Blogging', 'Python', 'Content', 'Bootstrap', 'HTML', 'Machine Learning', 'PyTorch', 'CSS', 'Chrome', 'manufacturing', 'Compu

In [136]:
skill_analysis(stc_recommendation)

Full Name: Mukesh  Bunkar, Skills: {'Data Science', 'Deep Learning', 'Design', 'NLP', 'Data Analysis', 'Marketing', 'Event Management', 'AI', 'NumPy', 'Management', 'Software', 'Logistics', 'Power BI', 'Documentation', 'Python', 'Statistics', 'Machine Learning', 'TensorFlow', 'ML', 'Pandas', 'Excel', 'SQL', 'Data Scientist', 'Keras'}
Full Name: NIKUNJ PANSARI, Skills: {'Deep Learning', 'Compliance', 'React', 'GitHub', 'Data Analysis', 'Web', 'Product Management', 'AI', 'Management', 'Database Management', 'Software', 'Network Security', 'Blogging', 'Node', 'Operations', 'Java', 'Python', 'HTML', 'Machine Learning', 'CSS', 'TensorFlow', 'ML', 'Computer Science', 'JavaScript', 'Git', 'Developer', 'REST API', 'UI', 'Backend', 'MySQL', 'SQL', 'Node.js'}
Full Name: Deepu Kumar Rajak , Skills: {'Data Science', 'Coordination', 'Design', 'Data Analysis', 'Web', 'NumPy', 'Photoshop', 'Management', 'Software', 'Logistics', 'Python', 'HTML', 'Figma', 'Statistics', 'Machine Learning', 'Illustrator

In [137]:
skill_analysis(oai_recommendation)

Full Name: Ashish Kumar, Skills: {'NumPy', 'Data Science', 'Machine Learning', 'Deep Learning', 'Management', 'TensorFlow', 'Campaign', 'Power BI', 'ML', 'Data Analysis', 'Python', 'AI', 'Pandas', 'SQL', 'Business Development', 'Statistics'}
Full Name: Ramavath Suresh , Skills: {'Data Science', 'Deep Learning', 'Design', 'NLP', 'GitHub', 'AI', 'NumPy', 'Management', 'Python', 'Accounting', 'Cloud Services', 'Statistics', 'Data Analytics', 'Machine Learning', 'PyTorch', 'TensorFlow', 'Pandas', 'AWS', 'Git', 'Developer', 'Excel', 'MySQL', 'SQL', 'Keras'}
Full Name: Ashish Sharma, Skills: {'Linux', 'Deep Learning', 'NLP', 'AI', 'HR', 'NumPy', 'Management', 'Google Cloud', 'Software', 'Recruitment', 'Python', 'Statistics', 'Machine Learning', 'Physical', 'TensorFlow', 'ML', 'Developer', 'Git', 'Electronics', 'HR Management', 'Excel', 'SQL', 'Keras'}
Full Name: ROUSHAN KUMAR JHA, Skills: {'Data Science', 'Deep Learning', 'NLP', 'Selenium', 'Web', 'AI', 'Writing', 'NumPy', 'Fluid Mechanics',

In [148]:
sc_list = [i[0] for i in sorted_counter]
len(sc_list)

62

In [157]:
skill_analysis(sc_list)

Full Name: Mukesh  Bunkar, Skills: {'Data Science', 'Deep Learning', 'Design', 'NLP', 'Data Analysis', 'Marketing', 'Event Management', 'AI', 'NumPy', 'Management', 'Software', 'Logistics', 'Power BI', 'Documentation', 'Python', 'Statistics', 'Machine Learning', 'TensorFlow', 'ML', 'Pandas', 'Excel', 'SQL', 'Data Scientist', 'Keras'}
Full Name: Ashish Sharma, Skills: {'Linux', 'Deep Learning', 'NLP', 'AI', 'HR', 'NumPy', 'Management', 'Google Cloud', 'Software', 'Recruitment', 'Python', 'Statistics', 'Machine Learning', 'Physical', 'TensorFlow', 'ML', 'Developer', 'Git', 'Electronics', 'HR Management', 'Excel', 'SQL', 'Keras'}
Full Name: Amit Gupta , Skills: {'Data Science', 'Design', 'Marketing', 'AI', 'automobile', 'Management', 'Power BI', 'Python', 'Tableau', 'Statistics', 'Machine Learning', 'Sales', 'ML', 'manufacturing', 'Project Management', 'Electronics', 'MySQL', 'SQL', 'Finance'}
Full Name: Siddharth Garg, Skills: {'Mechanism', 'Management', 'Reporting', 'SEO', 'Electrical',

In [151]:
shortlisted = ['cdf45022-066a-47c3-8cc6-5fc286ebb90a',
'57f51bc4-809f-4097-8ce5-3dcf5fc2bff8',
'eefa9653-6601-4052-af94-4b73f7c03147',
'50d21b66-9f96-45bc-9e22-c4abdcd498c7',
'304a4b93-54c6-46d2-9b2f-48856356cb75',
'211d8f01-2efd-442b-8083-fb234f88acd1',
'd134d38a-c658-4104-8b50-c8650d92dddd',
'ed5d92b3-4fbb-4b61-9a2e-17096d3f5cbb',
'959d0fbd-5b52-4cc9-b228-bc82812fe224',
'68d88f31-35ce-4961-86f1-deaf19d006ae'
]

In [152]:
skill_analysis(shortlisted)

Full Name: Sahna M Ali, Skills: {'Compliance', 'Design', 'Data Analysis', 'Marketing', 'Product Management', 'AI', 'NumPy', 'Human Resources', 'Management', 'Recruitment', 'Python', 'Tableau', 'Content', 'Statistics', 'Pandas', 'Excel', 'Manager', 'Editing', 'SQL'}
Full Name: Vibhusha Sontakke, Skills: {'React Native', 'React', 'AutoCAD', 'Docker', 'GitHub', 'CRM', 'Web', 'AI', 'iOS', 'Management', 'Software', 'Android', 'Blogging', 'Node', 'Python', 'Tableau', 'Bootstrap', 'HTML', 'Frontend', 'CSS', 'Software Development', 'JavaScript', 'Django', 'Git', 'Developer', 'Excel', 'UI', 'Backend', 'MySQL', 'SolidWorks', 'Node.js'}
Full Name: Mukesh Adhikary, Skills: {'Design', 'Campaign', 'AI', 'Flask', 'Ethereum', 'Mechanism', 'Management', 'Software', 'QA', 'Operations', 'Python', 'Content', 'HTML', 'Statistics', 'CSS', 'Solidity', 'Software Development', 'JavaScript', 'AWS', 'Project Management', 'Swift', 'MySQL', 'Problem Solving'}
Full Name: Jay Dutonde, Skills: {'NumPy', 'Data Science

In [155]:
for i in shortlisted:
  if i in sc_list:
    print(i)

57f51bc4-809f-4097-8ce5-3dcf5fc2bff8
eefa9653-6601-4052-af94-4b73f7c03147
50d21b66-9f96-45bc-9e22-c4abdcd498c7
304a4b93-54c6-46d2-9b2f-48856356cb75
959d0fbd-5b52-4cc9-b228-bc82812fe224
68d88f31-35ce-4961-86f1-deaf19d006ae


In [156]:
# prompt: I want to get count of shorlisted list items that are in tf_recommendation, jc_recommendation, bt_recommendation, stc_recommendation, oai_recommendation individually

tf_count = sum(i in tf_recommendation for i in shortlisted)
jc_count = sum(i in jc_recommendation for i in shortlisted)
bt_count = sum(i in bt_recommendation for i in shortlisted)
stc_count = sum(i in stc_recommendation for i in shortlisted)
oai_count = sum(i in oai_recommendation for i in shortlisted)

print(f"TF Count: {tf_count}")
print(f"JC Count: {jc_count}")
print(f"BT Count: {bt_count}")
print(f"STC Count: {stc_count}")
print(f"OAI Count: {oai_count}")


TF Count: 4
JC Count: 0
BT Count: 4
STC Count: 1
OAI Count: 1
