In [11]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
job_data = pd.read_csv('jobs_data2.csv')
job_data = job_data.drop_duplicates(subset=["skills"], keep='first')
job_data["content"] = job_data["Heading"]+" "+job_data["description"] + " " +job_data["skills"].apply(lambda x: " ".join(eval(x)))
vectorizer = CountVectorizer()
job_matrix = vectorizer.fit_transform(job_data["content"])
job_indices = pd.Series(job_data.index, index=job_data["Heading"]).drop_duplicates()
def get_recommendations(input_skills, top=3):
    input_matrix = vectorizer.transform([input_skills])
    similarity_scores = cosine_similarity(job_matrix, input_matrix).flatten()
    top_indices = similarity_scores.argsort()[::-1][:top]
    recommended_jobs = job_data["Heading"].iloc[top_indices].values
    print(similarity_scores)
    return recommended_jobs
input_skills = "python,java, python, python"
print([input_skills])
print(len([input_skills]))
top = 6
recommended_jobs = get_recommendations(input_skills, top)

print(f"Recommended jobs for skills '{input_skills}':")
for job_title in recommended_jobs:
    print(job_title)

['python,java, python, python']
1
[0.         0.         0.15389675 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.17038855 0.
 0.         0.         0.         0.         0.         0.04714045
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.06324555 0.         0.         0.        ]
Recommended jobs for skills 'python,java, python, python':
Associate Tech Specialist
Data Analyst
IBM BAW Developer
Android Freshers
Game Programmer/ Developer
Php Laravel Developer


In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

job_data = pd.read_csv('jobs_data2.csv')
job_data = job_data.drop_duplicates(subset=["skills"], keep='first')
job_data["content"] = job_data["Heading"] + " " + job_data["description"] + " " + job_data["skills"].apply(lambda x: " ".join(eval(x)))

vectorizer = TfidfVectorizer()
job_matrix = vectorizer.fit_transform(job_data["content"])
job_indices = pd.Series(job_data.index, index=job_data["Heading"]).drop_duplicates()

def get_recommendations(input_skills, top=3):
    input_matrix = vectorizer.transform([input_skills])
    similarity_scores = linear_kernel(job_matrix, input_matrix).flatten()
    top_indices = similarity_scores.argsort()[::-1][:top]
    recommended_jobs = job_data["Heading"].iloc[top_indices].values
    print(similarity_scores)
    return recommended_jobs

input_skills = "python,java, python, python"
print([input_skills])
print(len([input_skills]))
top = 6
recommended_jobs = get_recommendations(input_skills, top)

print(f"Recommended jobs for skills '{input_skills}':")
for job_title in recommended_jobs:
    print(job_title)


['python,java, python, python']
1
[0.         0.         0.17625328 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.18466275 0.
 0.         0.         0.         0.         0.         0.05058772
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.07005874 0.         0.         0.        ]
Recommended jobs for skills 'python,java, python, python':
Associate Tech Specialist
Data Analyst
IBM BAW Developer
Android Freshers
Game Programmer/ Developer
Php Laravel Developer


In [23]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel

job_data = pd.read_csv('jobs_data2.csv')
job_data = job_data.drop_duplicates(subset=["skills"], keep='first')
job_data["content"] = job_data["Heading"] + " " + job_data["description"] + " " + job_data["skills"].apply(lambda x: " ".join(eval(x)))

vectorizer_count = CountVectorizer()
job_matrix_count = vectorizer_count.fit_transform(job_data["content"])

vectorizer_tfidf = TfidfVectorizer()
job_matrix_tfidf = vectorizer_tfidf.fit_transform(job_data["content"])

job_indices = pd.Series(job_data.index, index=job_data["Heading"]).drop_duplicates()

def get_recommendations(input_skills, top=3, count_weight=0.5, tfidf_weight=0.5):
    input_matrix_count = vectorizer_count.transform([input_skills])
    input_matrix_tfidf = vectorizer_tfidf.transform([input_skills])

    similarity_scores_count = cosine_similarity(job_matrix_count, input_matrix_count).flatten()
    similarity_scores_tfidf = linear_kernel(job_matrix_tfidf, input_matrix_tfidf).flatten()

    # Combine the similarity scores using weighted average
    similarity_scores_combined = (count_weight * similarity_scores_count) + (tfidf_weight * similarity_scores_tfidf)

    top_indices = similarity_scores_combined.argsort()[::-1][:top]
    recommended_jobs = job_data["Heading"].iloc[top_indices].values
    print(similarity_scores_combined)
    return recommended_jobs

input_skills = "python,java, python, python"
top = 6
count_weight = 0.5  # Weight for cosine similarity
tfidf_weight = 0.5  # Weight for TF-IDF similarity

recommended_jobs = get_recommendations(input_skills, top, count_weight, tfidf_weight)

print(f"Recommended jobs for skills '{input_skills}':")
for job_title in recommended_jobs:
    print(job_title)



[0.         0.         0.16507501 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.17752565 0.
 0.         0.         0.         0.         0.         0.04886409
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.06665215 0.         0.         0.        ]
Recommended jobs for skills 'python,java, python, python':
Associate Tech Specialist
Data Analyst
IBM BAW Developer
Android Freshers
Game Programmer/ Developer
Php Laravel Developer


In [1]:
import pandas as pd
from sklearn.metrics import pairwise_distances
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
job_data = pd.read_csv('jobs_data2.csv')
job_data = job_data.drop_duplicates(subset=["skills"], keep='first')
job_data["content"] = job_data["Heading"]+" "+job_data["description"] + " " +job_data["skills"].apply(lambda x: " ".join(eval(x)))
vectorizer = CountVectorizer()
job_matrix = vectorizer.fit_transform(job_data["content"])
job_indices = pd.Series(job_data.index, index=job_data["Heading"]).drop_duplicates()
def jaccard_similarity(X, Y):
    X_dense = X.toarray()
    Y_dense = Y.toarray()
    return  1-pairwise_distances(X_dense, Y_dense, metric='jaccard')

def get_recommendations(input_skills, top=3):
    
    input_matrix = vectorizer.transform([input_skills])
    jaccard_similarity_scores = jaccard_similarity(job_matrix, input_matrix).flatten()
    top_indices = similarity_scores.argsort()[::-1][:top]
    recommended_jobs = job_data["Heading"].iloc[top_indices].values
    print(similarity_scores)
    return recommended_jobs
input_skills = "python,java, python, python"
print([input_skills])
print(len([input_skills]))
top = 6
recommended_jobs = get_recommendations(input_skills, top)

print(f"Recommended jobs for skills '{input_skills}':")
for job_title in recommended_jobs:
    print(job_title)

['python,java, python, python']
1


NameError: name 'pairwise_distances' is not defined

In [4]:
job_data

Unnamed: 0,Heading,Company,location,description,Vacancy Link,Experience Needed,Salary,skills
0,Ui Developer,Oracle,Chennai,"Classic ASP, .NET, MVC, .NET Core, JavaScript,...",https://www.naukri.com/job-listings-ui-develop...,1-5 Yrs,Not disclosed,"['CSS', 'UI Development', 'HTML', 'javascript'..."
1,Professional / Technical Fixed Term,Tata Motors Finance,Chennai,"Experience in interface of SAP, p-first system...",https://www.naukri.com/job-listings-profession...,1-4 Yrs,Not disclosed,"['technical', 'translation', 'analytical skill..."
2,Data Analyst,Quickxpert Infotech,Chennai,We have immediate openings for Data Analyst Tr...,https://www.naukri.com/job-listings-data-analy...,0-5 Yrs,Not disclosed,"['SQL', 'python', 'c++', 'data analysis', 'dbm..."
3,Associate Tech Specialist,Essenware Private Limted,Chennai,Candidate should be a self- . Experience with ...,https://www.naukri.com/job-listings-associate-...,1-2 Yrs,Not disclosed,"['fusion', 'oracle', 'application', 'applicati..."
4,Software Development Engineer in Test (SDET),Browserstack,Chennai,. 1- 3 years of experience writing automated t...,https://www.naukri.com/job-listings-software-d...,1-3 Yrs,Not disclosed,"['quality assurance', 'automation', 'test case..."
5,Software Engineer,Essenware Private Limted,Chennai,. Should know how to operate computer . Candid...,https://www.naukri.com/job-listings-software-e...,1-3 Yrs,Not disclosed,"['software', 'software engineering', 'excel', ..."
6,Associate Tech Specialist,Essenware Private Limted,Chennai,Develop UI prototypes for website in complianc...,https://www.naukri.com/job-listings-associate-...,1-2 Yrs,Not disclosed,"['Backend', 'DOM', 'Data management', 'Archite..."
7,Senior Business Analyst,IGT Solutions,Chennai,"BE / B Tech in Computer Science, Information T...",https://www.naukri.com/job-listings-senior-bus...,1-5 Yrs,Not disclosed,"['project', 'information technology', 'technol..."
8,Senior Business Analyst,IGT Solutions,Chennai,Analyze / understand all data and information ...,https://www.naukri.com/job-listings-senior-bus...,1-5 Yrs,Not disclosed,"['business analysis', 'ms office', 'senior', '..."
9,Software Engineer,Infineon,Chennai,Exposure to Unix\/ Linux Platforms . . Prefera...,https://www.naukri.com/job-listings-software-e...,1-3 Yrs,Not disclosed,"['object', 'coding', 'xml', 'data structures',..."


In [5]:
#job_data['skills'] = job_data['skills'].apply(lambda x: " ".join(eval(x)))
job_data["content"] = job_data["description"] + " " +job_data["skills"].apply(lambda x: " ".join(eval(x)))

In [6]:
job_data["content"]

0       Classic ASP, .NET, MVC, .NET Core, JavaScript,...
1       Experience in interface of SAP, p-first system...
2       We have immediate openings for Data Analyst Tr...
3       Candidate should be a self- . Experience with ...
4       . 1- 3 years of experience writing automated t...
5       . Should know how to operate computer . Candid...
6       Develop UI prototypes for website in complianc...
7       BE / B Tech in Computer Science, Information T...
8       Analyze / understand all data and information ...
9       Exposure to Unix\/ Linux Platforms . . Prefera...
10      Previous working experience as a PHP / Laravel...
11      Monitoring and measuring customer experience a...
12      Minimum 5 years experience in Data Warehousing...
13      TO contribute TO a project ON a given technolo...
14      Required Skills Networking: Experience in vali...
15      6+ years of experience in consulting services ...
16      Exposure to Terraform and Cloud Formations Exp...
17      . Bach

In [7]:
len(job_data)

40

In [8]:
job_data['skills']

0       ['CSS', 'UI Development', 'HTML', 'javascript'...
1       ['technical', 'translation', 'analytical skill...
2       ['SQL', 'python', 'c++', 'data analysis', 'dbm...
3       ['fusion', 'oracle', 'application', 'applicati...
4       ['quality assurance', 'automation', 'test case...
5       ['software', 'software engineering', 'excel', ...
6       ['Backend', 'DOM', 'Data management', 'Archite...
7       ['project', 'information technology', 'technol...
8       ['business analysis', 'ms office', 'senior', '...
9       ['object', 'coding', 'xml', 'data structures',...
10      ['sql', 'front end', 'development', 'rdbms', '...
11      ['improvement', 'vulnerability assessment', 'c...
12      ['query', 'sql', 'data', 'estimation', 'report...
13      ['project', 'technical analysis', 'mentoring',...
14      ['networking', 'automation', 'linux', 'redhat ...
15      ['partner management', 'research', 'business a...
16      ['cloud', 'subversion', 'aws', 'python', 'arch...
17      ['soft

In [9]:
vectorizer = CountVectorizer()
job_matrix = vectorizer.fit_transform(job_data["content"])
job_indices = pd.Series(job_data.index, index=job_data["Heading"]).drop_duplicates()

In [10]:
job_matrix.shape

(40, 398)

In [11]:
job_indices

Heading
Ui Developer                                                          0
Professional / Technical Fixed Term                                   1
Data Analyst                                                          2
Associate Tech Specialist                                             3
Software Development Engineer in Test (SDET)                          4
Software Engineer                                                     5
Associate Tech Specialist                                             6
Senior Business Analyst                                               7
Senior Business Analyst                                               8
Software Engineer                                                     9
Php Laravel Developer                                                10
DevOps Engineer                                                      11
Associate Tech Specialist                                            12
Associate Tech Specialist                               

In [12]:
def get_recommendations(input_skills, top=3):
    input_matrix = vectorizer.transform([input_skills])
    similarity_scores = cosine_similarity(job_matrix, input_matrix).flatten()
    top_indices = similarity_scores.argsort()[::-1][:top]
    recommended_jobs = job_data["Heading"].iloc[top_indices].values
    print(similarity_scores)
    return recommended_jobs

In [13]:
input_skills = "ui design cs"
top = 6
recommended_jobs = get_recommendations(input_skills, top)

print(f"Recommended jobs for skills '{input_skills}':")
for job_title in recommended_jobs:
    print(job_title)

[0.30618622 0.         0.         0.         0.         0.
 0.10206207 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.17609018 0.21081851 0.         0.         0.         0.
 0.         0.         0.         0.51847585 0.         0.
 0.         0.         0.         0.         0.         0.
 0.12309149 0.         0.         0.1132277 ]
Recommended jobs for skills 'ui design cs':
UI Developer
Ui Developer
Associate Solution Designer
Solution Designer
IBM BAW Developer
Game Programmer/ Developer
