In [14]:
from gensim.models import Word2Vec
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import spacy

In [2]:
job_data = pd.read_csv('Processed dataset/job.csv')
resume_data = pd.read_csv('Processed dataset/resume.csv')

In [5]:
vectorizer = TfidfVectorizer()
corpus  = job_data['Job Descriptions'].tolist()+resume_data['Resume'].tolist()

In [6]:
tfidf_matrix = vectorizer.fit_transform(corpus)

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

In [51]:
job_input = ['''Job Title: Web Developer
Location: Remote
Type: Full-time

Job Summary:
We are looking for a skilled and enthusiastic Web Developer to join our growing team. The ideal candidate will have experience in building responsive, high-quality websites and web applications, with a strong focus on user experience and performance. You will work closely with designers, project managers, and other developers to create dynamic and visually appealing websites.

Key Responsibilities:
Develop, test, and maintain web applications and websites.
Collaborate with cross-functional teams (designers, product managers) to define and implement new features.
Optimize applications for maximum speed and scalability.
Ensure web applications are mobile-responsive and cross-browser compatible.
Troubleshoot, debug, and resolve issues.
Write clean, maintainable code following best practices and standards.
Stay up-to-date with emerging web technologies and trends.
Requirements:
Proficiency in HTML, CSS, and JavaScript.
Experience with front-end frameworks (e.g., React, Vue.js, Angular).
Knowledge of back-end languages such as Node.js, Python, Ruby, or PHP.
Familiarity with databases (e.g., MySQL, MongoDB, PostgreSQL).
Strong understanding of version control using Git.
Experience with web services and RESTful APIs.
Excellent problem-solving and communication skills.
Ability to work independently and in a team.
Preferred Skills:
Knowledge of responsive design principles.
Experience with web performance optimization.
Familiarity with Agile/Scrum methodologies.
Understanding of web accessibility standards.
Experience with deployment and CI/CD pipelines.
Education & Experience:
Bachelor’s degree in Computer Science, Web Development, or related field (or equivalent experience).
1–3 years of professional web development experience for entry-level; 3+ years for mid/senior level.

''']

In [52]:
resume_input = ['''Name: John Doe
Contact: johndoe@email.com | (123) 456-7890
LinkedIn: linkedin.com/in/johndoe

Objective: A data-driven professional with 3+ years of experience in analyzing data and providing actionable insights. Skilled in SQL, Python, Tableau, and data visualization. Looking for a challenging Data Analyst role to contribute to data-driven decision-making.

Skills:

SQL (MySQL, PostgreSQL)
Python (Pandas, NumPy)
Tableau
Data Analysis & Visualization
Statistical Analysis
Data Cleaning & Preprocessing
Experience:

Data Analyst | XYZ Corp.
March 2021 - Present

Developed and maintained interactive dashboards using Tableau.
Wrote SQL queries to extract, filter, and aggregate large datasets.
Analyzed sales data to identify trends and forecast future sales.
Collaborated with marketing and finance teams to improve business processes.
Junior Data Analyst | ABC Ltd.
June 2019 - February 2021

Assisted senior analysts in data collection, cleaning, and preprocessing.
Conducted statistical analysis to measure customer satisfaction.
Automated daily reporting tasks using Python.
Education:

Bachelor of Science in Computer Science
University of XYZ, Graduated: May 2019''',
'''Name: Jane Smith
Contact: janesmith@email.com | (987) 654-3210
LinkedIn: linkedin.com/in/janesmith

Objective: A skilled software engineer with 4+ years of experience in developing scalable and efficient software solutions using Java, Spring Framework, and microservices. Looking for a challenging role to leverage my expertise in full-stack development and cloud technologies.

Skills:

Java (Spring, Hibernate)
RESTful APIs
Microservices Architecture
AWS, Docker, Kubernetes
ReactJS
Git, CI/CD
Experience:

Software Engineer | Tech Solutions Inc.
January 2020 - Present

Designed and implemented a microservices-based architecture for a cloud-native application using Spring Boot.
Developed RESTful APIs for seamless integration with front-end applications.
Worked with AWS services like S3, Lambda, and DynamoDB.
Collaborated with front-end developers to build an intuitive user interface with React.
Junior Software Developer | CodeLab
July 2018 - December 2019

Assisted in the development of Java-based applications using the Spring Framework.
Wrote unit and integration tests to ensure application reliability.
Participated in daily Agile sprints and sprint retrospectives.
Education:

Bachelor of Technology in Computer Science
ABC University, Graduated: May 2018
''',
'''Name: Sarah Johnson
Contact: sarahjohnson@email.com | (555) 123-4567
LinkedIn: linkedin.com/in/sarahjohnson

Objective: Dynamic and results-driven Marketing Manager with 5+ years of experience in digital marketing, campaign management, and brand development. Passionate about leveraging data-driven strategies to drive growth and engagement. Looking for a challenging position to utilize my skills and expertise in a forward-thinking company.

Skills:

Digital Marketing (SEO, SEM, Social Media)
Brand Strategy
Marketing Analytics (Google Analytics, HubSpot)
Campaign Management
Team Leadership and Collaboration
Content Creation and Management
Experience:

Marketing Manager | ABC Marketing Agency
January 2020 - Present

Developed and executed successful digital marketing campaigns across SEO, Google Ads, and social media platforms.
Led a team of marketing professionals to increase brand awareness and customer engagement.
Analyzed campaign performance and optimized marketing strategies to improve ROI.
Managed content creation for email campaigns, blogs, and social media posts.
Digital Marketing Specialist | XYZ Corporation
June 2017 - December 2019

Managed and optimized paid search campaigns (Google Ads, Bing Ads).
Conducted A/B testing on landing pages to increase conversion rates.
Worked closely with product and sales teams to align marketing strategies with business goals.
Education:

Bachelor of Science in Marketing
University of ABC, Graduated: May 2017''',
'''John Doe
john.doe@email.com | (123) 456-7890 | LinkedIn: linkedin.com/in/johndoe | GitHub: github.com/johndoe

Objective:
Aspiring Web Developer with a solid foundation in front-end and back-end development. Eager to apply skills in HTML, CSS, JavaScript, and React to contribute to a growing web development team.

Skills:

HTML, CSS, JavaScript
React.js, Bootstrap, jQuery
Git, GitHub, Version Control
Basic knowledge of Node.js
Responsive Web Design
RESTful APIs
Experience:
Intern | Web Development Intern
Tech Solutions Inc. | June 2023 – August 2023

Assisted in building responsive websites using HTML, CSS, and JavaScript.
Worked with the design team to convert mockups into fully functional web pages.
Optimized web applications for mobile devices and various screen sizes.
Wrote clean and reusable code and participated in daily stand-ups and code reviews.
Education:
Bachelor of Science in Computer Science
University of California, Los Angeles | Graduated: May 2023

Relevant coursework: Web Development, JavaScript Programming, Data Structures'''
]

In [54]:
nlp = spacy.load('en_core_web_sm')
def processing(content):
    doc = nlp(content)
    processed_tokens = [
        token.lemma_ for token in doc 
        if not token.is_stop and not token.is_punct and not token.is_space
    ]
    processed_text = ' '.join(processed_tokens)
    processed_text= re.sub(r'[^A-Za-z0-9\s]', '', processed_text)
    return processed_text.lower()

In [55]:
job_tokens= [processing(job) for job in job_input]
resume_tokens = [processing(resume) for resume in resume_input]

In [60]:

resume_vectors = vectorizer.transform(resume_tokens).toarray()
resume_vectors
resume_vectors.shape

(4, 16849)

In [58]:
job_vectors = vectorizer.transform(job_tokens).toarray()
job_vectors

array([[0., 0., 0., ..., 0., 0., 0.]])

In [61]:
sim= []
job_vector = job_vectors[0].reshape(1, -1)
for idx,resume_vector in enumerate(resume_vectors):
    resume_vector = resume_vector.reshape(1, -1)
    similarity = cosine_similarity(resume_vector, job_vector)
    sim.append((idx, similarity))

In [62]:
sorted_sim = sorted(sim, key=lambda x: x[1], reverse=True)

In [63]:
for idx, score in sorted_sim:
    print(f"Resume index: {idx+1}, Similarity score: {score}")

Resume index: 4, Similarity score: [[0.33731705]]
Resume index: 2, Similarity score: [[0.1318524]]
Resume index: 1, Similarity score: [[0.05799219]]
Resume index: 3, Similarity score: [[0.04622304]]
