In [2]:
import PyPDF2
import docx2txt
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Fetch the NLTK resources used further down: the stop-word lists read by
# stopwords.words('english') and the 'punkt' data that word_tokenize is
# presumably backed by (confirm against the installed NLTK version).
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/pradeesh11/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/pradeesh11/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        str: The page texts joined in page order with no separator. A page
        for which the reader yields no text contributes an empty string.
    """
    # The context manager closes the handle even when parsing or text
    # extraction raises part-way through the document.
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps the join valid if a page yields None instead of a string.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

In [4]:
def extract_text_from_docx(file_path):
    """Return whatever text docx2txt extracts from the file at file_path."""
    return docx2txt.process(file_path)

In [5]:
def preprocess_text(text):
    """Tokenize text and keep only alphabetic tokens that are not stop words.

    The text is lower-cased before tokenization, so every returned token is
    lower-case.

    Args:
        text: Raw text to clean.

    Returns:
        list: The surviving tokens, in their original order.
    """
    english_stops = set(stopwords.words('english'))
    kept = []
    for token in word_tokenize(text.lower()):
        # Drop numbers, punctuation and mixed tokens such as "sql3".
        if not token.isalpha():
            continue
        if token in english_stops:
            continue
        kept.append(token)
    return kept

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def calculate_similarity(resume_text, jd_text):
    """Return the TF-IDF cosine similarity between a resume and a job description.

    Args:
        resume_text: Resume text as a single string.
        jd_text: Job-description text as a single string.

    Returns:
        float: Cosine similarity of the two TF-IDF vectors. 0.0 is returned
        when either text is blank or when vectorization finds no vocabulary.
    """
    # A blank document cannot match anything; short-circuit so that two blank
    # inputs do not make the vectorizer fail on an empty vocabulary.
    if not resume_text.strip() or not jd_text.strip():
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([resume_text, jd_text])
    except ValueError:
        # Raised when no token survives vectorization (empty vocabulary).
        return 0.0

    # Entry [0][1] of the pairwise matrix is resume-vs-job-description.
    return float(cosine_similarity(vectors)[0][1])

In [None]:

def _extract_document_text(file_path):
    """Return the text of a .pdf or .docx file, or None for any other extension."""
    # Compare on a lower-cased copy so "CV.PDF" is treated like "CV.pdf".
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered.endswith('.docx'):
        return extract_text_from_docx(file_path)
    return None


def main(resume_file_path="Dataset/TESTINGFILE1.pdf", jd_file_path="Dataset/TESTINGFILE2.pdf"):
    """Compare a resume file with a job-description file and print their similarity.

    Each file is read with the extractor that matches its own extension, so
    the two files may be in different formats.

    Args:
        resume_file_path: Path to the resume (.pdf or .docx).
        jd_file_path: Path to the job description (.pdf or .docx).

    Returns:
        None. Prints the similarity score, or "Unsupported file format." when
        either file has an extension other than .pdf/.docx.
    """
    resume_text = _extract_document_text(resume_file_path)
    if resume_text is None:
        print("Unsupported file format.")
        return

    jd_text = _extract_document_text(jd_file_path)
    if jd_text is None:
        print("Unsupported file format.")
        return

    preprocessed_resume_text = preprocess_text(resume_text)
    preprocessed_jd_text = preprocess_text(jd_text)

    # preprocess_text yields tokens; calculate_similarity takes whole strings.
    similarity_score = calculate_similarity(' '.join(preprocessed_resume_text), ' '.join(preprocessed_jd_text))
    print("The similarity score between the resume and job description is: ", similarity_score)

# Run the comparison when this cell/script is executed as the main module.
if __name__ == "__main__":
    main()

In [16]:
def main(resume_file_path="PradeeshResume.pdf", jd_text=None):
    """Score a resume file against a plain-text job description and print the result.

    Args:
        resume_file_path: Path to the resume; must end in .pdf or .docx
            (matched case-insensitively).
        jd_text: Job description as plain text. When None, the sample
            Machine Learning Engineer posting embedded below is used.

    Returns:
        None. Prints the similarity score, or "Unsupported file format." when
        the resume has any other extension.
    """
    if jd_text is None:
        # Provide the job description as plain text
        jd_text = """Amazon is at the forefront of innovative technology, driving progress in AI and machine learning solutions. Our mission is to leverage advanced data science and engineering to deliver impactful and scalable products for our clients. We are looking for a passionate Machine Learning Engineer to join our dynamic team and contribute to cutting-edge projects.

Key Responsibilities:

Design, develop, and deploy scalable machine learning models and algorithms for a variety of applications.
Collaborate with cross-functional teams, including data scientists, software engineers, and product managers, to gather requirements and implement solutions.
Preprocess and analyze large datasets, ensuring data quality and feature engineering for optimal model performance.
Build and maintain machine learning pipelines for training, evaluation, and deployment.
Optimize and fine-tune models for performance, scalability, and accuracy using techniques such as hyperparameter tuning and model compression.
Conduct thorough testing and validation of models to ensure reliability and robustness in production.
Monitor and maintain deployed models, implementing strategies for model retraining and performance tracking.
Stay current with the latest advancements in machine learning and AI, applying innovative techniques and technologies as appropriate.
Document processes, model architectures, and code to ensure maintainability and knowledge sharing within the team.
Qualifications:

Bachelor's or Master's degree in Computer Science, AI/ML, Data Science, or a related field.
Strong proficiency in Python and experience with libraries/frameworks such as TensorFlow, PyTorch, or Scikit-Learn.
Solid understanding of machine learning algorithms, neural networks, and deep learning architectures.
Experience with data preprocessing and feature engineering.
Hands-on experience with cloud platforms (AWS, GCP, or Azure) for deploying machine learning models.
Familiarity with MLOps practices, including version control, CI/CD pipelines, and model monitoring.
Excellent problem-solving skills and the ability to work collaboratively in a team environment.
Knowledge of big data tools (e.g., Spark, Hadoop) and database technologies is a plus.
Nice-to-Have:

Experience in Natural Language Processing (NLP) and working with transformers and LLMs.
Understanding of computer vision and related frameworks.
Contributions to open-source ML projects or participation in hackathons.
Perks and Benefits:

Competitive salary and performance-based bonuses.
Flexible working hours and remote work options.
Access to training programs, conferences, and certifications for continuous learning.
Comprehensive health and wellness benefits.
A collaborative and inclusive work culture with opportunities for career growth."""

    # Compare on a lower-cased copy so "Resume.PDF" is accepted as well.
    lowered_path = resume_file_path.lower()
    if lowered_path.endswith('.pdf'):
        resume_text = extract_text_from_pdf(resume_file_path)  # pdf
    elif lowered_path.endswith('.docx'):
        resume_text = extract_text_from_docx(resume_file_path)  # docx
    else:
        print("Unsupported file format.")
        return

    preprocessed_resume_text = preprocess_text(resume_text)
    preprocessed_jd_text = preprocess_text(jd_text)

    # preprocess_text yields tokens; calculate_similarity takes whole strings.
    similarity_score = calculate_similarity(' '.join(preprocessed_resume_text), ' '.join(preprocessed_jd_text))
    print("The similarity score between the resume and job description is: ", similarity_score)

# Run the comparison when this cell/script is executed as the main module.
if __name__ == "__main__":
    main()


The similarity score between the resume and job description is:  0.32852134033908575


In [30]:
import PyPDF2
import spacy
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer, util

# Load the models once; the functions in this cell read them as globals.
nlp = spacy.load("en_core_web_sm")  # spaCy pipeline used by preprocess_text
keybert_model = KeyBERT()  # KeyBERT for skill and keyword extraction
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')  # SBERT for similarity matching


def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        str: The page texts joined in page order with no separator. A page
        for which the reader yields no text contributes an empty string.
    """
    # The context manager closes the handle even when parsing or text
    # extraction raises part-way through the document.
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps the join valid if a page yields None instead of a string.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Step 1: Preprocessing Function
def preprocess_text(text):
    """Lower-case and lemmatize text with spaCy, dropping noise tokens.

    Args:
        text: Raw text to clean.

    Returns:
        str: Space-joined lemmas of every token that is not a stop word,
        punctuation, or whitespace.
    """
    doc = nlp(text.lower())
    # is_space drops the newline/indentation tokens produced for runs of
    # whitespace; without it they end up in the output as "lemmas".
    tokens = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(tokens)


# Step 2: Skill and Keyword Extraction Function
def extract_keywords(text, top_n=100):
    """Extract key words and two-word phrases from text with KeyBERT.

    Args:
        text: Text to mine for keywords.
        top_n: Maximum number of keywords/keyphrases requested from KeyBERT.
            Defaults to 100, the value this cell previously hard-coded.

    Returns:
        list: The first element of each entry KeyBERT returns (the
        keyword/keyphrase), in KeyBERT's order.
    """
    scored_phrases = keybert_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=top_n,
    )
    return [entry[0] for entry in scored_phrases]


# Step 3: Semantic Similarity Function
def calculate_similarity(resume_text, job_description):
    """Return the cosine similarity of the SBERT embeddings of two texts.

    Args:
        resume_text: First text to embed.
        job_description: Second text to embed.

    Returns:
        The similarity as a Python scalar (the result's .item()).
    """
    # Encode both texts in a single call; row 0 is the resume, row 1 the job.
    pair = sbert_model.encode([resume_text, job_description], convert_to_tensor=True)
    return util.pytorch_cos_sim(pair[0], pair[1]).item()


# Step 4: Scoring Function
def calculate_ats_score(resume_text, job_description):
    """Return an ATS-style match percentage for a resume against a job description.

    Both texts are run through preprocess_text. Two similarities are then
    computed with calculate_similarity: one between the space-joined keywords
    from extract_keywords, and one between the full cleaned texts. They are
    blended as 0.5 * keyword + 0.3 * overall + 0.2 * overall, so the last two
    weights both apply to the same overall similarity.

    Args:
        resume_text: Raw resume text.
        job_description: Raw job-description text.

    Returns:
        The blended similarity times 100, rounded to two decimals.
    """
    resume_clean = preprocess_text(resume_text)
    jd_clean = preprocess_text(job_description)

    # Keyword lists are flattened to strings because calculate_similarity
    # takes two texts.
    resume_kw_text = " ".join(extract_keywords(resume_clean))
    jd_kw_text = " ".join(extract_keywords(jd_clean))

    keyword_sim = calculate_similarity(resume_kw_text, jd_kw_text)
    text_sim = calculate_similarity(resume_clean, jd_clean)

    blended = (0.5 * keyword_sim) + (0.3 * text_sim) + (0.2 * text_sim)
    return round(blended * 100, 2)


ATS Score: 8.29%


In [26]:
import spacy
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the models once; the functions in this cell read them as globals.
nlp = spacy.load("en_core_web_sm")  # spaCy pipeline used by preprocess_text
keybert_model = KeyBERT()  # KeyBERT for skill and keyword extraction
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')  # SBERT for similarity matching


# Step 1: Preprocessing Function
def preprocess_text(text):
    """Lower-case and lemmatize text with spaCy, dropping noise tokens.

    Args:
        text: Raw text to clean.

    Returns:
        str: Space-joined lemmas of every token that is not a stop word,
        punctuation, or whitespace.
    """
    doc = nlp(text.lower())
    # is_space drops the newline/indentation tokens produced for runs of
    # whitespace; without it they end up in the output as "lemmas".
    tokens = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(tokens)


# Step 2: Skill and Keyword Extraction Function
def extract_keywords(text, top_n=10):
    """Extract key words and two-word phrases from text with KeyBERT.

    Args:
        text: Text to mine for keywords.
        top_n: Maximum number of keywords/keyphrases requested from KeyBERT.
            Defaults to 10, the value this cell previously hard-coded.

    Returns:
        list: The first element of each entry KeyBERT returns (the
        keyword/keyphrase), in KeyBERT's order.
    """
    scored_phrases = keybert_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=top_n,
    )
    return [entry[0] for entry in scored_phrases]


# Step 3: Semantic Similarity Function
def calculate_similarity(resume_text, job_description):
    """Return the cosine similarity of the SBERT embeddings of two texts.

    Args:
        resume_text: First text to embed.
        job_description: Second text to embed.

    Returns:
        The similarity as a Python scalar (the result's .item()).
    """
    # Encode both texts in a single call; row 0 is the resume, row 1 the job.
    pair = sbert_model.encode([resume_text, job_description], convert_to_tensor=True)
    return util.pytorch_cos_sim(pair[0], pair[1]).item()


# Step 4: Scoring Function
def calculate_ats_score(resume_text, job_description):
    """
    Returns an ATS-style match percentage for a resume against a job description.

    Both texts are run through preprocess_text. Two similarities are then
    computed with calculate_similarity: one between the space-joined keywords
    from extract_keywords, and one between the full cleaned texts. They are
    blended as 0.5 * keyword + 0.3 * overall + 0.2 * overall, so the last two
    weights both apply to the same overall similarity. The result is the
    blend times 100, rounded to two decimals.
    """
    resume_clean = preprocess_text(resume_text)
    jd_clean = preprocess_text(job_description)

    # Keyword lists are flattened to strings because calculate_similarity
    # takes two texts.
    resume_kw_text = " ".join(extract_keywords(resume_clean))
    jd_kw_text = " ".join(extract_keywords(jd_clean))

    keyword_sim = calculate_similarity(resume_kw_text, jd_kw_text)
    text_sim = calculate_similarity(resume_clean, jd_clean)

    blended = (0.5 * keyword_sim) + (0.3 * text_sim) + (0.2 * text_sim)
    return round(blended * 100, 2)


# Example usage: score a small hard-coded resume against a hard-coded job
# description when this cell/script runs as the main module.
if __name__ == "__main__":
    # Sample resume text (inline; nothing is read from disk here)
    resume = """
    John Doe
    Experienced data scientist with expertise in Python, machine learning, and deep learning. 
    Proficient in tools like TensorFlow, PyTorch, and scikit-learn. 
    Strong analytical skills with a background in mathematics and statistics. 
    Certified AWS Solutions Architect and Google Cloud Engineer.
    """
    
    # Sample job description to match the resume against
    job_description = """
    We are looking for a data scientist proficient in Python and machine learning. 
    Candidates should have experience with deep learning frameworks like TensorFlow or PyTorch 
    and a strong understanding of statistics. Cloud certification (AWS or GCP) is a plus.
    """

    # calculate_ats_score returns a percentage already rounded to two decimals
    ats_score = calculate_ats_score(resume, job_description)
    print(f"ATS Score: {ats_score}%")


ATS Score: 76.37%


In [None]:
Amazon is at the forefront of innovative technology, driving progress in AI and machine learning solutions. Our mission is to leverage advanced data science and engineering to deliver impactful and scalable products for our clients. We are looking for a passionate Machine Learning Engineer to join our dynamic team and contribute to cutting-edge projects.

Key Responsibilities:

Design, develop, and deploy scalable machine learning models and algorithms for a variety of applications.
Collaborate with cross-functional teams, including data scientists, software engineers, and product managers, to gather requirements and implement solutions.
Preprocess and analyze large datasets, ensuring data quality and feature engineering for optimal model performance.
Build and maintain machine learning pipelines for training, evaluation, and deployment.
Optimize and fine-tune models for performance, scalability, and accuracy using techniques such as hyperparameter tuning and model compression.
Conduct thorough testing and validation of models to ensure reliability and robustness in production.
Monitor and maintain deployed models, implementing strategies for model retraining and performance tracking.
Stay current with the latest advancements in machine learning and AI, applying innovative techniques and technologies as appropriate.
Document processes, model architectures, and code to ensure maintainability and knowledge sharing within the team.
Qualifications:

Bachelor's or Master's degree in Computer Science, AI/ML, Data Science, or a related field.
Strong proficiency in Python and experience with libraries/frameworks such as TensorFlow, PyTorch, or Scikit-Learn.
Solid understanding of machine learning algorithms, neural networks, and deep learning architectures.
Experience with data preprocessing and feature engineering.
Hands-on experience with cloud platforms (AWS, GCP, or Azure) for deploying machine learning models.
Familiarity with MLOps practices, including version control, CI/CD pipelines, and model monitoring.
Excellent problem-solving skills and the ability to work collaboratively in a team environment.
Knowledge of big data tools (e.g., Spark, Hadoop) and database technologies is a plus.
Nice-to-Have:

Experience in Natural Language Processing (NLP) and working with transformers and LLMs.
Understanding of computer vision and related frameworks.
Contributions to open-source ML projects or participation in hackathons.
Perks and Benefits:

Competitive salary and performance-based bonuses.
Flexible working hours and remote work options.
Access to training programs, conferences, and certifications for continuous learning.
Comprehensive health and wellness benefits.