In [10]:
import fitz
import os
import spacy

In [11]:
def extract_text_from_pdf(pdf_path):
    document = fitz.open(pdf_path)
    text = ""
    for page_num in range(document.page_count):
        page = document.load_page(page_num)
        text += page.get_text()
    return text

# pdf_path = "Vishwaa D A - Batch 2025 - B.Tech. - Information Technology - mjXj5ZC (1).pdf"
pdf_path = "Vijay Veerasekaran - Batch 2025 - B.Tech. - Information Technology - iF5nlmC.pdf"
resume_text = extract_text_from_pdf(pdf_path)
print(resume_text)

VIJAY VEERASEKARAN
B.Tech. - Information Technology
Ph: +91-6379435481
Email: vijay2110746@ssn.edu.in
Nagapattinam, Tamil Nadu, India - 614707 
Python  Sql  Flask  API  OOPs
BRIEF SUMMARY
I'm driven by a constant thirst for knowledge, which fuels my personal growth. Learning is more than a hobby—it's a mindset that shapes my life.
This enthusiasm has transformed my personal and professional journey, helping me adapt, acquire diverse skills, and face challenges positively. My
commitment to ongoing learning defines me, propelling me toward self-discovery and growth. In essence, my passion for learning propels me to
explore the unknown, adapt, and continually better myself.
KEY EXPERTISE
EDUCATION
2021  -  2025
Sri Sivasubramaniya Nadar College of Engineering
B.Tech. - Information Technology |  CGPA: 7.98 / 10.00
2021
RVS BHARATH MATRICULATION SCHOOL, Nagapattinam
12th |  TAMILNADU HIGHER SECONDARY BOARD |  Percentage: 97.00 / 100.00
2019
RVS BHARATH MATRICULATION SCHOOL, Nagapattinam
10t

In [12]:
nlp = spacy.load("en_core_web_sm")

predefined_skills = ["Python" ,  "Flask" ,  "API"  , "MySQL"  , "Databases" ,  "Programming" ,  "JIRA" ,  "Github" ,"javascript" , "pandas" , "dsa"]
predefined_skills_lower = [skill.lower() for skill in predefined_skills]

def extract_skills(text, predefined_skills):
    doc = nlp(text)
    skills = []
    for token in doc:
        if token.text in predefined_skills:
            skills.append(token.text)
    return skills

skills_extracted = list(set(extract_skills(resume_text, predefined_skills)))
print("Extracted skills: ",skills_extracted)

Extracted skills:  ['Programming', 'JIRA', 'API', 'Python', 'Github', 'MySQL', 'Databases', 'Flask']


In [13]:
def match_and_rank_resumes(resume_texts, required_skills):
    required_skills_lower = [skill.lower() for skill in required_skills]
    resume_rankings = []
    for resume_text in resume_texts:
        resume_text_lower = resume_text.lower()
        extracted_skills = extract_skills(resume_text_lower, predefined_skills_lower)
        matching_skills = set([skill.lower() for skill in extracted_skills]) & set(required_skills_lower)
        score = len(matching_skills)
        resume_rankings.append((resume_text, score))
    
    ranked_resumes = sorted(resume_rankings, key=lambda x: x[1], reverse=True)
    return ranked_resumes



import os

def process_resumes(pdf_folder, required_skills):
    resume_texts = []

    for pdf_file in os.listdir(pdf_folder):
        if pdf_file.endswith(".pdf"):
            pdf_path = os.path.join(pdf_folder, pdf_file)
            resume_text = extract_text_from_pdf(pdf_path)
            resume_texts.append(resume_text)

    ranked_resumes = match_and_rank_resumes(resume_texts, required_skills)
    
    return ranked_resumes

pdf_folder = "resumes"
required_skills =  ['Python', 'Databases', 'Programming', 'JIRA', 'Flask', 'Github', 'API', 'MySQL']
ranked_resumes = process_resumes(pdf_folder, required_skills)

for idx, (resume, score) in enumerate(ranked_resumes):
    print(f"Resume {idx+1}: Score {score}")
    print(resume)

Resume 1: Score 8
VIJAY VEERASEKARAN
B.Tech. - Information Technology
Ph: +91-6379435481
Email: vijay2110746@ssn.edu.in
Nagapattinam, Tamil Nadu, India - 614707 
Python  Sql  Flask  API  OOPs
BRIEF SUMMARY
I'm driven by a constant thirst for knowledge, which fuels my personal growth. Learning is more than a hobby—it's a mindset that shapes my life.
This enthusiasm has transformed my personal and professional journey, helping me adapt, acquire diverse skills, and face challenges positively. My
commitment to ongoing learning defines me, propelling me toward self-discovery and growth. In essence, my passion for learning propels me to
explore the unknown, adapt, and continually better myself.
KEY EXPERTISE
EDUCATION
2021  -  2025
Sri Sivasubramaniya Nadar College of Engineering
B.Tech. - Information Technology |  CGPA: 7.98 / 10.00
2021
RVS BHARATH MATRICULATION SCHOOL, Nagapattinam
12th |  TAMILNADU HIGHER SECONDARY BOARD |  Percentage: 97.00 / 100.00
2019
RVS BHARATH MATRICULATION SCHOOL

In [14]:
def match_and_rank_resumes2(resume_texts_with_paths, required_skills):
    required_skills_lower = [skill.lower() for skill in required_skills]
    resume_rankings = []
    for resume_text, path in resume_texts_with_paths:
        resume_text_lower = resume_text.lower()
        extracted_skills = extract_skills(resume_text_lower, predefined_skills_lower)
        matching_skills = set(skill.lower() for skill in extracted_skills) & set(required_skills_lower)
        score = len(matching_skills)
        resume_rankings.append((path, score))
    
    ranked_resumes = sorted(resume_rankings, key=lambda x: x[1], reverse=True)
    return ranked_resumes

def process_resumes2(pdf_folder, required_skills):
    resume_texts_with_paths = []

    for pdf_file in os.listdir(pdf_folder):
        if pdf_file.endswith(".pdf"):
            pdf_path = os.path.join(pdf_folder, pdf_file)
            resume_text = extract_text_from_pdf(pdf_path)
            resume_texts_with_paths.append((resume_text, pdf_path))
    

    ranked_resumes = match_and_rank_resumes2(resume_texts_with_paths, required_skills)
    
    return ranked_resumes

pdf_folder = "resumes"
required_skills = ["Python", "Databases", "Programming", "JIRA", "Flask", "Github", "API", "MySQL"]
ranked_resumes = process_resumes2(pdf_folder, required_skills)

for idx, (path, score) in enumerate(ranked_resumes):
    print(f"Resume {idx+1}: Path {path}, Score {score}")


Resume 1: Path resumes\Vijay Veerasekaran - Batch 2025 - B.Tech. - Information Technology - iF5nlmC.pdf, Score 8
Resume 2: Path resumes\Sujay R - Batch 2025 - B.Tech. - Information Technology - ydPZlmC (1).pdf, Score 7
Resume 3: Path resumes\Vishwaa D A - Batch 2025 - B.Tech. - Information Technology - mjXj5ZC (1).pdf, Score 6
Resume 4: Path resumes\Vasundhara Boominathan - Batch 2025 - B.Tech. - Information Technology - 30S8lmC.pdf, Score 3
