In [16]:
import fitz  # For PDF extraction
import spacy  # For NLP
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
# Load spaCy's language model
nlp = spacy.load("en_core_web_sm")


In [18]:
# Helper function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)
        text += page.get_text("text")
    return text

# Helper function to split resume text into sections (e.g., Education, Experience, Skills)
def split_resume_into_sections(resume_text):
    sections = {}
    
    # Here you can define your section titles. These can be adjusted based on the resume format.
    section_keywords = {
        'Education': ['education', 'academic background'],
        'Experience': ['experience', 'work history', 'employment'],
        'Skills': ['skills', 'abilities', 'expertise'],
        'Certifications': ['certifications', 'licenses', 'courses'],
    }
    
    # Find sections based on keywords
    for section, keywords in section_keywords.items():
        for keyword in keywords:
            if keyword.lower() in resume_text.lower():
                sections[section] = resume_text.lower().split(keyword, 1)[-1]
                break
    
    return sections

# Function to compare resume sections with job description sections
def compare_resume_with_job(resume_sections, job_description_sections):
    results = {}
    for section, resume_content in resume_sections.items():
        job_content = job_description_sections.get(section, "")
        similarity = compute_similarity(resume_content, job_content)
        results[section] = similarity
    return results

# Helper function to compute similarity between two texts
def compute_similarity(text1, text2):
    vectorizer = TfidfVectorizer().fit_transform([text1, text2])
    vectors = vectorizer.toarray()
    return cosine_similarity(vectors)[0, 1]

# Sample job description split into sections
job_description_sections = {
    'Education': 'BSc in Computer Science or related field',
    'Experience': '3+ years of software development experience, particularly in Python and Django',
    'Skills': 'Proficiency in Python, Django, and front-end technologies like React',
    'Certifications': 'Relevant certifications like AWS or Google Cloud Architect'
}

# Main function to process resume and compare it to the job description
def process_resume(pdf_path, job_description_sections):
    resume_text = extract_text_from_pdf(pdf_path)
    resume_sections = split_resume_into_sections(resume_text)
    comparison_results = compare_resume_with_job(resume_sections, job_description_sections)
    
    # Print or return the similarity results for each section
    for section, similarity in comparison_results.items():
        print(f"{section}: Similarity Score = {similarity:.2f}")
    
    return resume_sections, resume_text, comparison_results

In [19]:
# Example usage:

pdf_path = "resume.pdf"  # Provide the path to the resume PDF
resume_section,  resume_text, comparison_results= process_resume(pdf_path, job_description_sections)


Education: Similarity Score = 0.07
Experience: Similarity Score = 0.32
Skills: Similarity Score = 0.30
Certifications: Similarity Score = 0.11


In [20]:
print(resume_section['Education'])


bachelor of science (bsc) in computer science
university of xyz | graduation: may 2019
relevant coursework:
●
data structures and algorithms
●
web development
●
database management systems
●
cloud computing
certifications
●
aws certified solutions architect – associate
●
google cloud professional cloud architect
certainly! here's a resume tailored to the job description you provided:
technical skills
●
programming languages: python, javascript, html, css
●
frameworks & libraries: django, react, node.js
●
databases: postgresql, mysql
●
cloud platforms: aws, google cloud platform
●
version control: git, github
●
other tools: docker, kubernetes, jenkins
professional experience
software developer
abc tech solutions, new york, ny | june 2021 – present
●
led the development of web applications using django for back-end functionality,
integrating restful apis, and react for front-end components.
●
built and deployed cloud-native applications using aws and google cloud
infrastructure, signifi