In [3]:
from pdfminer.high_level import extract_text
import re
from sklearn.feature_extraction.text import CountVectorizer

def extract_text_from_pdf(pdf_path):
    """Extracts text from a PDF file."""
    return extract_text(pdf_path)

def extract_name_from_resume(text):
    """Extracts the name from the resume text."""
    name = None
    pattern = r"(\b[A-Z][a-z]+\b)\s(\b[A-Z][a-z]+\b)"
    match = re.search(pattern, text)
    if match:
        name = match.group()
    return name

def extract_contact_number_from_resume(text):
    """Extracts the contact number from the resume text."""
    contact_number = None
    pattern = r"\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b"
    match = re.search(pattern, text)
    if match:
        contact_number = match.group()
    return contact_number

def extract_email_from_resume(text):
    """Extracts the email address from the resume text."""
    email = None
    pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
    match = re.search(pattern, text)
    if match:
        email = match.group()
    return email

def identify_skills_section(text):
    """Identifies potential skills sections in the text based on specific keywords."""
    # Regex pattern to capture sections under specific headers
    pattern = r"(?i)(languages|tools / libraries|web / os|db / cloud)[:\s\n]*([\s\S]*?)(?=\n\n|\n(?:languages|tools / libraries|web / os|db / cloud|education|experience|projects|work|achievements?|$))"
    matches = re.findall(pattern, text)

    combined_skills_section = ""
    
    # Combine all matched sections into a single string
    for match in matches:
        header, content = match
        combined_skills_section += f"{header}:\n{content.strip()}\n\n"

    return combined_skills_section


def extract_education_from_resume(text):
    """Extracts education information from the resume text."""
    education = []
    pattern = r"(?i)(?:(?:Bachelor|B\.S\.|B\.A\.|Master|M\.S\.|M\.A\.|Ph\.D\.)\s(?:[A-Za-z]+\s)*[A-Za-z]+)"
    matches = re.findall(pattern, text)
    for match in matches:
        education.append(match.strip())
    return education

def extract_experience_from_resume(text):
    """Extracts experience sections from the resume text."""
    experiences = []
    pattern = r"(?:Experience|Work Experience|Previous Jobs|Employment History|Job|Position)[\s\S]*?(?=(?:Education|Skills|Certification|Projects|$))"
    matches = re.finditer(pattern, text, re.IGNORECASE)
    for match in matches:
        context = match.group().strip()
        experiences.append(context)
    return experiences

if __name__ == '__main__':
    pdf_path = "/Users/abhivesh/Abhivesh/Resume Parser/Joshua.pdf"
    text = extract_text_from_pdf(pdf_path)

    # Extract name, contact number, email, skills, education, and experience
    name = extract_name_from_resume(text)
    contact_number = extract_contact_number_from_resume(text)
    email = extract_email_from_resume(text)
    
    # Identify the skills section and extract skills
    skills_section = identify_skills_section(text)
    
    education = extract_education_from_resume(text)
    experiences = extract_experience_from_resume(text)

    # Print formatted output
    print(f"Resume: {pdf_path}")
    print(f"Name: {name if name else 'Name not found'}")
    print(f"Contact Number: {contact_number if contact_number else 'Contact number not found'}")
    print(f"Email: {email if email else 'Email not found'}")
    print(f"Skills: {skills_section if skills_section else 'No significant skills found'}")
    print(f"Education: {education if education else 'No education information found'}")
    if experiences:
        print("")
        for exp in experiences:
            print(exp)
            print("-" * 80)
    else:
        print("No experience sections found")


Resume: /Users/abhivesh/Abhivesh/Resume Parser/Joshua.pdf
Name: Joshua Schmidt
Contact Number: 1 (908) 531-7087
Email: jns223@cornell.edu
Skills: Languages:
Tools / Libraries

Web / OS:
TypeScript, JavaScript, Next.js, HTML, CSS, React.js, Vue.js · Linux — Debian and Arch based, MacOS

DB / Cloud:
MongoDB, Spanner, PostgreSQL, Redis, Elasticsearch · AWS, GCP, Netlify, Firebase, Heroku, Kubernetes, Serverless, CI/CD


Education: ['MASTER OF ENGINEERING IN COMPUTER SCIENCE', 'BACHELOR OF ENGINEERING IN COMPUTER ENGINEERING']

Experience

Google

New York, NY

SOFTWARE ENGINEER, PLAY STORE
Aug 2022 - Present
• Added support for indexing fresh, personalized YouTube developer videos on the Play Store, which increased user engagement by over 30% on

certain surfaces, driving app installs and increasing revenue.

• Worked on
--------------------------------------------------------------------------------
experiences on the Play Store.
• Designed and implemented the Short Form Video Cluster ba