In [1]:
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span
import PyPDF2

In [2]:
nlp = spacy.load('en_core_web_sm')

In [3]:
# Define the patterns for matching different sections of the resume
patterns = [
    {'label': 'SKILLS', 'pattern': [{'LOWER': 'skills'}]},
    {'label': 'PROFESSIONAL EXPERIENCE', 'pattern': [{'LOWER': 'professional experience'}]},
    {'label': 'EDUCATION', 'pattern': [{'LOWER': 'education'}]}
]

In [8]:
matcher = Matcher(nlp.vocab)
for pattern in patterns:
    matcher.add(pattern['label'], [pattern['pattern']])

In [10]:
def extract_skills(text):
    doc = nlp(text)
    skills = []
    for match_id, start, end in matcher(doc):
        if doc.vocab.strings[match_id] == 'SKILLS':
            skills.extend([t.text for t in doc[start:end].noun_chunks if not t.is_stop])
    return skills


def extract_experience(text):
    doc = nlp(text)
    experiences = []
    for match_id, start, end in matcher(doc):
        if doc.vocab.strings[match_id] == 'PROFESSIONAL EXPERIENCE':
            experiences.append(doc[start:end].text)
    return experiences


def extract_education(text):
    doc = nlp(text)
    educations = []
    for match_id, start, end in matcher(doc):
        if doc.vocab.strings[match_id] == 'EDUCATION':
            educations.append(doc[start:end].text)
    return educations


def parse_pdf_resume(file_path):
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        resume_text = ''
        for page in reader.pages:
            resume_text += page.extract_text()

    skills = extract_skills(resume_text)
    experience = extract_experience(resume_text)
    education = extract_education(resume_text)

    return skills, experience, education,

# Example usage
pdf_resume_path = 'Old_Resume.pdf'

skills, experience, education = parse_pdf_resume(pdf_resume_path)

print('Skills:', skills)
print('Professional Experience:', experience)
print('Education:', education)

Skills: []
Professional Experience: []
Education: ['EDUCATION']
