In [2]:
import spacy
from spacy.matcher import PhraseMatcher
import re
import json


In [3]:
nlp = spacy.load("en_core_web_sm")


In [4]:
skills_list = [
    "Python", "Java", "C++", "Machine Learning", 
    "Deep Learning", "NLP", "SQL", "Data Science",
    "TensorFlow", "PyTorch", "AWS"
]

education_keywords = [
    "B.Tech", "M.Tech", "B.Sc", "M.Sc",
    "Bachelor", "Master", "PhD",
    "Computer Science", "Engineering"
]


In [5]:
matcher = PhraseMatcher(nlp.vocab)

# Add skill patterns
skill_patterns = [nlp(text) for text in skills_list]
matcher.add("SKILL", skill_patterns)

# Add education patterns
edu_patterns = [nlp(text) for text in education_keywords]
matcher.add("EDUCATION", edu_patterns)


In [6]:
def extract_resume_entities(text):
    doc = nlp(text)

    result = {
        "Name": [],
        "Organizations": [],
        "Education": [],
        "Skills": []
    }

    # Extract default NER entities
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            result["Name"].append(ent.text)
        elif ent.label_ == "ORG":
            result["Organizations"].append(ent.text)

    # Custom Matcher Extraction
    matches = matcher(doc)
    for match_id, start, end in matches:
        label = nlp.vocab.strings[match_id]
        span = doc[start:end]

        if label == "SKILL":
            result["Skills"].append(span.text)
        elif label == "EDUCATION":
            result["Education"].append(span.text)

    # Remove duplicates
    result = {k: list(set(v)) for k, v in result.items()}

    return result


In [7]:
resume_text = input("Enter resume paragraph: ")
output = extract_resume_entities(resume_text)

print("\nExtracted Information:\n")
print(json.dumps(output, indent=4))


Enter resume paragraph:  Alice Johnson completed her M.Sc in Computer Science from MIT.  She worked at Amazon and Google.  She has skills in Python, Deep Learning and AWS.



Extracted Information:

{
    "Name": [
        "Deep Learning",
        "Alice Johnson"
    ],
    "Organizations": [
        "Amazon",
        "Google",
        "MIT"
    ],
    "Education": [
        "Computer Science",
        "M.Sc"
    ],
    "Skills": [
        "AWS",
        "Python",
        "Deep Learning"
    ]
}
