# Entity extraction use spacy and huggingface NER model

In [6]:
import pandas as pd
import spacy
from transformers import pipeline

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the path to your CSV file
file_path = 'Resume.csv'

# Read the CSV file into a DataFrame
resume_data = pd.read_csv(file_path)
resume_data.head(2)

Unnamed: 0,ID,Resume_str,Resume_html,Category
0,16852973,HR ADMINISTRATOR/MARKETING ASSOCIATE\...,"<div class=""fontsize fontface vmargins hmargin...",HR
1,22323967,"HR SPECIALIST, US HR OPERATIONS ...","<div class=""fontsize fontface vmargins hmargin...",HR


In [None]:

# Display the first few rows of the DataFrame
print(resume_data.head())

# Load a pre-trained NER model from Hugging Face
ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

In [7]:
# Example of processing the first resume in the dataset
resume_text = resume_data.iloc[1]['Resume_str']  # Adjust this if your column name is different
print(resume_text)

         HR SPECIALIST, US HR OPERATIONS       Summary     Versatile  media professional with background in Communications, Marketing, Human Resources and Technology.         Experience     09/2015   to   Current     HR Specialist, US HR Operations    Company Name   －   City  ,   State       Managed communication regarding launch of Operations group, policy changes and system outages      Designed standard work and job aids to create comprehensive training program for new employees and contractors         Audited job postings for old, pending, on-hold and draft positions.           Audited union hourly, non-union hourly and salary background checks and drug screens             Conducted monthly new hire benefits briefing to new employees across all business units               Served as a link between HR Managers and vendors by handling questions and resolving system-related issues         Provide real-time process improvement feedback on key metrics and initiatives  Successfully re-br

In [8]:
# Process text with spaCy
doc = nlp(resume_text)

print("Basic Keywords with spaCy:")
for entity in doc.ents:
    print(f"Entity: {entity.text}, Label: {entity.label_}")

Basic Keywords with spaCy:
Entity: SPECIALIST, Label: ORG
Entity: US, Label: GPE
Entity: Communications, Marketing, Human Resources and Technology, Label: ORG
Entity: US, Label: GPE
Entity: Operations, Label: ORG
Entity: hourly, Label: TIME
Entity: non-union, Label: GPE
Entity: hourly, Label: TIME
Entity: monthly, Label: DATE
Entity: US, Label: GPE
Entity: Business Unit, Label: ORG
Entity: RFI, Label: ORG
Entity: Background Check and Drug Screen, Label: ORG
Entity: SharePoint, Label: ORG
Entity: Researched, Label: ORG
Entity: quarterly, Label: DATE
Entity: Collaborated with Communication, Label: ORG
Entity: Digital Asset Management, Label: ORG
Entity: Marketing Toolkit, Label: PERSON
Entity: Created, Label: ORG
Entity: MySikorsky SharePoint, Label: ORG
Entity: Created, Label: ORG
Entity: annual, Label: DATE
Entity: 100 %, Label: PERCENT
Entity: daily, Label: DATE
Entity: monthly, Label: DATE
Entity: ATM, Label: ORG
Entity: quarterly, Label: DATE
Entity: Utilized, Label: ORG
Entity: SHU

In [9]:
# Advanced keyword extraction with Hugging Face NER model
print("\nAdvanced Keywords with Hugging Face NER model:")
results = ner_model(resume_text)
for entity in results:
    print(f"Entity: {entity['word']}, Label: {entity['entity']}")


Advanced Keywords with Hugging Face NER model:
Entity: US, Label: I-LOC
Entity: Resources, Label: I-ORG
Entity: US, Label: I-ORG
Entity: Operations, Label: I-ORG
Entity: City, Label: I-LOC
Entity: US, Label: I-ORG
Entity: Screen, Label: I-MISC
Entity: IT, Label: I-ORG
Entity: Marketing, Label: I-ORG
Entity: and, Label: I-ORG
Entity: Communications, Label: I-ORG
Entity: Co, Label: I-ORG
Entity: op, Label: I-ORG
Entity: City, Label: I-LOC
Entity: S, Label: I-MISC
Entity: ##hare, Label: I-MISC
Entity: ##P, Label: I-MISC
Entity: Digital, Label: I-MISC
Entity: As, Label: I-MISC
Entity: Too, Label: I-MISC
Entity: ##lk, Label: I-MISC
Entity: ##it, Label: I-MISC
Entity: ##sh, Label: I-MISC
Entity: Calendar, Label: I-MISC
Entity: My, Label: I-MISC
Entity: ##S, Label: I-MISC
Entity: ##iko, Label: I-ORG
Entity: ##rsk, Label: I-ORG
Entity: ##y, Label: I-ORG
Entity: S, Label: I-MISC
Entity: ##hare, Label: I-MISC
Entity: ##P, Label: I-MISC
Entity: ##oint, Label: I-MISC
