## Named-entity recognition pipeline with ready-made solutions out of the box.

In [14]:
# Install the libraries needed for NER, then download the transformer model
# !pip install spacy==3.2
# !pip install spacy_transformers
# !python -m spacy download en_core_web_trf

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

import spacy
from spacy import displacy

In [39]:
# Check the installed spaCy version (the install cell pins spacy==3.2)
spacy.__version__

'3.2.0'

#### Load the latest transformer-based pipeline from the spaCy library


In [51]:
# English transformer pipeline (roBERTa-base).
# Bind the loaded pipeline to `nlp`: the NER cell below calls `nlp(email_body)`,
# which raises NameError on a fresh kernel if the result is discarded here.
nlp = spacy.load("en_core_web_trf")
nlp

<spacy.lang.en.English at 0x7fce1d2a79d0>

In [58]:
# Sample recruiter e-mail used as the input text for the NER and QA cells.
email_body = """
HI Mr.Anderson,

Hope the email finds you well! I am sending this again just in case my last email was missed.

I am looking for ML tech leads/lead engineers/senior engineers for Meta's rapid growth. I'd love to tell you a bit about our ML plans here specifically in our London office. If relocation is required, Meta will sponsor work visa and full relo support for your family to start anytime in 2022.
Our projects include working with some of the largest deployments of ML globally across our advertising teams, giving you license to experiment with our traffic to make improvements that could effect what our 2+ billion users see on Facebook.
We also have teams that are utilizing ML/NLP/CV to combat fake profiles, false information, spam, online abuse/bullying and hate speech. They run ML algorithms on top of a system which is the largest Haskell based system in production globally. We use this system to identifying users at risk of harming themselves or others based on their online behavior.

Furthermore, we have open source projects Pytorch, Papers with Code and Facebook Shops (our ecommerce platform) teams expanding as well. :)
It's a ML focus role containing ~50% applied research and 50% engineering/building ML products that can impact billions of people and push the state of the art AI. You can find more about our ML work here: https://www.facebook.com/careers/life/machine-learning-at-facebook

Please let me know if you are interested, happy to give you more details!

Thanks, John Doe
Facebook
"""

# Run the e-mail text through the spaCy pipeline
doc = nlp(email_body)

# Keep the text of person and organization entities only
person_results = [entity.text for entity in doc.ents if entity.label_ == "PERSON"]
org_results = [entity.text for entity in doc.ents if entity.label_ == "ORG"]

# Show the unique names and organizations that were found
print("Found Names:", set(person_results))
print("Found Organizations:", set(org_results))

# Highlight the recognized entities inline in the notebook output
displacy.render(doc, style="ent", jupyter=True)

Found Names: {'John Doe', 'Anderson'}
Found Organizations: {'Facebook', 'Meta'}


#### BERT Transformer application for Q&A (based on the SQuAD question-answering dataset)

In [59]:
from transformers import pipeline

# Instantiate the question-answering pipeline from a checkpoint; the same
# checkpoint name supplies both the model weights and the tokenizer.
model_checkpoint = "bert-large-uncased-whole-word-masking-finetuned-squad"
model = pipeline('question-answering', model=model_checkpoint, tokenizer=model_checkpoint)

#### Let's check how the BERT transformer-based model handles our questions

In [68]:
# Questions to ask about the e-mail
questions = [
    "What is the recruiter name?",
    "Who is recipient?",
    "What is the company?",
    "What is the position?",
    "What is location for this position?",
]

# Ask every question against the same e-mail text; topk=1 keeps only the
# highest-scoring answer for each question.
answers = model(question=questions, context=email_body, topk=1)

answers

[{'answer': 'John Doe',
  'end': 1512,
  'score': 0.6521063446998596,
  'start': 1504},
 {'answer': 'Mr.Anderson',
  'end': 15,
  'score': 0.25034067034721375,
  'start': 4},
 {'answer': 'Meta', 'end': 184, 'score': 0.41864168643951416, 'start': 180},
 {'answer': 'ML tech leads/lead engineers/senior engineers',
  'end': 175,
  'score': 0.1773664504289627,
  'start': 130},
 {'answer': 'London', 'end': 278, 'score': 0.6946215629577637, 'start': 272}]

In [86]:
# Summarize answers for our questions.
# The QA pipeline returns a bare dict (not a list) when it is given a single
# question, so normalise to a list before building the table.
answer_records = [answers] if isinstance(answers, dict) else list(answers)
if len(answer_records) != len(questions):
    raise ValueError(
        f"Expected {len(questions)} answers, got {len(answer_records)}"
    )

# Build the frame in one step so every question stays aligned with its answer.
result = pd.DataFrame({
    'Questions': questions,
    'Answers': [record['answer'] for record in answer_records],
    'Relevance score': [record['score'] for record in answer_records],
})
result

Unnamed: 0,Questions,Answers,Relevance score
0,What is the recruiter name?,John Doe,0.652106
1,Who is recipient?,Mr.Anderson,0.250341
2,What is the company?,Meta,0.418642
3,What is the position?,ML tech leads/lead engineers/senior engineers,0.177366
4,What is location for this position?,London,0.694622
