# Part of Speech (POS)

spaCy is used as the key library.

# 1)- Importing key Modules

In [6]:
# Support both Python 2 and Python 3 with minimal overhead.
from __future__ import absolute_import, division, print_function
import warnings
# Suppress every warning for the rest of the session; note that this also
# hides deprecation notices raised by libraries.
warnings.filterwarnings('ignore')

In [7]:
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span
from spacy import displacy

In [8]:
# Load the 'en_core_web_sm' spaCy pipeline; `nlp` is the callable used to parse text below.
nlp = spacy.load('en_core_web_sm')

# 2)- POS

In [9]:
# Parse the sample sentence (past progressive: "was working") used throughout the notebook.
doc = nlp('Mr. Jan Van Ande was working as head of department at Rotterdam Business School')

In [10]:
# Echo the parsed Doc; the notebook displays its text.
doc

Mr. Jan Van Ande was working as head of department at Rotterdam Business School

In [18]:
# Draw the dependency parse of `doc`; 'compact' and 'distance' are displaCy layout options.
displacy.render(doc, style='dep', options = {'compact':True, 'distance':100})

### Custom function for POS

In [19]:
def get_person_orgs(doc):
    """Print who works where, based on the dependency parse of ``doc``.

    For each entity labelled ``"PERSON"`` whose syntactic head has the lemma
    ``'work'``, every preposition directly under that verb yields one printed
    dict holding the person, the ``ORG``-typed tokens directly under the
    preposition, and a ``past`` flag.

    ``past`` is True only when the verb itself is tagged ``VBD``; auxiliaries
    are not inspected.

    The function takes and returns the document, so it can be registered as a
    pipeline component.
    """
    for person in doc.ents:
        if person.label_ != "PERSON":
            continue

        verb = person.root.head
        if verb.lemma_ != 'work':
            continue

        # Only the main verb's own tag decides the tense here.
        is_past = verb.tag_ == "VBD"

        for child in verb.children:
            if child.dep_ != 'prep':
                continue
            employers = [tok for tok in child.children if tok.ent_type_ == 'ORG']
            print({'person': person, 'orgs': employers, 'past': is_past})

    return doc

In [20]:
from spacy.pipeline import merge_entities

In [21]:
# Reload the pipeline so the custom components below are added to a fresh instance.
nlp = spacy.load('en_core_web_sm')

In [22]:
# Add the merge_entities component imported above.
# NOTE(review): passing the function object itself to add_pipe matches the spaCy v2 API;
# spaCy v3 expects a registered component name — confirm the installed version.
nlp.add_pipe(merge_entities)

In [23]:
# Add the custom get_person_orgs function as a pipeline component; it prints its findings
# each time a text is processed.
nlp.add_pipe(get_person_orgs)

In [24]:
# Processing the text runs the added components, so get_person_orgs prints its dict as this cell's output.
doc = nlp('Mr. Jan Van Ande was working as head of department at Rotterdam Business School')

{'person': Jan Van Ande, 'orgs': [], 'past': False}


In [25]:
# Same sentence with the simple past "worked" instead of "was working", for comparison.
doc = nlp('Mr. Jan Van Ande worked as head of department at Rotterdam Business School')

{'person': Jan Van Ande, 'orgs': [], 'past': True}


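Note the difference: "was working" is reported as not past, because the function only checks whether the main verb itself is tagged `VBD`, and "working" is tagged `VBG` (the past tense is carried by the auxiliary "was").<br>
"worked" is tagged `VBD`, so it is reported as past.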

### Modify the custom function

In [26]:
def get_person_orgs(doc):
    """Print who works where, treating past-progressive verbs as past.

    For each entity labelled ``"PERSON"`` whose syntactic head has the lemma
    ``'work'``, one dict is printed with:

    * ``person`` - the entity span,
    * ``orgs``   - every ``ORG``-typed token that is a direct child of any
      preposition attached to the verb (empty list when there is none),
    * ``past``   - True when the verb is tagged ``VBD``, or when it is tagged
      ``VBG`` and has an auxiliary tagged ``VBD``.

    Args:
        doc: the parsed document whose ``ents`` are scanned.

    Returns:
        The same ``doc`` object, so the function can be used as a pipeline
        component.
    """
    for ent in doc.ents:
        if ent.label_ != "PERSON":
            continue

        head = ent.root.head
        if head.lemma_ != 'work':
            continue

        # The children are scanned twice (auxiliaries, prepositions); keep a
        # list so this also works if `children` is a one-shot iterator.
        children = list(head.children)

        # Tense depends only on the verb and its auxiliaries, so compute it
        # once per verb, independent of how many prepositions there are.
        past_aux = any(t.dep_ == 'aux' and t.tag_ == 'VBD' for t in children)
        past = head.tag_ == 'VBD' or (head.tag_ == 'VBG' and past_aux)

        # Collect organisations across all prepositions. Starting from an
        # empty list keeps the report well-defined when the verb has no
        # preposition at all (previously an unbound-variable error).
        orgs = [
            token
            for prep in children if prep.dep_ == 'prep'
            for token in prep.children if token.ent_type_ == 'ORG'
        ]

        print({'person': ent, 'orgs': orgs, 'past': past})
    return doc

In [27]:
# Reload the pipeline again so the revised get_person_orgs is attached to a fresh instance.
nlp = spacy.load('en_core_web_sm')

In [28]:
# Add the merge_entities component to the reloaded pipeline.
# NOTE(review): passing the function object itself to add_pipe matches the spaCy v2 API;
# spaCy v3 expects a registered component name — confirm the installed version.
nlp.add_pipe(merge_entities)

In [29]:
# Add the most recent definition of get_person_orgs (the one that also checks auxiliaries).
nlp.add_pipe(get_person_orgs)

In [30]:
# Re-run the "was working" sentence; the component's printed dict appears as this cell's output.
doc = nlp('Mr. Jan Van Ande was working as head of department at Rotterdam Business School')

{'person': Jan Van Ande, 'orgs': [], 'past': True}
