In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

In [2]:
# Example sentence that the rest of the notebook tags, chunks and parses.
# Alternative input to try instead:
#   'President Karzai thanked his allies for their help in battling terrorism.'
sentence = (
    "Burma's military government said today that ex-Prime Minister "
    "Khin Nyunt was ousted last month because his investigations of "
    "military commanders threatened the unity of the armed forces."
)

In [3]:
def preprocess(sent):
    """Tokenize a raw sentence and part-of-speech tag each token.

    Args:
        sent: The sentence to process, as a string.

    Returns:
        Whatever ``nltk.pos_tag`` returns for the ``nltk.word_tokenize``
        tokens of ``sent`` (displayed in this notebook as a list of
        ``(token, tag)`` pairs).
    """
    tokens = nltk.word_tokenize(sent)
    return nltk.pos_tag(tokens)

In [4]:
# Tokenize and POS-tag the example sentence. The bare `sent` on the last line
# makes the tagged token list the cell's displayed output.
sent = preprocess(sentence)
sent

[('Burma', 'NNP'),
 ("'s", 'POS'),
 ('military', 'JJ'),
 ('government', 'NN'),
 ('said', 'VBD'),
 ('today', 'NN'),
 ('that', 'IN'),
 ('ex-Prime', 'JJ'),
 ('Minister', 'NNP'),
 ('Khin', 'NNP'),
 ('Nyunt', 'NNP'),
 ('was', 'VBD'),
 ('ousted', 'VBN'),
 ('last', 'JJ'),
 ('month', 'NN'),
 ('because', 'IN'),
 ('his', 'PRP$'),
 ('investigations', 'NNS'),
 ('of', 'IN'),
 ('military', 'JJ'),
 ('commanders', 'NNS'),
 ('threatened', 'VBD'),
 ('the', 'DT'),
 ('unity', 'NN'),
 ('of', 'IN'),
 ('the', 'DT'),
 ('armed', 'JJ'),
 ('forces', 'NNS'),
 ('.', '.')]

In [5]:
# Chunk grammar handed to nltk.RegexpParser below. An "NP" chunk is any number
# of adjectives (JJ), then any number of proper nouns (NNP), then one more NNP,
# so every chunk ends in at least one proper noun.
pattern = "NP: {<JJ>*<NNP>*<NNP>}"

In [6]:
# Build a regular-expression chunker from the grammar in `pattern`.
cp = nltk.RegexpParser(pattern)
# Apply it to the POS-tagged tokens; `cs` is the resulting chunk tree, which a
# later cell converts to IOB tags.
cs = cp.parse(sent)
# print() shows the tree in its bracketed text form.
print(cs)

(S
  (NP Burma/NNP)
  's/POS
  military/JJ
  government/NN
  said/VBD
  today/NN
  that/IN
  (NP ex-Prime/JJ Minister/NNP Khin/NNP Nyunt/NNP)
  was/VBD
  ousted/VBN
  last/JJ
  month/NN
  because/IN
  his/PRP$
  investigations/NNS
  of/IN
  military/JJ
  commanders/NNS
  threatened/VBD
  the/DT
  unity/NN
  of/IN
  the/DT
  armed/JJ
  forces/NNS
  ./.)


In [None]:
from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint
# Flatten the chunk tree `cs` into a flat tagged sequence. NLTK documents
# tree2conlltags as returning (word, POS tag, IOB chunk tag) triples.
iob_tagged = tree2conlltags(cs)
pprint(iob_tagged)

In [None]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_lg
# Instantiate the spaCy pipeline from the installed en_core_web_lg model package.
# The cells below call `nlp` on the example sentence.
nlp = en_core_web_lg.load()

In [None]:
# Run the spaCy pipeline on the example sentence and list each detected
# named entity as (entity text, entity label).
doc = nlp(sentence)
pprint([(ent.text, ent.label_) for ent in doc.ents])

In [None]:
# Token-level view of the entities: each token with its IOB entity flag and
# its entity type.
pprint([(token, token.ent_iob_, token.ent_type_) for token in doc])

In [None]:
# Highlight the named entities inline in the notebook. Reuses the `doc` parsed
# in an earlier cell rather than running the whole pipeline on the same
# sentence again.
displacy.render(doc, style='ent', jupyter=True)

In [None]:
# Draw the dependency parse inline in the notebook. Reuses the `doc` parsed in
# an earlier cell rather than running the whole pipeline on the same sentence
# again. 'distance' is handed to displaCy's dependency renderer (spacing
# between words, per the displaCy documentation).
displacy.render(doc, style='dep', jupyter=True, options={'distance': 120})

In [None]:
# Content words only: (surface form, POS, lemma) for every token that is
# neither a stop word nor punctuation. Filters in a single comprehension and
# reuses the `doc` parsed in an earlier cell, instead of building an
# intermediate list and running the pipeline again.
[(token.orth_, token.pos_, token.lemma_)
 for token in doc
 if not token.is_stop and token.pos_ != 'PUNCT']