## NLTK

In [42]:

import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

from pprint import pprint

In [58]:
text = "How do you join NATO and how close is Ukraine to becoming a member? by Orson Wells. Paid 1$"

In [59]:
tokens = word_tokenize(text)
tags = pos_tag(tokens)

In [60]:
pprint(tags)

[('How', 'WRB'),
 ('do', 'VB'),
 ('you', 'PRP'),
 ('join', 'VB'),
 ('NATO', 'NNP'),
 ('and', 'CC'),
 ('how', 'WRB'),
 ('close', 'JJ'),
 ('is', 'VBZ'),
 ('Ukraine', 'JJ'),
 ('to', 'TO'),
 ('becoming', 'VBG'),
 ('a', 'DT'),
 ('member', 'NN'),
 ('?', '.'),
 ('by', 'IN'),
 ('Orson', 'NNP'),
 ('Wells', 'NNP'),
 ('.', '.'),
 ('Paid', 'VBD'),
 ('1', 'CD'),
 ('$', '$')]


In [61]:
ne_tree = nltk.ne_chunk(tags)
print(ne_tree)

(S
  How/WRB
  do/VB
  you/PRP
  join/VB
  (ORGANIZATION NATO/NNP)
  and/CC
  how/WRB
  close/JJ
  is/VBZ
  (GPE Ukraine/JJ)
  to/TO
  becoming/VBG
  a/DT
  member/NN
  ?/.
  by/IN
  (PERSON Orson/NNP Wells/NNP)
  ./.
  Paid/VBD
  1/CD
  $/$)


In [62]:
# GPE localization
# Organization
# Person

In [64]:
for tagged_word in ne_tree:
    if hasattr(tagged_word, 'label'):
        print(tagged_word.label())
        print(tagged_word.leaves())


ORGANIZATION
[('NATO', 'NNP')]
GPE
[('Ukraine', 'JJ')]
PERSON
[('Orson', 'NNP'), ('Wells', 'NNP')]


## SCAPY

In [65]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()

In [99]:
text = "How do you join NATO and how close is Ukraine to becoming a member? by Orson Wells Paid with $10.0 in 20/12/2022"

In [100]:
train = nlp(text)

In [101]:
train.ents

(NATO, Ukraine, Orson Wells Paid, 10.0, 20/12/2022)

In [102]:
for entity in train.ents:
    print(entity.text, entity.label)

NATO 383
Ukraine 384
Orson Wells Paid 380
10.0 394
20/12/2022 391


In [103]:
#https://spacy.io/usage/visualizers#ent
displacy.render(train, jupyter=True, style="ent")

# ACTIVITY
- Can we apply scapy to NER in other languages (Catala, Spanish,...)?