**Corpus**

In [2]:
import nltk
from nltk.corpus import brown

# Fetch the Brown corpus data before any sentences are read from it.
nltk.download('brown')
print('---')

# Sentences from the "news" category; show the first one as a token list.
brown_sents = brown.sents(categories='news')
print(brown_sents[0])

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
---
['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', 'Friday', 'an', 'investigation', 'of', "Atlanta's", 'recent', 'primary', 'election', 'produced', '``', 'no', 'evidence', "''", 'that', 'any', 'irregularities', 'took', 'place', '.']


In [3]:
# `nltk` is already imported by the corpus cell at the top of the notebook,
# so it is not re-imported here.
# Fetch the tagger model before calling nltk.pos_tag.
nltk.download('averaged_perceptron_tagger')
print('---')

# Attach a part-of-speech tag to every token of the first Brown news sentence;
# the result is a list of (token, tag) pairs.
tagged_sentence = nltk.pos_tag(brown_sents[0])

print(tagged_sentence)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
---
[('The', 'DT'), ('Fulton', 'NNP'), ('County', 'NNP'), ('Grand', 'NNP'), ('Jury', 'NNP'), ('said', 'VBD'), ('Friday', 'NNP'), ('an', 'DT'), ('investigation', 'NN'), ('of', 'IN'), ("Atlanta's", 'NNP'), ('recent', 'JJ'), ('primary', 'JJ'), ('election', 'NN'), ('produced', 'VBD'), ('``', '``'), ('no', 'DT'), ('evidence', 'NN'), ("''", "''"), ('that', 'IN'), ('any', 'DT'), ('irregularities', 'NNS'), ('took', 'VBD'), ('place', 'NN'), ('.', '.')]


In [4]:
# Resources fetched ahead of the ne_chunk call below.
for resource in ('maxent_ne_chunker', 'words'):
    nltk.download(resource)
print('---')

# Group the POS-tagged tokens into named-entity chunks.
ner_sentence = nltk.ne_chunk(tagged_sentence)

# Each top-level item is either a plain (token, tag) pair or a labelled
# entity subtree; print them one per line.
for chunk in ner_sentence:
    print(chunk)

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
---
('The', 'DT')
(ORGANIZATION Fulton/NNP County/NNP Grand/NNP Jury/NNP)
('said', 'VBD')
('Friday', 'NNP')
('an', 'DT')
('investigation', 'NN')
('of', 'IN')
("Atlanta's", 'NNP')
('recent', 'JJ')
('primary', 'JJ')
('election', 'NN')
('produced', 'VBD')
('``', '``')
('no', 'DT')
('evidence', 'NN')
("''", "''")
('that', 'IN')
('any', 'DT')
('irregularities', 'NNS')
('took', 'VBD')
('place', 'NN')
('.', '.')


In [5]:
sentence = 'Mercedes is the best Car in Turkey'

# Whitespace tokenization. From here on `sentence` holds the token list,
# which is what the tagging cell that follows consumes.
sentence = sentence.split()

print(sentence)

['Mercedes', 'is', 'the', 'best', 'Car', 'in', 'Turkey']


In [6]:
# Tag the token list, then group the tagged tokens into entity chunks.
pt = nltk.pos_tag(sentence)
ne = nltk.ne_chunk(pt)

# One printed line per top-level node of the chunk tree.
for node in ne:
    print(node)

(GPE Mercedes/NNP)
('is', 'VBZ')
('the', 'DT')
('best', 'JJS')
('Car', 'NN')
('in', 'IN')
(GPE Turkey/NNP)


In [7]:
sentence = 'Can machine think? said by Tayyip Erdogan .'
# str.split breaks on whitespace only: 'think?' stays a single token, while
# the separately spaced '.' becomes a token of its own.
sentence = sentence.split()

# Tag the tokens, then group the tagged tokens into entity chunks.
pt = nltk.pos_tag(sentence)
ne = nltk.ne_chunk(pt)

# One printed line per top-level node of the chunk tree.
for node in ne:
    print(node)

('Can', 'MD')
('machine', 'NN')
('think?', 'NN')
('said', 'VBD')
('by', 'IN')
(PERSON Tayyip/NNP Erdogan/NNP)
('.', '.')


In [8]:
sentence = 'Can machines think? said by Tayyip Erdogan.'
# Lowercase first, then split on whitespace; `sentence` becomes the token list.
sentence = sentence.lower().split()

print(sentence)

['can', 'machines', 'think?', 'said', 'by', 'tayyip', 'erdogan.']


In [9]:
# Same tag-then-chunk steps, this time on the lowercased tokens.
pt = nltk.pos_tag(sentence)
ne = nltk.ne_chunk(pt)

# One printed line per top-level node of the chunk tree.
for node in ne:
    print(node)

('can', 'MD')
('machines', 'NNS')
('think?', 'NNS')
('said', 'VBD')
('by', 'IN')
('tayyip', 'NN')
('erdogan.', 'NN')


### spaCy
https://spacy.io/

In [10]:
!pip install spacy



In [0]:
import spacy

# Load the small English pipeline by its full package name: the bare 'en'
# shortcut link is no longer accepted by spaCy v3 and later.
# If the package is missing, install it with:
#     python -m spacy download en_core_web_sm
spacy_nlp = spacy.load('en_core_web_sm')



## Sentences

In [0]:
# Sample text; it contains a doubled space before the final period.
sentence = 'I study Computer Engineering In Bilgi University  .'

# Run the loaded spaCy pipeline over the raw string; the next cell reads
# tokens and entities from `document`.
document = spacy_nlp(sentence)

In [17]:
print(sentence)

# Part-of-speech label for every token of the parsed document.
print('\n=== POS ===')
for tok in document:
    print(f'Token: {tok}, POS: {tok.pos_}')

# Named entities come from `document.ents`, each carrying its own label.
print('\n=== NER ===')
for ent in document.ents:
    print(f'Token: {ent}, NE: {ent.label_}')
      

I study Computer Engineering In Bilgi University  .

=== POS ===
Token: I, POS: PRON
Token: study, POS: VERB
Token: Computer, POS: PROPN
Token: Engineering, POS: PROPN
Token: In, POS: ADP
Token: Bilgi, POS: PROPN
Token: University, POS: PROPN
Token:  , POS: SPACE
Token: ., POS: PUNCT

=== NER ===
Token: Bilgi University, NE: ORG


In [19]:
# Rebuild the first Brown news sentence as one space-separated string and
# run it through the spaCy pipeline.
sentence = ' '.join(brown_sents[0])
doc = spacy_nlp(sentence)

print(sentence, '\n')

# List each entity found in the sentence together with its label.
for ent in doc.ents:
    print(f'Token: {ent}, NE: {ent.label_}')

The Fulton County Grand Jury said Friday an investigation of Atlanta's recent primary election produced `` no evidence '' that any irregularities took place . 

Token: The Fulton County Grand Jury, NE: ORG
Token: Friday, NE: DATE
Token: Atlanta, NE: GPE


Useful function to visualize

In [20]:
from spacy import displacy

# Draw the dependency parse inline; 'dep' is displacy's default style,
# spelled out here to contrast with the entity view used later.
displacy.render(doc, style='dep', jupyter=True)

In [21]:
# Entity view: highlights the entity spans of `doc` inline.
displacy.render(doc, style='ent', jupyter=True)

In [22]:
sentence = 'Where is Silicon Valley ?'

doc = spacy_nlp(sentence)

# Echo the input followed by one blank line.
print(sentence, end='\n\n')

# List each entity found in the sentence together with its label.
for ent in doc.ents:
    print(f'Token {ent}, NE: {ent.label_}')

Where is Silicon Valley ?

Token Silicon Valley, NE: LOC


In [23]:
# Entity view of the most recently parsed `doc`.
displacy.render(doc, style='ent', jupyter=True)