In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.help import upenn_tagset
import spacy

In [2]:
# Load spaCy's 'en_core_web_sm' pipeline once; `nlp` is then called on raw text in later cells.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Sample inputs for the tokenization / tagging / entity demos.
# The line breaks and the 12-space indentation inside `sent` are part of the
# string value and are kept exactly as they were in the triple-quoted form.
sent = (
    "I am reading a book.\n"
    "            It is Python Machine Learning by Example,\n"
    "            Third Edition."
)

# Second sample: one sentence containing a person, a year, a price and a place.
sent1 = "The book written by Hayden Liu in 2018 was sold\nat $30 in America"

In [4]:
# Run the spaCy pipeline over the first sample and show the text of every token.
# The line breaks and indentation embedded in `sent` come back as their own
# whitespace tokens in the printed list.
tokens2 = nlp(sent)
print([tok.text for tok in tokens2])

['I', 'am', 'reading', 'a', 'book', '.', '\n            ', 'It', 'is', 'Python', 'Machine', 'Learning', 'by', 'Example', ',', '\n            ', 'Third', 'Edition', '.']


In [5]:
# Tokenize the same sample with NLTK, then tag each token with its part of speech.
# The printed result is a list of (token, tag) pairs.
tokens = word_tokenize(sent)
print(nltk.pos_tag(tokens))


[('I', 'PRP'), ('am', 'VBP'), ('reading', 'VBG'), ('a', 'DT'), ('book', 'NN'), ('.', '.'), ('It', 'PRP'), ('is', 'VBZ'), ('Python', 'NNP'), ('Machine', 'NNP'), ('Learning', 'NNP'), ('by', 'IN'), ('Example', 'NNP'), (',', ','), ('Third', 'NNP'), ('Edition', 'NNP'), ('.', '.')]


In [6]:
# Print the Penn Treebank definitions (with example words) for two of the tags
# that appear in the POS-tagging output: PRP and VBP.
upenn_tagset('PRP')
upenn_tagset('VBP')

PRP: pronoun, personal
    hers herself him himself hisself it itself me myself one oneself ours
    ourselves ownself self she thee theirs them themselves they thou thy us
VBP: verb, present tense, not 3rd person singular
    predominate wrap resort sue twist spill cure lengthen brush terminate
    appear tend stray glisten obtain comprise detest tease attract
    emphasize mold postpone sever return wag ...


In [7]:
# Disabled alternative: uncomment to print spaCy's own POS tag for each token of the first sample.
#print([(token.text, token.pos_) for token in tokens2])
# Parse the second sample; its `.ents` are inspected in the next cell.
tokens3 = nlp(sent1)


In [8]:
# List every named entity spaCy recognised in the second sample with its type.
# `label_` (trailing underscore) is the human-readable label string; the plain
# `label` attribute is only an integer ID (e.g. 380), which tells the reader
# nothing when printed.
print([(token_ent.text, token_ent.label_) for token_ent in tokens3.ents])

[('Hayden Liu', 'PERSON'), ('2018', 'DATE'), ('30', 'MONEY'), ('America', 'GPE')]


##### Stemming and Lemmatization

In [10]:
# Create one Porter stemmer instance, reused by the stemming examples that follow.
from nltk.stem.porter import PorterStemmer
porter = PorterStemmer()

In [11]:
# Stemming example: the result shown below ('machin') is a truncated stem, not a dictionary word.
porter.stem('machines')

'machin'

In [13]:
# Stemming example: here the '-ing' suffix is removed, giving 'learn'.
porter.stem('learning')

'learn'

In [15]:
# Create one WordNet lemmatizer instance, reused by the lemmatization examples that follow.
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

In [16]:
# Lemmatization example: unlike the stemmer's 'machin', the result here is the full word 'machine'.
lemmatizer.lemmatize('machines')

'machine'

In [17]:
# Lemmatization example: 'learning' comes back unchanged, whereas the stemmer gave 'learn'.
# NOTE(review): no part-of-speech argument is passed, so the word is presumably
# handled as a noun (NLTK's documented default) - pass pos='v' to treat it as a verb; confirm.
lemmatizer.lemmatize('learning')

'learning'