#### Both stemming and lemmatization try to reduce a given word to its root word.

#### The root word is called a stem in the stemming process, and it is called a lemma in the lemmatization process.

### Lemmatization is derived from lemma, and the lemma of a word corresponds to its dictionary form. 
### The lemma of a word is determined by its part of speech (adjective, noun, or verb) in the text where it is used.

In [46]:
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# One lemmatizer instance, bound to `lemma`.
lemma = WordNetLemmatizer()

# 'Walked' and 'walked' differ only in the case of the first letter.
words = ['Walked', 'university', 'flying', 'walked']

# Lemmatize every entry as a verb (pos='v') and print one result per line.
for entry in words:
    verb_form = lemma.lemmatize(entry, pos='v')
    print(verb_form)

Walked
university
fly
walk


#### Why is the first element unchanged in the output?
#### Because the word must be converted to lowercase before it is tokenized and lemmatized.
#### Now let's see another example.

In [50]:
# Full pipeline: lowercase the text, tokenize it, then lemmatize each token.

words = 'History is best for learning about life and people. We are all part of history'

# Lowercase first so capitalized words ('History', 'We') are lowercased
# before they reach the lemmatizer.
lower = words.lower()
token = word_tokenize(lower)

# Lemmatize every token as a verb (pos='v'), keeping the token order.
rejoin_words = [lemma.lemmatize(tok, pos='v') for tok in token]
print(rejoin_words)

['history', 'be', 'best', 'for', 'learn', 'about', 'life', 'and', 'people', '.', 'we', 'be', 'all', 'part', 'of', 'history']


In [53]:
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Bind the instance to its own name. Rebinding `WordNetLemmatizer` itself
# would replace the class with an instance, so any later
# `WordNetLemmatizer()` call would fail until the class is imported again.
lemmatizer = WordNetLemmatizer()

sent = 'History is the best subject for teaching'
# Call the imported `word_tokenize` directly: this cell does not import the
# `nltk` package, so `nltk.word_tokenize` only works if some other cell
# happened to run `import nltk` first.
tokens = word_tokenize(sent)

# Lemmatize each token as a verb (pos='v') and print it next to the original.
for t in tokens:
    print(t, '—->', lemmatizer.lemmatize(t, pos='v'))

History —-> History
is —-> be
the —-> the
best —-> best
subject —-> subject
for —-> for
teaching —-> teach


In [1]:
# Wrap the lemmatization loop in a reusable function.

from nltk.stem import WordNetLemmatizer

philosophy = 'Absorb what is useful, discard what is not, add what is uniquely your own.'

lemma = WordNetLemmatizer()


def lemma_words(sentence, model=lemma):
    """Print each whitespace-separated word of `sentence` beside its lemma.

    `model` must provide a `lemmatize(word)` method; it defaults to the
    `lemma` instance created just above. Words come from `str.split()`, so
    punctuation stays attached to them (e.g. 'useful,'). `lemmatize` is
    called without a `pos` argument. Nothing is returned.
    """
    template = 'word: {} --->: {}'
    for word in sentence.split():
        print(template.format(word, model.lemmatize(word)))


lemma_words(philosophy)

word: Absorb --->: Absorb
word: what --->: what
word: is --->: is
word: useful, --->: useful,
word: discard --->: discard
word: what --->: what
word: is --->: is
word: not, --->: not,
word: add --->: add
word: what --->: what
word: is --->: is
word: uniquely --->: uniquely
word: your --->: your
word: own. --->: own.


In [51]:
# Note that 'No' keeps its capital letter in the output, because the text is not lowercased before tokenizing.

import nltk
from nltk.tokenize import word_tokenize

text = 'No step is small, as long as it is headed in the direction of where we want to go'
token = word_tokenize(text)

# Lemmatize each token as a verb (pos='v'); the text is not lowercased first.
quotes = [lemma.lemmatize(tok, pos='v') for tok in token]
print(quotes)

['No', 'step', 'be', 'small', ',', 'as', 'long', 'as', 'it', 'be', 'head', 'in', 'the', 'direction', 'of', 'where', 'we', 'want', 'to', 'go']


### Stemming vs. Lemmatization

In [43]:
from nltk.stem.porter import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer

# One instance of each tool for a side-by-side comparison.
lem = WordNetLemmatizer()
stem = PorterStemmer()

word = "flying"

# Lemmatize the word as a verb, then stem the same word.
print("Lemmatized Word:", lem.lemmatize(word, pos="v"))
print("Stemmed Word:", stem.stem(word))

Lemmatized Word: fly
Stemmed Word: fli


In [58]:
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

stem = PorterStemmer()
lemma = WordNetLemmatizer()

text = "A smart kid ran towards the police station when he saw the thieves approaching."
token = word_tokenize(text)

# Run the same tokens through the stemmer, then through the lemmatizer
# (called without a `pos` argument), so the two outputs can be compared.
stemming = list(map(stem.stem, token))
print('After Stemming:', stemming, '\n')

lemmatization = list(map(lemma.lemmatize, token))
print('After Lemmatization:', lemmatization)

After Stemming: ['a', 'smart', 'kid', 'ran', 'toward', 'the', 'polic', 'station', 'when', 'he', 'saw', 'the', 'thiev', 'approach', '.'] 

After Lemmatization: ['A', 'smart', 'kid', 'ran', 'towards', 'the', 'police', 'station', 'when', 'he', 'saw', 'the', 'thief', 'approaching', '.']


### Apart from being more time-consuming, lemmatization is much better than stemming.