In [None]:
# Comparing lemmatization results between spaCy and NLTK

In [1]:
# Sample sentences fed to both lemmatizers; each one contains word forms
# whose lemma differs from the surface form.
sentences = [
    # irregular plural "children", progressive "are playing", plural "gardens"
    "The children are playing in the gardens.",
    # "bettered": past-tense verb built on "better"
    "He bettered his performance in the last match.",
    # "saw" appears twice: first as a verb, then as a noun
    "They saw the saw on the table.",
    # sentence-initial, capitalized "-ing" form
    "Running is good for your health.",
    # irregular plural "mice", progressive "were eating", plural "pieces"
    "The mice were eating pieces of cheese."
]

In [None]:
# spaCy

In [2]:
import spacy
# Load the `en_core_web_sm` pipeline once at module level; spacy_pipe() reads `nlp` as a global.
nlp = spacy.load("en_core_web_sm")
def spacy_pipe(text):
    """Run `text` through the module-level `nlp` pipeline and summarize each sentence.

    Returns a list with one dict per sentence in `doc.sents`, holding:
      - "sentence": the sentence text,
      - "tokens":   the text of every token in the sentence,
      - "lemmas":   lower-cased lemmas of the tokens that are not stop words,
                    punctuation or whitespace and whose lemma is purely
                    alphabetic.
    """
    def _keep(tok):
        # Drop stop words, punctuation and whitespace first, then anything
        # whose lemma contains non-alphabetic characters.
        if tok.is_stop or tok.is_punct or tok.is_space:
            return False
        return tok.lemma_.isalpha()

    summaries = []
    for span in nlp(text).sents:
        summaries.append({
            "sentence": span.text,
            "tokens": [tok.text for tok in span],
            "lemmas": [tok.lemma_.lower() for tok in span if _keep(tok)],
        })
    return summaries

In [3]:
# Print each input sentence followed by the filtered spaCy lemmas,
# flattened across all sentence spans that spacy_pipe returns for it.
for text in sentences:
    print("\n—", text)
    flat_lemmas = [
        lemma
        for summary in spacy_pipe(text)
        for lemma in summary["lemmas"]
    ]
    print("spaCy:", flat_lemmas)


— The children are playing in the gardens.
spaCy: ['child', 'play', 'garden']

— He bettered his performance in the last match.
spaCy: ['better', 'performance', 'match']

— They saw the saw on the table.
spaCy: ['see', 'saw', 'table']

— Running is good for your health.
spaCy: ['run', 'good', 'health']

— The mice were eating pieces of cheese.
spaCy: ['mouse', 'eat', 'piece', 'cheese']


In [None]:
# NLTK

In [5]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk import pos_tag, word_tokenize

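# First run only: uncomment to download the NLTK data that the tokenizer,
# POS tagger and WordNet lemmatizer rely on.
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('wordnet')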

# One shared lemmatizer instance, reused for every token in the NLTK cell.
lemmatizer = WordNetLemmatizer()

def to_wordnet_pos(tag):
    """Map a POS tag to a WordNet part-of-speech constant.

    Only the first character of `tag` is inspected:
    'J' -> wordnet.ADJ, 'V' -> wordnet.VERB, 'N' -> wordnet.NOUN,
    'R' -> wordnet.ADV.

    Any other tag, including an empty string or None, falls back to
    wordnet.NOUN.
    """
    prefix_to_pos = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV
    }
    # Slice rather than index so an empty or missing tag reaches the NOUN
    # fallback instead of raising IndexError/TypeError.
    prefix = tag[:1] if tag else ""
    return prefix_to_pos.get(prefix, wordnet.NOUN)

In [6]:
# Tokenize and POS-tag each sentence, then lemmatize every token using the
# WordNet POS derived from its tag. Unlike the spaCy cell, nothing is
# filtered or lower-cased here.
# NOTE(review): the recorded output shows capitalized tokens such as
# 'Running' coming back unchanged — presumably the lemmatizer lookup is
# case-sensitive; confirm, and consider lower-casing for a like-for-like
# comparison.
print("\nNLTK :")
for sent in sentences:
    tagged = pos_tag(word_tokenize(sent))
    lemmas = []
    for word, tag in tagged:
        lemmas.append(lemmatizer.lemmatize(word, to_wordnet_pos(tag)))
    print(lemmas)


NLTK :
['The', 'child', 'be', 'play', 'in', 'the', 'garden', '.']
['He', 'better', 'his', 'performance', 'in', 'the', 'last', 'match', '.']
['They', 'saw', 'the', 'saw', 'on', 'the', 'table', '.']
['Running', 'be', 'good', 'for', 'your', 'health', '.']
['The', 'mouse', 'be', 'eat', 'piece', 'of', 'cheese', '.']
