# Lemmatization
## Find the root or general idea of words

In [2]:
import en_core_web_sm
import es_core_news_sm
# Instantiate one pipeline per imported model package. Each resulting object
# is called on a raw string further down to produce an iterable of tokens.
nlp_en = en_core_web_sm.load()  # pipeline used for the English sentence
nlp_es = es_core_news_sm.load()  # pipeline used for the Spanish sentence

In [3]:
# Sample sentences that repeat several inflections of the same words
# (run/runner/running, correr/comprar/compra) so the lemma columns printed
# later can be compared across surface forms.
doc_en = nlp_en(
    "I am a runner running in a race, "
    "because I love to run since the beginning of the running era"
)
doc_es = nlp_es(
    "A mi gusta correr y comprar, "
    "la compra que hice la semana pasada me hizo correr una carrera contra el tiempo"
)

>>> s0, s1 = 'a', 'bb'
>>>
>>> # Left-aligned with padding *
>>> print(f'{s0:*<7}\n{s1:*<7}')
a******
bb*****
>>>
>>> # Right-aligned with padding %
>>> print(f'{s0:%>8}\n{s1:%>8}')
%%%%%%%a
%%%%%%bb
>>>
>>> # Center-aligned
>>> print(f'{s0:@^9}\n{s1:@^9}')
@@@@a@@@@
@@@bb@@@@

In [6]:
def show_lemmas(document, *, text_width=12, pos_width=6, hash_width=20):
    """Print one aligned row per token: text, POS tag, lemma hash, lemma string.

    Args:
        document: Iterable of token-like objects exposing the attributes
            ``text``, ``pos_``, ``lemma`` and ``lemma_``.
        text_width: Minimum width of the token-text column (default 12).
        pos_width: Minimum width of the part-of-speech column (default 6).
        hash_width: Minimum width of the integer lemma column (default 20).

    Returns:
        None. Rows are written to standard output; an empty ``document``
        prints nothing.

    Values longer than their column width are not truncated, so such rows
    simply push the following columns to the right.
    """
    for token in document:
        # ``lemma`` is printed as an integer, and numbers right-align by
        # default in format specs, so "<" is required to keep the column
        # left-aligned like the string columns.
        print(
            f"{token.text:{text_width}} "
            f"{token.pos_:{pos_width}} "
            f"{token.lemma:<{hash_width}} "
            f"{token.lemma_}"
        )

In [7]:
show_lemmas(doc_en)

I            PRON   561228191312463089   -PRON-
am           AUX    10382539506755952630 be
a            DET    11901859001352538922 a
runner       NOUN   12640964157389618806 runner
running      VERB   12767647472892411841 run
in           ADP    3002984154512732771  in
a            DET    11901859001352538922 a
race         NOUN   8048469955494714898  race
,            PUNCT  2593208677638477497  ,
because      SCONJ  16950148841647037698 because
I            PRON   561228191312463089   -PRON-
love         VERB   3702023516439754181  love
to           PART   3791531372978436496  to
run          VERB   12767647472892411841 run
since        SCONJ  10066841407251338481 since
the          DET    7425985699627899538  the
beginning    NOUN   12304532214724622334 beginning
of           ADP    886050111519832510   of
the          DET    7425985699627899538  the
running      VERB   12767647472892411841 run
era          NOUN   7478797442728669872  era


In [8]:
show_lemmas(doc_es)

A            ADP    14862748245026736845 A
mi           DET    8676063341920465065  mi
gusta        NOUN   4524232712002072552  gustar
correr       VERB   10468945644875678183 correr
y            CCONJ  9409450202036847209  y
comprar      VERB   14693903559841170636 comprar
,            PUNCT  2593208677638477497  ,
la           DET    12507729120330568444 lo
compra       NOUN   14693903559841170636 comprar
que          SCONJ  11474801393294414462 que
hice         VERB   17672920047749692400 hacer
la           DET    12507729120330568444 lo
semana       NOUN   5115876900174477068  semana
pasada       ADJ    8365782747564503572  pasar
me           PRON   18197037023634208128 me
hizo         AUX    17672920047749692400 hacer
correr       VERB   10468945644875678183 correr
una          DET    8171375619591740969  uno
carrera      NOUN   17410845618979305672 carrera
contra       ADP    5827824339048469264  contra
el           DET    11488171005156075516 el
tiempo       NOUN   3897009725270