## PART 1

## Spacy Tokens, Ent

In [8]:
import spacy

nlp = spacy.load("en_core_web_md")

In [7]:
doc = nlp("Hi, my name is Vincent. I like to write Python")
for token in doc:
    print(
        token,
        token.pos_,
    )

Hi INTJ
, PUNCT
my PRON
name NOUN
is AUX
Vincent PROPN
. PUNCT
I PRON
like VERB
to PART
write VERB
Python PROPN


In [10]:
doc = nlp("Python isn't just a language, it's a community!")
for token in doc:
    print(token)

Python
is
n't
just
a
language
,
it
's
a
community
!


In [17]:
from wasabi import table


def text_to_doctable(txt):
    doc = nlp(txt)
    header = ("text", "lemma", "pos", "ent", "shape", "punct", "morph")
    data = [
        (
            tok.text,
            tok.lemma_,
            tok.pos_,
            tok.ent_type_,
            tok.shape_,
            tok.is_punct,
            tok.morph,
        )
        for tok in doc
    ]
    formatted = table(data, header=header, divider=True)
    print(formatted)


text_to_doctable("Hello internet. My name is Vincent Warmerdam. I like to write Python")


text        lemma       pos     ent      shape   punct   morph                         
---------   ---------   -----   ------   -----   -----   ------------------------------
Hello       hello       INTJ             Xxxxx   False                                 
internet    internet    NOUN             xxxx    False   Number=Sing                   
.           .           PUNCT            .       True    PunctType=Peri                
My          my          PRON             Xx      False   Number=Sing|Person=1|Poss=Yes|PronType=Prs
name        name        NOUN             xxxx    False   Number=Sing                   
is          be          AUX              xx      False   Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin
Vincent     Vincent     PROPN   PERSON   Xxxxx   False   Number=Sing                   
Warmerdam   Warmerdam   PROPN   PERSON   Xxxxx   False   Number=Sing                   
.           .           PUNCT            .       True    PunctType=Peri             

In [19]:
doc = nlp(
    "Hi, my name is Vincent Warmerdam. I live near Amsterdam and I like to write Python"
)
for ent in doc.ents:
    print(ent, ent.label_)

Vincent Warmerdam PERSON
Amsterdam GPE


## Displacy

In [20]:
from spacy import displacy

doc = nlp("Hi, my name is Vincent. I like to write Python")
displacy.render(doc)

In [21]:
doc = nlp("Hi, my name is Vincent. I like to write Python")
displacy.render(doc, style="ent")

In [22]:
doc = nlp("Hi, my name is Vincent Warmerdam. I like to write Python")
displacy.render(doc, style="ent")

## Document Properties

In [24]:
doc = nlp("Hi, my name is Vincent. I like to write Python")
list(doc.sents)

[Hi, my name is Vincent., I like to write Python]

In [28]:
doc = nlp(
    "Star Wars is a very popular science fiction series. This is going to be a great book if you want to read or write."
)
list(doc.noun_chunks)

[Star Wars, a very popular science fiction series, This, a great book, you]

In [32]:
doc = nlp("Hi, my name is Vincent. I like to write Python")
doc.to_json()

{'text': 'Hi, my name is Vincent. I like to write Python',
 'ents': [{'start': 15, 'end': 22, 'label': 'PERSON'}],
 'sents': [{'start': 0, 'end': 23}, {'start': 24, 'end': 46}],
 'tokens': [{'id': 0,
   'start': 0,
   'end': 2,
   'tag': 'UH',
   'pos': 'INTJ',
   'morph': '',
   'lemma': 'hi',
   'dep': 'intj',
   'head': 4},
  {'id': 1,
   'start': 2,
   'end': 3,
   'tag': ',',
   'pos': 'PUNCT',
   'morph': 'PunctType=Comm',
   'lemma': ',',
   'dep': 'punct',
   'head': 4},
  {'id': 2,
   'start': 4,
   'end': 6,
   'tag': 'PRP$',
   'pos': 'PRON',
   'morph': 'Number=Sing|Person=1|Poss=Yes|PronType=Prs',
   'lemma': 'my',
   'dep': 'poss',
   'head': 3},
  {'id': 3,
   'start': 7,
   'end': 11,
   'tag': 'NN',
   'pos': 'NOUN',
   'morph': 'Number=Sing',
   'lemma': 'name',
   'dep': 'nsubj',
   'head': 4},
  {'id': 4,
   'start': 12,
   'end': 14,
   'tag': 'VBZ',
   'pos': 'AUX',
   'morph': 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin',
   'lemma': 'be',
   'dep': 'R

## Data Structures

In [33]:
doc = nlp("Hi. My name is Vincent.")
doc, type(doc)

(Hi. My name is Vincent., spacy.tokens.doc.Doc)

In [36]:
doc[0], type(doc[0])

(Hi, spacy.tokens.token.Token)

In [39]:
doc[2], type(doc[:2])

(My, spacy.tokens.span.Span)