# spaCy tutorial

In [1]:
import spacy

## Loading model
Naming the loaded pipeline `nlp` is the standard convention.

In [2]:
# Load the `en_core_web_lg` pipeline; `nlp` is the callable used below to process text.
MODEL_NAME = "en_core_web_lg"
nlp = spacy.load(MODEL_NAME)

## Document

In [35]:
# Run the pipeline on a two-sentence sample text; the resulting `doc` is inspected below.
sample_text = (
    'White bird feels miserable, sad and depressed without black panter. '
    'Black panter loves you.'
)
doc = nlp(sample_text)

## Token

### Check how the text is tokenized

In [36]:
# Show the first ten tokens, one per line, to inspect how the text was split.
first_tokens = doc[:10]
for tok in first_tokens:
    print(tok.text)

White
bird
feels
miserable
,
sad
and
depressed
without
black


### Attributes

In [100]:
# Inspect a single token (index 5 of the document) through several of its attributes.
t = doc[5]
print(t)
print("Related words: ", t.conjuncts)

# Where the token sits in the dependency parse.
print("Dependency in sentence:")
for label, value in (
    ("\tChildren", list(t.children)),
    ("\tAncestors", list(t.ancestors)),
    ("Subtrees:", list(t.subtree)),
):
    print(label, value)

# Remaining per-token attributes, printed as "label value" pairs.
for label, value in (
    ("Vector (first 3 numbers):", t.vector[:3]),
    ("Dependency:", t.dep_),
    ("Morphological analysis:", t.morph),
    ("Tag:", t.tag_),
    ("Is out of vocabulary:", t.is_oov),
    ("Is currency symbol:", t.is_currency),
    ("Is digit:", t.is_digit),
    ("Lexeme:", t.lex),
):
    print(label, value)

sad
Related words:  (miserable, depressed)
Dependency in sentence:
	Children [and, depressed]
	Ancestors [miserable, feels]
Subtrees: [sad, and, depressed, without, black, panter]
Vector (first 3 numbers): [5.7062 2.0257 1.3909]
Dependency: conj
Morphological analysis: Degree=Pos
Tag: JJ
Is out of vocabulary: False
Is currency symbol: False
Is digit: False
Lexeme: <spacy.lexeme.Lexeme object at 0x7f102c0db040>


## Sentence

### Check how the text is segmented into sentences

In [None]:
# Print every sentence the pipeline found in the document, one per line.
# The sentences are iterated directly: the enumerate() index was never used.
for s in doc.sents:
    print(s)

### Attributes

In [102]:
# Materialize the sentences once so the first and the second can be picked by index.
sentences = list(doc.sents)
s = sentences[0]

# Attributes of the first sentence, printed as "label value" pairs.
for label, value in (
    ("Type: ", type(s)),
    ("Ents: ", s.ents),
    ("Root: ", s.root),
    ("Sentiment:", s.sentiment),
    ("Root:", s.root),
    ("Vector (first 3 numbers): ", s.vector[:3]),
    ("Vector norm: ", s.vector_norm),
    ("Lemmas:", s.lemma_),
):
    print(label, value)

# Every token reachable through the sentence's subtree, one per line.
print("Subtrees: ")
for node in s.subtree:
    print("\t", node)

# Compare the first sentence with the one that follows it.
print("Similarity to next sentence:", s.similarity(sentences[1]))

Type:  <class 'spacy.tokens.span.Span'>
Ents:  []
Root:  feels
Sentiment: 0.0
Root: feels
Vector (first 3 numbers):  [-0.8004429 -0.6610359 -2.1754591]
Vector norm:  24.140896
Lemmas: white bird feel miserable, sad and depressed without black panter.
Subtrees: 
	 White
	 bird
	 feels
	 miserable
	 ,
	 sad
	 and
	 depressed
	 without
	 black
	 panter
	 .
Similarity to next sentence: 0.7117020487785339


### Sentence diagram
How the words in a sentence relate to each other.

In [71]:
from spacy import displacy

# Render the dependency diagram of sentence `s` with the compact layout and the "blue" color option.
options = dict(compact=True, color="blue")
displacy.render(s, style="dep", options=options)