In [3]:
import spacy
nlp = spacy.load('en_core_web_sm')
nlp

<spacy.lang.en.English at 0x14daad8f140>

In [4]:
doc = nlp(u'Tesla is looking at buying U.S. startup for $6 million')
doc

Tesla is looking at buying U.S. startup for $6 million

In [5]:
for token in doc:
    print(token.text, token.pos, token.pos_, token.dep_)

Tesla 96 PROPN nsubj
is 87 AUX aux
looking 100 VERB ROOT
at 85 ADP prep
buying 100 VERB pcomp
U.S. 96 PROPN compound
startup 92 NOUN dobj
for 85 ADP prep
$ 99 SYM quantmod
6 93 NUM compound
million 93 NUM pobj


In [6]:
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x14dacf999d0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x14dacf98d10>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x14dacde6180>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x14dad1f3310>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x14dad1f6190>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x14dacde63b0>)]

In [7]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [8]:
doc2 = nlp(u'This is the first sentence. This is the second sentence. This is the third sentence.')
type(doc2)

spacy.tokens.doc.Doc

In [9]:
span = doc2[6:12]
span

This is the second sentence.

In [10]:
type(span)

spacy.tokens.span.Span

In [11]:
for sent in doc2.sents:
    print(sent)

This is the first sentence.
This is the second sentence.
This is the third sentence.


In [12]:
doc2[6].is_sent_start # 'this'

True

# Magic of entity finding

In [13]:
doc3 = nlp(u'Apple to build a Hong Kong factory for $6 million')

In [14]:
for entity in doc3.ents:
    print(entity, entity.label_, str(spacy.explain(entity.label_)))

Apple ORG Companies, agencies, institutions, etc.
Hong Kong GPE Countries, cities, states
$6 million MONEY Monetary values, including unit


# Use of Noun Chunks

In [15]:
doc4 = nlp(u'Autonomous cars shift insurance liability toward manufacturers.')

In [16]:
for chunk in doc4.noun_chunks:
    print(chunk)

Autonomous cars
insurance liability
manufacturers


In [17]:
from spacy import displacy

displacy.render(doc3, style='dep', jupyter=True, options={'distance': 110}) # here dep means to render the syntactic dependency

In [20]:
doc5 = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')

In [21]:
displacy.render(doc5, style='ent', jupyter=True)

In [23]:
doc = nlp(u'This is a sentence.')
displacy.serve(doc, style='dep')




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [20/Dec/2024 15:22:49] "GET / HTTP/1.1" 200 3395
127.0.0.1 - - [20/Dec/2024 15:22:49] "GET /favicon.ico HTTP/1.1" 200 3395
127.0.0.1 - - [20/Dec/2024 15:25:41] "GET / HTTP/1.1" 200 3395
127.0.0.1 - - [20/Dec/2024 15:25:41] "GET /favicon.ico HTTP/1.1" 200 3395


Shutting down server on port 5000.
