In [1]:
%pip install spaCy
# You need to download a specific model for each language
# Each language has two models, one for efficiency and one for accuracy. https://spacy.io/usage/models
%python -m spacy download en_core_web_sm # efficent model for English

Collecting spaCy
  Using cached spacy-3.8.2.tar.gz (1.3 MB)
  Installing build dependencies: started
  Installing build dependencies: still running...
  Installing build dependencies: still running...
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spaCy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spaCy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spaCy)
  Using cached murmurhash-1.0.12-cp313-cp313-win_amd64.whl.metadata (2.2 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spaCy)
  Using cached cymem-2.0.11-cp313-cp313-win_amd64.whl.metadata (8.8 kB)
Collecting preshed<3.1

  error: subprocess-exited-with-error
  
  × Building wheel for spaCy (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [1812 lines of output]
      Copied C:\Users\fonta\AppData\Local\Temp\pip-install-v8mr89pi\spacy_cdfab37a2a5747c8b06937e82d9728f7\setup.cfg -> C:\Users\fonta\AppData\Local\Temp\pip-install-v8mr89pi\spacy_cdfab37a2a5747c8b06937e82d9728f7\spacy\tests\package
      Copied C:\Users\fonta\AppData\Local\Temp\pip-install-v8mr89pi\spacy_cdfab37a2a5747c8b06937e82d9728f7\pyproject.toml -> C:\Users\fonta\AppData\Local\Temp\pip-install-v8mr89pi\spacy_cdfab37a2a5747c8b06937e82d9728f7\spacy\tests\package
      Cythonizing sources
      !!
      
              ********************************************************************************
              Please consider removing the following classifiers in favor of a SPDX license expression:
      
              License :: OSI Approved :: MIT License
      
              See https://packaging.python.org/en/latest/gui

# spaCy tutorial

*Notice that the installation doesn’t automatically download models. We need to do that ourselves. (python -m spacy download en_core_web_sm)*

Hello World in spaCy

In [None]:
import spacy

# Load the English language model.
nlp = spacy.load('en_core_web_sm')
# Passing a string to the model returns a Doc object, which can be iterated
# token by token.
doc = nlp('Hello World!')
print(*(tok.text for tok in doc), sep='\n')

ModuleNotFoundError: No module named 'spacy'

spaCy preserves this “link” between the word and its place in the raw text. Here’s how to get the exact index of a word:

In [None]:
# token.idx is the index of the token in the original string.
for tok in doc:
    print(f"{tok.text} ", tok.idx)

Hello  0
World  6
!  11


The **Token** class exposes a lot of word-level attributes. Here are a few examples:

In [None]:
doc = nlp("Next week I'll be in Rome.")
# One tab-separated row per token: text, index, lemma, punctuation status,
# space status, shape, part of speech and tag.
for tok in doc:
    columns = (
        tok.text, tok.idx, tok.lemma_, tok.is_punct,
        tok.is_space, tok.shape_, tok.pos_, tok.tag_,
    )
    print("\t".join(str(column) for column in columns))

Next	0	next	False	False	Xxxx	ADJ	JJ
week	5	week	False	False	xxxx	NOUN	NN
I	10	I	False	False	X	PRON	PRP
'll	11	will	False	False	'xx	AUX	MD
be	15	be	False	False	xx	AUX	VB
in	18	in	False	False	xx	ADP	IN
Rome	21	Rome	False	False	Xxxx	PROPN	NNP
.	25	.	True	False	.	PUNCT	.


## Sentence detection
Here’s how to achieve one of the most common NLP tasks with spaCy:

In [None]:
doc = nlp("These are apples. These are oranges.")

# doc.sents yields the detected sentences one at a time.
for sentence in doc.sents:
    print(sentence)

These are apples.
These are oranges.


## Part Of Speech Tagging
PoS-tagging of a sentence:

In [None]:
doc = nlp("Next week I'll be in Madrid.")
# Pair the text of every token with its part-of-speech tag.
pos_pairs = [(tok.text, tok.pos_) for tok in doc]
print(pos_pairs)

[('Next', 'ADJ'), ('week', 'NOUN'), ('I', 'PRON'), ("'ll", 'AUX'), ('be', 'AUX'), ('in', 'ADP'), ('Madrid', 'PROPN'), ('.', 'PUNCT')]


## Named Entity Recognition
Doing NER with spaCy is super easy and the pretrained model performs pretty well:

In [None]:
doc = nlp("Next week I'll be in Madrid.")
# doc.ents holds the named entities found in the Doc; print each one with its label.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Next week DATE
Madrid GPE


You can also view the IOB style tagging of the sentence like this:

In [None]:
doc = nlp("Next week I'll be in Madrid.")
# Instead of doc.ents, read the per-token attributes: ent_iob_ is the IOB tag
# of the token and ent_type_ is the type of the entity it belongs to.
iob_tagged = []
for tok in doc:
    if tok.ent_iob_ == 'O':
        # Outside any entity: there is no type to append.
        label = tok.ent_iob_
    else:
        label = "{0}-{1}".format(tok.ent_iob_, tok.ent_type_)
    iob_tagged.append((tok.text, tok.tag_, label))
print(iob_tagged)

[('Next', 'JJ', 'B-DATE'), ('week', 'NN', 'I-DATE'), ('I', 'PRP', 'O'), ("'ll", 'MD', 'O'), ('be', 'VB', 'O'), ('in', 'IN', 'O'), ('Madrid', 'NNP', 'B-GPE'), ('.', '.', 'O')]


The spaCy NER also has a healthy variety of entities. You can view the full list here: https://spacy.io/usage/linguistic-features#entity-types

In [None]:
sentence = "I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ"
doc = nlp(sentence)
# label_ gives the type of each named entity.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

2 CARDINAL
9 a.m. TIME
30% PERCENT
just 2 days DATE
WSJ ORG


Let’s use displaCy to view a beautiful visualization of the Named Entity annotated sentence:

In [None]:
from spacy import displacy

sentence = 'I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ'
doc = nlp(sentence)
# displacy is spaCy's visualization module; style='ent' renders the named
# entities of the Doc.
displacy.render(doc, style='ent', jupyter=True)

## Chunking
spaCy automatically detects noun-phrases as well:

In [None]:
doc = nlp("Wall Street Journal just published an interesting piece on crypto currencies")
# A chunk is a contiguous span of tokens that forms a single noun phrase;
# doc.noun_chunks yields them one by one.
# For each chunk print its text, its label and the text of its root token.
for noun_phrase in doc.noun_chunks:
    print(f"{noun_phrase.text} {noun_phrase.label_} {noun_phrase.root.text}")

Wall Street Journal NP Journal
an interesting piece NP piece
crypto currencies NP currencies


Notice how the chunker also computes the root of the phrase, the main word of the phrase.

## Dependency Parsing

Let’s see the dependency parser in action:

In [None]:
doc = nlp('Wall Street Journal just published an interesting piece on crypto currencies')

# Dependency parsing analyzes the grammatical structure of a sentence and
# establishes relationships between words: dep_ is the relation of a token to
# its head, and head is the token it attaches to.
for tok in doc:
    head = tok.head
    print(f"{tok.text}/{tok.tag_} <--{tok.dep_}-- {head.text}/{head.tag_}")


Wall/NNP <--compound-- Street/NNP
Street/NNP <--compound-- Journal/NNP
Journal/NNP <--nsubj-- published/VBD
just/RB <--advmod-- published/VBD
published/VBD <--ROOT-- published/VBD
an/DT <--det-- piece/NN
interesting/JJ <--amod-- piece/NN
piece/NN <--dobj-- published/VBD
on/IN <--prep-- piece/NN
crypto/JJ <--compound-- currencies/NNS
currencies/NNS <--pobj-- on/IN


If this doesn’t help you visualize the dependency tree, displaCy comes in handy:

In [None]:
sentence = 'Wall Street Journal just published an interesting piece on crypto currencies'
doc = nlp(sentence)
# style='dep' makes displacy draw the dependency parse of the Doc.
render_options = {'distance': 90}
displacy.render(doc, style='dep', jupyter=True, options=render_options)

# A simple case study
Now, we download a text file and process its content using spaCy.

In [None]:
!wget https://www.gutenberg.org/ebooks/2701.txt.utf-8
!head -n 5 2701.txt.utf-8

--2025-04-15 14:21:09--  https://www.gutenberg.org/ebooks/2701.txt.utf-8
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: http://www.gutenberg.org/cache/epub/2701/pg2701.txt [following]
--2025-04-15 14:21:10--  http://www.gutenberg.org/cache/epub/2701/pg2701.txt
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.gutenberg.org/cache/epub/2701/pg2701.txt [following]
--2025-04-15 14:21:10--  https://www.gutenberg.org/cache/epub/2701/pg2701.txt
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1276288 (1.2M) [text/plain]
Saving to: ‘2701.txt.utf-8.1’


2025-04-15 14:21:10 (3.75 MB/s) - ‘2701.txt.utf-8.1’ sav

Remove the header and footer from the original file.

In [None]:
# Copy only the lines strictly between the Project Gutenberg START and END
# marker lines into moby_dick.txt.
START_MARKER = '*** START OF THE PROJECT GUTENBERG EBOOK'
END_MARKER = '*** END OF THE PROJECT GUTENBERG EBOOK'

# `with` closes both files even if an error occurs; the explicit encoding keeps
# the result independent of the platform's default encoding.
with open('2701.txt.utf-8', 'r', encoding='utf-8') as raw_book, \
        open('moby_dick.txt', 'w', encoding='utf-8') as clean_book:
    inside_body = False
    for line in raw_book:
        if line.startswith(END_MARKER):
            # Everything after the END marker is footer text.
            break
        if inside_body:
            # Each copied line is prefixed with a single space.
            clean_book.write(' ' + line)
        elif line.startswith(START_MARKER):
            # The marker line itself is not copied; copying starts on the next line.
            inside_body = True

In [None]:
# Show the first 500 lines of moby_dick.txt.
!head -n 500 moby_dick.txt

 
 
 
 
 MOBY-DICK;
 
 or, THE WHALE.
 
 By Herman Melville
 
 
 
 CONTENTS
 
 ETYMOLOGY.
 
 EXTRACTS (Supplied by a Sub-Sub-Librarian).
 
 CHAPTER 1. Loomings.
 
 CHAPTER 2. The Carpet-Bag.
 
 CHAPTER 3. The Spouter-Inn.
 
 CHAPTER 4. The Counterpane.
 
 CHAPTER 5. Breakfast.
 
 CHAPTER 6. The Street.
 
 CHAPTER 7. The Chapel.
 
 CHAPTER 8. The Pulpit.
 
 CHAPTER 9. The Sermon.
 
 CHAPTER 10. A Bosom Friend.
 
 CHAPTER 11. Nightgown.
 
 CHAPTER 12. Biographical.
 
 CHAPTER 13. Wheelbarrow.
 
 CHAPTER 14. Nantucket.
 
 CHAPTER 15. Chowder.
 
 CHAPTER 16. The Ship.
 
 CHAPTER 17. The Ramadan.
 
 CHAPTER 18. His Mark.
 
 CHAPTER 19. The Prophet.
 
 CHAPTER 20. All Astir.
 
 CHAPTER 21. Going Aboard.
 
 CHAPTER 22. Merry Christmas.
 
 CHAPTER 23. The Lee Shore.
 
 CHAPTER 24. The Advocate.
 
 CHAPTER 25. Postscript.
 
 CHAPTER 26. Knights and Squires.
 
 CHAPTER 27. Knights and Squires.
 
 CHAPTER 28. Ahab.
 
 CHAPTER 29. Enter Ahab; to Him, Stubb.
 
 CHAPTER 30. The Pipe.
 
 CHAPTER 31. 

Count occurrences for each NOUN in the book.

In [None]:
from collections import Counter
from itertools import chain

# Count how often each noun lemma occurs, processing the book one paragraph
# (a run of non-blank lines) at a time.
nouns = Counter()
paragraph = ""
with open('moby_dick.txt', 'r', encoding='utf-8') as book:
    # The trailing "" acts as a final blank line, so the last paragraph is
    # processed even when the file does not end with an empty line.
    for line in chain(book, [""]):
        line = line.strip()
        if line:
            # Lines of one paragraph are joined with single spaces.
            paragraph += " " + line
        elif paragraph:
            doc = nlp(paragraph)
            nouns.update(token.lemma_ for token in doc if token.pos_ == 'NOUN')
            paragraph = ""

In [None]:
# Nouns ordered from most to least frequent.
dict(sorted(nouns.items(), key=lambda entry: entry[1], reverse=True))

{'whale': 1121,
 'man': 745,
 'ship': 594,
 'sea': 509,
 'boat': 477,
 'time': 438,
 'head': 414,
 'hand': 322,
 'thing': 318,
 '_': 304,
 'way': 287,
 'water': 255,
 'day': 253,
 'eye': 242,
 'side': 227,
 'deck': 209,
 'line': 192,
 'part': 188,
 'life': 182,
 'world': 179,
 'sort': 168,
 'ye': 166,
 'foot': 160,
 'night': 158,
 'fish': 155,
 'crew': 154,
 'air': 140,
 'captain': 140,
 'harpooneer': 130,
 'place': 127,
 'year': 121,
 'arm': 121,
 'body': 119,
 'end': 117,
 'heart': 117,
 'moment': 117,
 'mast': 116,
 'sight': 116,
 'mate': 115,
 'whaleman': 113,
 'leg': 110,
 'voyage': 106,
 'sperm': 106,
 'sail': 105,
 'soul': 104,
 'one': 104,
 'harpoon': 104,
 'sailor': 103,
 'face': 103,
 'word': 102,
 'iron': 101,
 'sun': 101,
 'sir': 100,
 'thou': 100,
 'bone': 98,
 'case': 96,
 'oil': 91,
 'matter': 90,
 'wind': 89,
 'tail': 88,
 'bow': 87,
 'ocean': 85,
 'boy': 83,
 'length': 82,
 'hour': 82,
 'cabin': 82,
 'land': 81,
 'death': 81,
 'vessel': 80,
 'order': 80,
 'name': 79,
 

# Count Entities

1.   Count the number of entities for the following types: PER (labelled `PERSON` by the English model), LOC and ORG.
2.   Order PERsons according to their occurrences.
3.   Order LOCations according to their occurrences.  

In [None]:
# 1
from collections import Counter
from itertools import chain

# Tally every entity label, one paragraph (a run of non-blank lines) at a time.
label_counts = Counter()
paragraph = ""
with open('moby_dick.txt', 'r', encoding='utf-8') as book:
    # The trailing "" acts as a final blank line, so the last paragraph is
    # processed even when the file does not end with an empty line.
    for line in chain(book, [""]):
        line = line.strip()
        if line:
            # Lines of one paragraph are joined with single spaces.
            paragraph += " " + line
        elif paragraph:
            doc = nlp(paragraph)
            label_counts.update(ent.label_ for ent in doc.ents)
            paragraph = ""

# Only these three labels are reported; a label that never occurred counts as 0.
per = label_counts['PERSON']
loc = label_counts['LOC']
org = label_counts['ORG']

print('{0} {1} {2}'.format(per, loc, org))

2249 277 1041


In [None]:
# 2
from collections import Counter
from itertools import chain

# Count the surface text of every PERSON entity, one paragraph at a time.
pers = Counter()
paragraph = ""
with open('moby_dick.txt', 'r', encoding='utf-8') as book:
    # The trailing "" acts as a final blank line, so the last paragraph is
    # processed even when the file does not end with an empty line.
    for line in chain(book, [""]):
        line = line.strip()
        if line:
            # Lines of one paragraph are joined with single spaces.
            paragraph += " " + line
        elif paragraph:
            doc = nlp(paragraph)
            pers.update(ent.text for ent in doc.ents if ent.label_ == 'PERSON')
            paragraph = ""

# Most frequent first; entries with equal counts keep their first-seen order.
dict(pers.most_common())

{'Ahab': 464,
 'Starbuck': 191,
 'Moby Dick': 74,
 'Peleg': 72,
 'Tashtego': 52,
 'Jonah': 46,
 'Stubb': 30,
 'Fedallah': 28,
 'Pip': 21,
 'Gabriel': 20,
 'Ahab’s': 19,
 'Lakeman': 18,
 'Bildad': 17,
 'Daggoo': 16,
 'Perth': 16,
 'Leviathan': 13,
 'Hussey': 13,
 'Cook': 12,
 'Ishmael': 12,
 'Manxman': 11,
 'Dough-Boy': 11,
 'Carpenter': 10,
 'Scoresby': 10,
 'Nantucketers': 10,
 'Mapple': 9,
 'Yojo': 9,
 'Folio': 9,
 'Guernsey': 9,
 'Whales': 8,
 'Jove': 8,
 'Flask': 8,
 'Octavo': 8,
 'Death': 7,
 'Hurrah': 7,
 'Don Sebastian': 7,
 'Mayhew': 7,
 'Vishnoo': 7,
 'Ho': 7,
 'Nantucket': 6,
 'Landlord': 6,
 'Adam': 6,
 'Quohog': 6,
 'Thou': 6,
 'Sleet': 6,
 'Solomon': 6,
 'Steelkilt': 6,
 'Sperm Whales': 6,
 'Line': 6,
 'Pip’s': 6,
 'Mark': 5,
 'Bulkington': 5,
 'Joppa': 5,
 'Nineveh': 5,
 'Duodecimo': 5,
 'Tash': 5,
 'Archy': 5,
 'Don': 5,
 'Hindoo': 5,
 'Java': 5,
 'Derick': 5,
 'Cabin': 4,
 'Johnson': 4,
 'Euroclydon': 4,
 'Cadiz': 4,
 'Delight': 4,
 'Queequeg': 4,
 'Owen': 4,
 'Belshazz

In [None]:
# 3
from collections import Counter
from itertools import chain

# Count the surface text of every LOC entity, one paragraph at a time.
locs = Counter()
paragraph = ""
with open('moby_dick.txt', 'r', encoding='utf-8') as book:
    # The trailing "" acts as a final blank line, so the last paragraph is
    # processed even when the file does not end with an empty line.
    for line in chain(book, [""]):
        line = line.strip()
        if line:
            # Lines of one paragraph are joined with single spaces.
            paragraph += " " + line
        elif paragraph:
            doc = nlp(paragraph)
            locs.update(ent.text for ent in doc.ents if ent.label_ == 'LOC')
            paragraph = ""

# Most frequent first; entries with equal counts keep their first-seen order.
dict(locs.most_common())

{'Pacific': 29,
 'Atlantic': 16,
 'Cape Horn': 15,
 'Africa': 8,
 'Mediterranean': 8,
 'New England': 7,
 'Andes': 7,
 'West': 7,
 'East': 7,
 'North': 6,
 'Asia': 5,
 'South': 5,
 'Cape': 5,
 'South Sea': 5,
 'Europe': 4,
 'Jupiter': 4,
 'Arctic': 4,
 'the Cape of Good Hope': 4,
 'Temple': 4,
 'the South Seas': 3,
 'Highland': 3,
 'Straits': 3,
 'Crotch': 2,
 'Io': 2,
 'the Polar Sea': 2,
 'the Pacific Ocean': 2,
 'Queequeg': 2,
 'Siberia': 2,
 'the Indian Ocean': 2,
 'Alps': 2,
 'Horn': 2,
 'Hull': 2,
 'South-Sea': 2,
 'Porpoise': 2,
 'the Huzza Porpoise': 2,
 'Baltic': 2,
 'Hudson': 2,
 'the Persian Gulf': 2,
 'the Town-Ho': 2,
 'St. George’s': 2,
 'Sea': 2,
 'Long Island': 2,
 'Shetland': 1,
 'Sperma': 1,
 'the Pacific ocean': 1,
 'the Northern Ocean': 1,
 'the Arctic Ocean': 1,
 'Hardicanutes': 1,
 'the Black Sea': 1,
 'the Cape of Blanco': 1,
 'Battery': 1,
 'Mt. Hecla': 1,
 'the Green Mountains': 1,
 'Green Mountains': 1,
 'the Isle of Desolation': 1,
 'the Straits of Gibraltar'

Extending spaCy
----------------------

The entire spaCy architecture is built upon three building blocks: Document (the big encompassing container), Token (most of the time, a word) and Span (a set of consecutive Tokens). The extensions you create can add extra functionality to any one of these components. There are some examples out there of what you can do. Let’s create an extension ourselves.

### Creating Document level Extension

In [None]:
import spacy
import nltk

from spacy.tokens import Doc
from nltk.sentiment.vader import SentimentIntensityAnalyzer 
# SentimentIntensityAnalyzer is NLTK's VADER sentiment scorer; it relies on the
# 'vader_lexicon' resource downloaded below.
nltk.download('vader_lexicon')
sentiment_analyzer = SentimentIntensityAnalyzer()


def polarity_scores(doc):
    """Return the analyzer's polarity scores for the raw text of ``doc``."""
    raw_text = doc.text
    return sentiment_analyzer.polarity_scores(raw_text)


# Register a custom Doc attribute (not a pipeline component): reading
# `doc._.polarity_scores` calls the getter above. force=True overwrites an
# existing registration, so the cell can be re-run.
Doc.set_extension('polarity_scores', getter=polarity_scores, force=True)

doc = nlp("Today, there is the sun and it is a wonderful day!")
scores = doc._.polarity_scores
print(scores)



{'neg': 0.0, 'neu': 0.693, 'pos': 0.307, 'compound': 0.6114}


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
# Read the custom attribute on a clearly positive sentence.
doc = nlp("Today is a nice day!!!")
scores = doc._.polarity_scores
print(scores)

{'neg': 0.0, 'neu': 0.449, 'pos': 0.551, 'compound': 0.5684}


In [None]:
# Read the custom attribute on a sentence mixing positive and negative words.
doc = nlp("I love my dog, but I hate cats.")
scores = doc._.polarity_scores
print(scores)

{'neg': 0.433, 'neu': 0.343, 'pos': 0.223, 'compound': -0.5346}


One can easily create extensions for every component type. Such extensions only have access to the context of that component. What happens if you need the tokenized text along with the Part-Of-Speech tags? Let’s now build a custom pipeline. Pipelines are another important abstraction of spaCy. The nlp object goes through a list of pipelines and runs them on the document. For example, the tagger is run first, then the parser and ner pipelines are applied to the already POS-annotated document. Here’s what the default nlp pipeline structure looks like:

In [None]:
# nlp.pipeline lists the (name, component) pairs of the pipeline.
print(nlp.pipeline)

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec object at 0x7f9231eafd10>), ('tagger', <spacy.pipeline.tagger.Tagger object at 0x7f9231eaf950>), ('parser', <spacy.pipeline.dep_parser.DependencyParser object at 0x7f92cdd3d700>), ('attribute_ruler', <spacy.pipeline.attributeruler.AttributeRuler object at 0x7f9231f91750>), ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer object at 0x7f9231c920d0>), ('ner', <spacy.pipeline.ner.EntityRecognizer object at 0x7f92ec3fa880>)]


### Creating a custom pipeline

Let’s build a custom pipeline that needs to be applied after the tagger pipeline has run. We need the POS tags to get the Synset from WordNet.

In [None]:
from nltk.corpus import wordnet as wn
from spacy.tokens import Token
from spacy.language import Language



# Convert between spaCy PoS tag-set and WordNet PoS tag-set
def penn_to_wn(tag):
    """Map a fine-grained PoS tag to a WordNet part-of-speech letter.

    Tags starting with 'N', 'V', 'J' or 'R' map to 'n', 'v', 'a' and 'r'
    respectively; any other tag yields None.
    """
    prefix_to_wn = (('N', 'n'), ('V', 'v'), ('J', 'a'), ('R', 'r'))
    for prefix, wn_pos in prefix_to_wn:
        if tag.startswith(prefix):
            return wn_pos
    return None

# Define a class that implements the new component into the pipeline
class WordnetPipeline(object):
    """Pipeline component that stores a WordNet synset on each token.

    The synset is exposed as the custom attribute ``token._.synset``; it stays
    None for tokens whose tag has no WordNet equivalent or whose text WordNet
    does not know.
    """

    def __init__(self, nlp):
        # force=True allows the extension to be registered again on re-runs.
        Token.set_extension('synset', default=None, force=True)

    def __call__(self, doc):
        """Annotate every token of ``doc`` with a synset and return ``doc``."""
        for token in doc:
            wn_tag = penn_to_wn(token.tag_)
            if wn_tag is None:
                continue
            synsets = wn.synsets(token.text, wn_tag)
            if not synsets:
                # WordNet has no entry for this word/PoS pair: keep the
                # default None instead of failing on an empty list.
                continue
            # We take only the first meaning
            token._.set('synset', synsets[0])

        return doc

# Download the 'wordnet' resource through NLTK.
nltk.download('wordnet')

# The decorator registers this function as the factory named "wordnet_pipe",
# which is the name later passed to nlp.add_pipe.
@Language.factory("wordnet_pipe")
def wordnet_pipe(nlp, name):
    """Build a WordnetPipeline component; ``name`` is accepted but not used here."""
    return WordnetPipeline(nlp)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Setup the new pipeline.

In [None]:
# Add the component only if it is not already present, so re-running the cell
# after an interrupted run does not fail on a duplicate name.
if not nlp.has_pipe("wordnet_pipe"):
    nlp.add_pipe("wordnet_pipe")

try:
    doc = nlp("Paris is the awesome capital of France.")

    for token in doc:
        print(token.text, token.tag_, token._.synset)

    # Let's see what the pipeline structure looks like
    print(nlp.pipeline)
finally:
    # Always restore the default pipeline, even if processing raised.
    nlp.remove_pipe("wordnet_pipe")

Paris NNP Synset('paris.n.01')
is VBZ Synset('be.v.01')
the DT None
awesome JJ Synset('amazing.s.02')
capital NN Synset('capital.n.01')
of IN None
France NNP Synset('france.n.01')
. . None
[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec object at 0x7f9231eafd10>), ('tagger', <spacy.pipeline.tagger.Tagger object at 0x7f9231eaf950>), ('parser', <spacy.pipeline.dep_parser.DependencyParser object at 0x7f92cdd3d700>), ('attribute_ruler', <spacy.pipeline.attributeruler.AttributeRuler object at 0x7f9231f91750>), ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer object at 0x7f9231c920d0>), ('ner', <spacy.pipeline.ner.EntityRecognizer object at 0x7f92ec3fa880>), ('wordnet_pipe', <__main__.WordnetPipeline object at 0x7f9223e9fd10>)]


('wordnet_pipe', <__main__.WordnetPipeline at 0x7f9223e9fd10>)