# **NLTK vs spaCy vs Stanza (Stanford NLP)**

In [1]:
%load_ext watermark
%watermark -a 'Navin Kumar M 20BAI1094' -v -p nltk,spacy,stanza

Author: Navin Kumar M 20BAI1094

Python implementation: CPython
Python version       : 3.8.10
IPython version      : 7.34.0

nltk  : 3.8.1
spacy : 3.5.2
stanza: 1.5.0



In [2]:
import nltk
import stanza
import spacy

# Fetch the NLTK data packages this notebook downloads, in the original order
# (presumably for the tokenizer, POS tagger and NE chunker used below).
for _resource in ('punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'):
    nltk.download(_resource)

# spaCy: load the small English pipeline. There is no download step here, so
# the model package is assumed to be installed already.
nlp = spacy.load("en_core_web_sm")

# Stanza: download the default English models, then build the default pipeline.
stanza.download('en')
nlp_stz = stanza.Pipeline('en')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.5.0.json:   0%|   …

2023-08-30 11:14:26 INFO: Downloading default packages for language: en (English) ...
2023-08-30 11:14:27 INFO: File exists: /root/stanza_resources/en/default.zip
2023-08-30 11:14:32 INFO: Finished downloading models and saved to /root/stanza_resources.
2023-08-30 11:14:32 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.5.0.json:   0%|   …

2023-08-30 11:14:33 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| constituency | wsj       |
| depparse     | combined  |
| sentiment    | sstplus   |
| ner          | ontonotes |

2023-08-30 11:14:33 INFO: Using device: cuda
2023-08-30 11:14:33 INFO: Loading: tokenize
2023-08-30 11:14:33 INFO: Loading: pos
2023-08-30 11:14:33 INFO: Loading: lemma
2023-08-30 11:14:33 INFO: Loading: constituency
2023-08-30 11:14:34 INFO: Loading: depparse
2023-08-30 11:14:34 INFO: Loading: sentiment
2023-08-30 11:14:34 INFO: Loading: ner
2023-08-30 11:14:35 INFO: Done loading processors!


## **POS Tagging**

In [3]:
# Example sentences fed to the POS-tagging cells (NLTK, spaCy, Stanza) below.
# NOTE: the name `sentences` is rebound in the NER and Chunking sections, so
# the POS-tagging cells must be run before those cells to see these inputs.
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "She sells seashells by the seashore.",
    "The cat is sitting on the windowsill.",
    "I enjoy reading books in the evening.",
    "They are cooking dinner in the kitchen.",
    "He played the guitar at the concert."
]

### **NLTK**

In [4]:
# NLTK: tokenize each sentence, tag the tokens, and print the (token, tag) list.
for text in sentences:
    tagged = nltk.pos_tag(nltk.word_tokenize(text))
    print("Sentence:", text)
    print("POS Tags:", tagged)

Sentence: The quick brown fox jumps over the lazy dog.
POS Tags: [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]
Sentence: She sells seashells by the seashore.
POS Tags: [('She', 'PRP'), ('sells', 'VBZ'), ('seashells', 'NNS'), ('by', 'IN'), ('the', 'DT'), ('seashore', 'NN'), ('.', '.')]
Sentence: The cat is sitting on the windowsill.
POS Tags: [('The', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('sitting', 'VBG'), ('on', 'IN'), ('the', 'DT'), ('windowsill', 'NN'), ('.', '.')]
Sentence: I enjoy reading books in the evening.
POS Tags: [('I', 'PRP'), ('enjoy', 'VBP'), ('reading', 'VBG'), ('books', 'NNS'), ('in', 'IN'), ('the', 'DT'), ('evening', 'NN'), ('.', '.')]
Sentence: They are cooking dinner in the kitchen.
POS Tags: [('They', 'PRP'), ('are', 'VBP'), ('cooking', 'VBG'), ('dinner', 'NN'), ('in', 'IN'), ('the', 'DT'), ('kitchen', 'NN'), ('.', '.')]
Sentence: He played the guitar at t

### **Spacy**

In [5]:
# spaCy: run the pipeline on each sentence and print one "token POS" pair per line.
for text in sentences:
    parsed = nlp(text)
    print("Sentence:", text)
    for tok in parsed:
        # pos_ is the string form of the token's part-of-speech label
        print(tok.text, tok.pos_)

Sentence: The quick brown fox jumps over the lazy dog.
The DET
quick ADJ
brown ADJ
fox NOUN
jumps VERB
over ADP
the DET
lazy ADJ
dog NOUN
. PUNCT
Sentence: She sells seashells by the seashore.
She PRON
sells VERB
seashells NOUN
by ADP
the DET
seashore NOUN
. PUNCT
Sentence: The cat is sitting on the windowsill.
The DET
cat NOUN
is AUX
sitting VERB
on ADP
the DET
windowsill NOUN
. PUNCT
Sentence: I enjoy reading books in the evening.
I PRON
enjoy VERB
reading VERB
books NOUN
in ADP
the DET
evening NOUN
. PUNCT
Sentence: They are cooking dinner in the kitchen.
They PRON
are AUX
cooking VERB
dinner NOUN
in ADP
the DET
kitchen NOUN
. PUNCT
Sentence: He played the guitar at the concert.
He PRON
played VERB
the DET
guitar NOUN
at ADP
the DET
concert NOUN
. PUNCT


### **Stanford NLP**

In [6]:
# Stanza: annotate each sentence and print one "word POS" pair per line.
for text in sentences:
    annotated = nlp_stz(text)
    print("Sentence:", text)
    # The result exposes a list of sentences; walk the words of all of them in order.
    for w in (w for s in annotated.sentences for w in s.words):
        print(w.text, w.pos)
    print()  # blank line separating the inputs

Sentence: The quick brown fox jumps over the lazy dog.
The DET
quick ADJ
brown ADJ
fox NOUN
jumps VERB
over ADP
the DET
lazy ADJ
dog NOUN
. PUNCT

Sentence: She sells seashells by the seashore.
She PRON
sells VERB
seashells NOUN
by ADP
the DET
seashore NOUN
. PUNCT

Sentence: The cat is sitting on the windowsill.
The DET
cat NOUN
is AUX
sitting VERB
on ADP
the DET
windowsill NOUN
. PUNCT

Sentence: I enjoy reading books in the evening.
I PRON
enjoy VERB
reading VERB
books NOUN
in ADP
the DET
evening NOUN
. PUNCT

Sentence: They are cooking dinner in the kitchen.
They PRON
are AUX
cooking VERB
dinner NOUN
in ADP
the DET
kitchen NOUN
. PUNCT

Sentence: He played the guitar at the concert.
He PRON
played VERB
the DET
guitar NOUN
at ADP
the DET
concert NOUN
. PUNCT



## **Named Entity Recognition**

In [7]:
# Example sentences for the named-entity-recognition cells below.
# NOTE: this rebinds `sentences`, replacing the POS-tagging examples defined
# earlier in the notebook.
sentences = [
    "Apple Inc. is headquartered in Cupertino, California.",
    "The Eiffel Tower is located in Paris, France.",
    "Albert Einstein was born in Ulm, Germany.",
    "Microsoft was founded by Bill Gates and Paul Allen.",
    "The Mona Lisa is displayed at the Louvre Museum in Paris."
]

### **NLTK**

In [8]:
# NLTK: tokenize -> POS-tag -> named-entity chunk, then print the resulting tree.
for text in sentences:
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    entity_tree = nltk.ne_chunk(tagged_tokens)
    print("Sentence:", text)
    print("NER Tags:", entity_tree)

Sentence: Apple Inc. is headquartered in Cupertino, California.
NER Tags: (S
  (PERSON Apple/NNP)
  (ORGANIZATION Inc./NNP)
  is/VBZ
  headquartered/VBN
  in/IN
  (GPE Cupertino/NNP)
  ,/,
  (GPE California/NNP)
  ./.)
Sentence: The Eiffel Tower is located in Paris, France.
NER Tags: (S
  The/DT
  (ORGANIZATION Eiffel/NNP Tower/NNP)
  is/VBZ
  located/VBN
  in/IN
  (GPE Paris/NNP)
  ,/,
  (GPE France/NNP)
  ./.)
Sentence: Albert Einstein was born in Ulm, Germany.
NER Tags: (S
  (PERSON Albert/NNP)
  (PERSON Einstein/NNP)
  was/VBD
  born/VBN
  in/IN
  (GPE Ulm/NNP)
  ,/,
  (GPE Germany/NNP)
  ./.)
Sentence: Microsoft was founded by Bill Gates and Paul Allen.
NER Tags: (S
  (PERSON Microsoft/NNP)
  was/VBD
  founded/VBN
  by/IN
  (PERSON Bill/NNP Gates/NNP)
  and/CC
  (PERSON Paul/NNP Allen/NNP)
  ./.)
Sentence: The Mona Lisa is displayed at the Louvre Museum in Paris.
NER Tags: (S
  The/DT
  (ORGANIZATION Mona/NNP Lisa/NNP)
  is/VBZ
  displayed/VBN
  at/IN
  the/DT
  (ORGANIZATION Louv

### **Spacy**

In [9]:
# spaCy: print every entity span found in each sentence with its label.
for text in sentences:
    parsed = nlp(text)
    print("Sentence:", text)
    for entity in parsed.ents:
        print(entity.text, entity.label_)

Sentence: Apple Inc. is headquartered in Cupertino, California.
Apple Inc. ORG
Cupertino GPE
California GPE
Sentence: The Eiffel Tower is located in Paris, France.
The Eiffel Tower FAC
Paris GPE
France GPE
Sentence: Albert Einstein was born in Ulm, Germany.
Albert Einstein PERSON
Ulm GPE
Germany GPE
Sentence: Microsoft was founded by Bill Gates and Paul Allen.
Microsoft ORG
Bill Gates PERSON
Paul Allen PERSON
Sentence: The Mona Lisa is displayed at the Louvre Museum in Paris.
the Louvre Museum ORG
Paris GPE


### **Stanford NLP**

In [10]:
# Stanza: print every entity found in each sentence with its type.
for text in sentences:
    annotated = nlp_stz(text)
    print("Sentence:", text)
    # Entities are stored per sentence; flatten them in document order.
    for entity in (e for s in annotated.sentences for e in s.ents):
        print(entity.text, entity.type)

Sentence: Apple Inc. is headquartered in Cupertino, California.
Apple Inc. ORG
Cupertino GPE
California GPE
Sentence: The Eiffel Tower is located in Paris, France.
The Eiffel Tower FAC
Paris GPE
France GPE
Sentence: Albert Einstein was born in Ulm, Germany.
Albert Einstein PERSON
Ulm GPE
Germany GPE
Sentence: Microsoft was founded by Bill Gates and Paul Allen.
Microsoft ORG
Bill Gates PERSON
Paul Allen PERSON
Sentence: The Mona Lisa is displayed at the Louvre Museum in Paris.
The Mona Lisa WORK_OF_ART
the Louvre Museum FAC
Paris GPE


## **Chunking**

In [11]:
# Example sentences for the cells in the Chunking section.
# NOTE: this rebinds `sentences`, replacing the NER examples defined earlier.
sentences = [
    "The big brown dog barked loudly.",
    "She saw a beautiful sunset at the beach.",
    "John read a book in the park.",
    "The cat chased the mouse under the table.",
    "They built a new bridge across the river."
]

# Chunk grammar passed to nltk.RegexpParser in the NLTK chunking cell:
#   NP     - one or more determiner / adjective / noun tags
#   VP     - a verb tag followed by one or more NP, PP or CLAUSE chunks
#   CLAUSE - an NP chunk followed by a VP chunk
# NOTE(review): the VP rule mentions PP, but no PP rule is defined in this
# grammar, so that alternative presumably never matches -- confirm whether a
# PP rule was intended.
grammar = r"""
    NP: {<DT|JJ|NN.*>+}
    VP: {<VB.*><NP|PP|CLAUSE>+}
    CLAUSE: {<NP><VP>}
"""

### **NLTK**

In [12]:
# Build a regular-expression chunker from the module-level `grammar` string.
chunk_parser = nltk.RegexpParser(grammar)

for text in sentences:
    # The chunker is given POS-tagged tokens, so tokenize and tag first.
    tagged = nltk.pos_tag(nltk.word_tokenize(text))
    tree = chunk_parser.parse(tagged)

    print("Sentence:", text)
    print("Chunked Tree:", tree)

Sentence: The big brown dog barked loudly.
Chunked Tree: (S (NP The/DT big/JJ brown/NN dog/NN) barked/VBD loudly/RB ./.)
Sentence: She saw a beautiful sunset at the beach.
Chunked Tree: (S
  She/PRP
  (VP saw/VBD (NP a/DT beautiful/JJ sunset/NN))
  at/IN
  (NP the/DT beach/NN)
  ./.)
Sentence: John read a book in the park.
Chunked Tree: (S
  (CLAUSE (NP John/NNP) (VP read/VBD (NP a/DT book/NN)))
  in/IN
  (NP the/DT park/NN)
  ./.)
Sentence: The cat chased the mouse under the table.
Chunked Tree: (S
  (CLAUSE (NP The/DT cat/NN) (VP chased/VBD (NP the/DT mouse/NN)))
  under/IN
  (NP the/DT table/NN)
  ./.)
Sentence: They built a new bridge across the river.
Chunked Tree: (S
  They/PRP
  (VP built/VBD (NP a/DT new/JJ bridge/NN))
  across/IN
  (NP the/DT river/NN)
  ./.)


### **Spacy** (lemmatization)

In [13]:
# NOTE: although this cell sits in the "Chunking" section, it prints each
# token's lemma (lemma_); it does not produce chunks.
# TODO(review): if chunking was intended, spaCy exposes base noun phrases via
# `doc.noun_chunks` -- confirm the intent before changing the output.
for text in sentences:
    parsed = nlp(text)
    print("Sentence:", text)
    for tok in parsed:
        print(tok.text, tok.lemma_)


Sentence: The big brown dog barked loudly.
The the
big big
brown brown
dog dog
barked bark
loudly loudly
. .
Sentence: She saw a beautiful sunset at the beach.
She she
saw see
a a
beautiful beautiful
sunset sunset
at at
the the
beach beach
. .
Sentence: John read a book in the park.
John John
read read
a a
book book
in in
the the
park park
. .
Sentence: The cat chased the mouse under the table.
The the
cat cat
chased chase
the the
mouse mouse
under under
the the
table table
. .
Sentence: They built a new bridge across the river.
They they
built build
a a
new new
bridge bridge
across across
the the
river river
. .


### **Stanford NLP** (lemmatization)

In [14]:
# NOTE: although this cell sits in the "Chunking" section, it prints each
# word's lemma; it does not produce chunks.
# TODO(review): if chunking was intended, confirm the intent and consider the
# pipeline's constituency parse instead.
for text in sentences:
    annotated = nlp_stz(text)
    print("Sentence:", text)
    # Walk the words of every sentence in the annotated result, in order.
    for w in (w for s in annotated.sentences for w in s.words):
        print(w.text, w.lemma)
    print()  # blank line separating the inputs

Sentence: The big brown dog barked loudly.
The the
big big
brown brown
dog dog
barked bark
loudly loudly
. .

Sentence: She saw a beautiful sunset at the beach.
She she
saw see
a a
beautiful beautiful
sunset sunset
at at
the the
beach beach
. .

Sentence: John read a book in the park.
John John
read read
a a
book book
in in
the the
park park
. .

Sentence: The cat chased the mouse under the table.
The the
cat cat
chased chase
the the
mouse mouse
under under
the the
table table
. .

Sentence: They built a new bridge across the river.
They they
built build
a a
new new
bridge bridge
across across
the the
river river
. .

