In [1]:
import nltk
from nltk import word_tokenize, pos_tag
# Example input for the NLTK tagger
sentence = "The quick brown fox jumps over the lazy dog."

# Split the sentence into word tokens, then attach a part-of-speech tag to each
tokens = word_tokenize(sentence)
pos_tags = pos_tag(tokens)

# Print a heading followed by the list of (token, tag) pairs on the next line
print("POS Tags:", pos_tags, sep="\n")


POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


In [2]:
!pip install spacy



In [3]:
# In a terminal, run: python -m spacy download en_core_web_sm

In [4]:
import spacy




In [5]:
# Load the 'en_core_web_sm' model
nlp = spacy.load('en_core_web_sm')
# Display the names of the components in the loaded pipeline
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
# Example sentence for the spaCy pipeline
sentence = "I am learning NLP in Python"

# Run the loaded pipeline over the sentence to obtain an annotated document
doc = nlp(sentence)

# Walk over the tokens, printing each token's text followed by its POS tag
for token in doc:
    print(f"{token.text} {token.pos_}")

I PRON
am AUX
learning VERB
NLP PROPN
in ADP
Python PROPN


In [7]:
# Annotate a second example sentence
doc1 = nlp("Apple is planning to buy Indian startup for $1 billion")

# One row per token: token | pos_ | explanation of pos_ | tag_ and its explanation.
# The tag and its explanation share the last column, separated by a single space.
for token1 in doc1:
    print(
        token1,
        token1.pos_,
        spacy.explain(token1.pos_),
        f"{token1.tag_} {spacy.explain(token1.tag_)}",
        sep=" | ",
    )

Apple | PROPN | proper noun | NNP noun, proper singular
is | AUX | auxiliary | VBZ verb, 3rd person singular present
planning | VERB | verb | VBG verb, gerund or present participle
to | PART | particle | TO infinitival "to"
buy | VERB | verb | VB verb, base form
Indian | ADJ | adjective | JJ adjective (English), other noun-modifier (Chinese)
startup | NOUN | noun | NN noun, singular or mass
for | ADP | adposition | IN conjunction, subordinating or preposition
$ | SYM | symbol | $ symbol, currency
1 | NUM | numeral | CD cardinal number
billion | NUM | numeral | CD cardinal number


In [11]:
# Annotate a short passage about Apple Inc.
doc = nlp("""Apple Inc. is an American multinational technology company headquartered in Cupertino, California. It was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976. Apple designs, manufactures, and sells consumer electronics, computer software, and online services. Its best-known hardware products include the iPhone, iPad, Mac, and Apple Watch.""")

# One row per recognized entity: text, label, and a description of the label
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Apple Inc.  |  ORG  |  Companies, agencies, institutions, etc.
American  |  NORP  |  Nationalities or religious or political groups
Cupertino  |  GPE  |  Countries, cities, states
California  |  GPE  |  Countries, cities, states
Steve Jobs  |  PERSON  |  People, including fictional
Steve Wozniak  |  PERSON  |  People, including fictional
Ronald Wayne  |  PERSON  |  People, including fictional
April 1976  |  DATE  |  Absolute or relative dates or periods
Apple  |  ORG  |  Companies, agencies, institutions, etc.
iPhone  |  ORG  |  Companies, agencies, institutions, etc.
iPad  |  ORG  |  Companies, agencies, institutions, etc.
Mac  |  PERSON  |  People, including fictional
Apple Watch  |  ORG  |  Companies, agencies, institutions, etc.


In [12]:
from spacy import displacy

# Visualize the entities recognized in `doc` using displaCy's "ent" style
displacy.render(doc, style="ent")

In [13]:
# Display the entity labels associated with the pipeline's 'ner' component
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [14]:
# A sentence in which the same name appears as a person and as a company
doc = nlp("Michael Bloomberg founded Bloomberg in 1982")

# One row per recognized entity: text, label, and a description of the label
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=" | ")

Michael Bloomberg | PERSON | People, including fictional
Bloomberg | PERSON | People, including fictional
1982 | DATE | Absolute or relative dates or periods


In [15]:
# Annotate a sentence that names two companies and an amount of money
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")

# One row per entity: text, label, then its start and end character offsets
for ent in doc.ents:
    print(f"{ent.text}  |  {ent.label_}  |  {ent.start_char} | {ent.end_char}")

Tesla Inc  |  ORG  |  0 | 9
Twitter Inc  |  ORG  |  30 | 41
$45 billion  |  MONEY  |  46 | 57


# Setting custom entities

In [16]:
# Same acquisition sentence without the "Inc" suffixes
doc = nlp("Tesla is going to acquire Twitter for $45 billion")

# Show the entities the model assigns before any manual override
for ent in doc.ents:
    print(ent.text, ent.label_, sep="  |  ")

Tesla  |  ORG
Twitter  |  PRODUCT
$45 billion  |  MONEY


In [17]:
# Slice tokens 2 through 4 of the document (the end index is exclusive)
s = doc[2:5]
# Display the slice
s

going to acquire

In [18]:
# Check what kind of object slicing the document produced
type(s)

spacy.tokens.span.Span

In [19]:
from spacy.tokens import Span

# Build one-token spans labelled ORG: token 0 is "Tesla", token 5 is "Twitter"
s1 = Span(doc, 0, 1, label="ORG")
s2 = Span(doc, 5, 6, label="ORG")

# Apply the two spans as entities; default="unmodified" leaves the other
# existing entities (e.g. "$45 billion") as they were
doc.set_ents([s1, s2], default="unmodified")

In [20]:
# List the document's entities again to see the effect of the manual override
for ent in doc.ents:
    print(ent.text, ent.label_, sep="  |  ")

Tesla  |  ORG
Twitter  |  ORG
$45 billion  |  MONEY


In [21]:
# Sample passage; the literal deliberately keeps its leading and trailing newlines
text = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California. It was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976. 
Apple designs, manufactures, and sells consumer electronics, computer software, and online services. Its best-known hardware products include the iPhone, iPad, Mac, and Apple Watch.
"""
# Alternative input to experiment with: I love eating apple while using my Apple watch.

# Annotate the passage with the spaCy pipeline
doc = nlp(text)

# For every sentence, report its text, its (token, POS) pairs,
# and the (entity, label) pairs that fall inside it
print("Sentences with POS tagging and NER:")
for sent in doc.sents:
    print(f"\nSentence: {sent}")
    print(f"POS tagging: {[(tok.text, tok.pos_) for tok in sent]}")
    print(f"NER: {[(span.text, span.label_) for span in sent.ents]}")


Sentences with POS tagging and NER:

Sentence: 
Apple Inc. is an American multinational technology company headquartered in Cupertino, California.
POS tagging: [('\n', 'SPACE'), ('Apple', 'PROPN'), ('Inc.', 'PROPN'), ('is', 'AUX'), ('an', 'DET'), ('American', 'ADJ'), ('multinational', 'ADJ'), ('technology', 'NOUN'), ('company', 'NOUN'), ('headquartered', 'VERB'), ('in', 'ADP'), ('Cupertino', 'PROPN'), (',', 'PUNCT'), ('California', 'PROPN'), ('.', 'PUNCT')]
NER: [('Apple Inc.', 'ORG'), ('American', 'NORP'), ('Cupertino', 'GPE'), ('California', 'GPE')]

Sentence: It was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976. 

POS tagging: [('It', 'PRON'), ('was', 'AUX'), ('founded', 'VERB'), ('by', 'ADP'), ('Steve', 'PROPN'), ('Jobs', 'PROPN'), (',', 'PUNCT'), ('Steve', 'PROPN'), ('Wozniak', 'PROPN'), (',', 'PUNCT'), ('and', 'CCONJ'), ('Ronald', 'PROPN'), ('Wayne', 'PROPN'), ('in', 'ADP'), ('April', 'PROPN'), ('1976', 'NUM'), ('.', 'PUNCT'), ('\n', 'SPACE')]
NER: [('Steve