In [1]:
import spacy

In [2]:
nlp = spacy.load('en_core_web_sm')

In [2]:
text = ''' Byju’s had been a partner of the Indian cricket team since 2019, with its branding featured on the front of the team’s jersey. In June last year, Byju’s extended its sponsorship rights with the BCCI till November. 
The ed-tech firm had asked the cricket board to encash a ₹140 crore bank guarantee, with the remaining ₹160 crore to be paid in instalments.
In November, Byju’s hinted at possible talks on a settlement between the two. However, the company has been facing a severe financial crisis and is unable to repay debts owed to its creditors.'''

In [4]:
doc = nlp(text)

In [6]:
for ents in doc.ents:
    print(ents,"-->",ents.label_,"-->",spacy.explain(ents.label_))

Byju’s --> ORG --> Companies, agencies, institutions, etc.
Indian --> NORP --> Nationalities or religious or political groups
2019 --> DATE --> Absolute or relative dates or periods
June last year --> DATE --> Absolute or relative dates or periods
Byju --> PERSON --> People, including fictional
BCCI --> ORG --> Companies, agencies, institutions, etc.
November --> DATE --> Absolute or relative dates or periods
₹ --> ORG --> Companies, agencies, institutions, etc.
160 --> MONEY --> Monetary values, including unit
November --> DATE --> Absolute or relative dates or periods
Byju --> PERSON --> People, including fictional
two --> CARDINAL --> Numerals that do not fall under another type


In [7]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\tr4\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\tr4\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\tr4\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping chunkers\maxent_ne_chunker.zip.
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\tr4\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\words.zip.


True

In [7]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')


# Tokenize the text into words
tokens = nltk.word_tokenize(text)
stop_words = set(stopwords.words('english'))
tokens_filtered = [w for w in tokens if not w.lower() in stop_words]
length = len(tokens_filtered)
for i in range (1, length):
	if i in (',', '.', '_'):
		tokens_filtered.remove(tokens_filtered[i])
    

# Apply part-of-speech tagging to the tokens
tagged = nltk.pos_tag(tokens_filtered)

# Apply named entity recognition to the tagged words
entities = nltk.chunk.ne_chunk(tagged)

# Print the entities found in the text
for entity in entities:
	print(entity)


(PERSON Byju/NNP)
('’', 'NNP')
('partner', 'NN')
(GPE Indian/JJ)
('cricket', 'NN')
('team', 'NN')
('since', 'IN')
('2019', 'CD')
(',', ',')
('branding', 'VBG')
('featured', 'VBN')
('front', 'JJ')
('team', 'NN')
('’', 'NNP')
('jersey', 'NN')
('.', '.')
('June', 'NNP')
('last', 'JJ')
('year', 'NN')
(',', ',')
(PERSON Byju/NNP)
('’', 'NNP')
('extended', 'VBD')
('sponsorship', 'NN')
('rights', 'NNS')
(ORGANIZATION BCCI/NNP)
('till', 'NN')
('November', 'NNP')
('.', '.')
('ed-tech', 'JJ')
('firm', 'NN')
('asked', 'VBD')
('cricket', 'NNS')
('board', 'NN')
('encash', 'VBP')
('₹140', 'NNP')
('crore', 'VBD')
('bank', 'NN')
('guarantee', 'NN')
(',', ',')
('remaining', 'VBG')
('₹160', 'JJ')
('crore', 'NN')
('paid', 'VBD')
('instalments', 'NNS')
('.', '.')
('November', 'NNP')
(',', ',')
(PERSON Byju/NNP)
('’', 'NNP')
('hinted', 'VBD')
('possible', 'JJ')
('talks', 'NNS')
('settlement', 'NN')
('two', 'CD')
('.', '.')
('However', 'RB')
(',', ',')
('company', 'NN')
('facing', 'VBG')
('severe', 'JJ')
('

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\tr4\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
