In [1]:
# One-time setup: uncomment the line below to install spaCy if it is missing.
# NOTE(review): inside a notebook, `%pip install spacy` is preferable to `!pip`
# because it installs into the environment of the running kernel.
# !pip install spacy



In [2]:
# One-time setup: uncomment the line below to download the small English
# pipeline that this notebook loads with spacy.load('en_core_web_sm').
# !python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     -- ------------------------------------- 0.8/12.8 MB 11.7 MB/s eta 0:00:02
     ------------------------------ --------- 9.7/12.8 MB 34.7 MB/s eta 0:00:01
     ---------------------------------------- 12.8/12.8 MB 35.7 MB/s  0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [26]:
import spacy
from spacy import displacy

In [27]:
# Load the small English pipeline once; the cells below call `nlp` on raw text.
MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(MODEL_NAME)
nlp

<spacy.lang.en.English at 0x164433187d0>

In [28]:

# A short sample sentence, kept as a plain string first and then run through
# the pipeline so that `sent` holds the processed result.
sample_sentence = """Mark Zukerberg will meet Aditya Joshi
on  Monday 25 November 2025, 10am for $3 Trillion deal
at Pune for second time"""
sent = nlp(sample_sentence)
print(sent)

Mark Zukerberg will meet Aditya Joshi
on  Monday 25 November 2025, 10am for $3 Trillion deal
at Pune for second time


In [29]:
# Entities the pipeline recognised in the sample sentence.
sent.ents  # a tuple of Span objects

(Mark Zukerberg,
 Aditya Joshi,
 Monday 25 November 2025,
 10am,
 $3 Trillion,
 second)

In [30]:
# Show each recognised entity next to the label predicted for it.
labelled_entities = ((span.text, span.label_) for span in sent.ents)
for surface, label in labelled_entities:
    print(surface, ' -> ', label)

Mark Zukerberg  ->  PERSON
Aditya Joshi  ->  PERSON
Monday 25 November 2025  ->  DATE
10am  ->  TIME
$3 Trillion  ->  MONEY
second  ->  ORDINAL


In [31]:
# Look up the human-readable description of the ORDINAL entity label.
spacy.explain('ORDINAL')

'"first", "second", etc.'

In [32]:
# Run the pipeline loaded above (`nlp`) over a two-paragraph excerpt about
# Alaska, then display the entities it found. The text is passed verbatim,
# including the pronunciation guide and the "[6]" citation marker.
raw_text = nlp("""Alaska (/əˈlæskə/ ⓘ ə-LASS-kə) is a non-contiguous U.S. state on the northwest extremity of North America. Part of the Western United States region, it is one of the two non-contiguous U.S. states, alongside Hawaii. Alaska is considered to be the northernmost, westernmost, and easternmost (the Aleutian Islands cross the 180th meridian into the eastern hemisphere) state in the United States. It borders the Canadian territory of Yukon and the province of British Columbia to the east. It shares a western maritime border, in the Bering Strait, with Russia's Chukotka Autonomous Okrug. The Chukchi and Beaufort Seas of the Arctic Ocean lie to the north, and the Pacific Ocean lies to the south. It is a semi-exclave of the U.S., and is the largest exclave in the world.

Alaska is the largest U.S. state by area, comprising more total area than the following three largest states of Texas, California, and Montana combined, and is the seventh-largest subnational division in the world. It is the third-least populous and most sparsely populated U.S. state. With a population of 740,133 in 2024, it is the most populous territory in North America located mostly north of the 60th parallel, with more than quadruple the combined populations of Northern Canada and Greenland.[6] Alaska contains the four largest cities in the United States by area, including the state capital of Juneau. Alaska's most populous city is Anchorage, and approximately half of Alaska's residents live within its metropolitan area.""")
raw_text.ents  # tuple of entities

(Alaska,
 ə-LASS-kə,
 U.S.,
 North America,
 Western United States,
 one,
 two,
 U.S.,
 Hawaii,
 Alaska,
 the United States,
 Canadian,
 Yukon,
 British Columbia,
 the Bering Strait,
 Russia,
 Chukotka Autonomous Okrug,
 Chukchi,
 the Arctic Ocean,
 the Pacific Ocean,
 U.S.,
 Alaska,
 U.S.,
 three,
 Texas,
 California,
 Montana,
 seventh,
 third,
 U.S.,
 740,133,
 2024,
 North America,
 60th,
 Northern Canada,
 Alaska,
 four,
 the United States,
 Juneau,
 Alaska,
 Anchorage,
 approximately half,
 Alaska)

In [33]:
# Pair every entity found in the Alaska text with its predicted label.
for span in raw_text.ents:
    tag = span.label_
    print(span, '->', tag)

Alaska -> GPE
ə-LASS-kə -> PERSON
U.S. -> GPE
North America -> LOC
Western United States -> GPE
one -> CARDINAL
two -> CARDINAL
U.S. -> GPE
Hawaii -> GPE
Alaska -> GPE
the United States -> GPE
Canadian -> NORP
Yukon -> NORP
British Columbia -> GPE
the Bering Strait -> LOC
Russia -> GPE
Chukotka Autonomous Okrug -> FAC
Chukchi -> GPE
the Arctic Ocean -> LOC
the Pacific Ocean -> LOC
U.S. -> GPE
Alaska -> GPE
U.S. -> GPE
three -> CARDINAL
Texas -> GPE
California -> GPE
Montana -> GPE
seventh -> ORDINAL
third -> ORDINAL
U.S. -> GPE
740,133 -> CARDINAL
2024 -> DATE
North America -> LOC
60th -> ORDINAL
Northern Canada -> LOC
Alaska -> GPE
four -> CARDINAL
the United States -> GPE
Juneau -> GPE
Alaska -> GPE
Anchorage -> GPE
approximately half -> CARDINAL
Alaska -> GPE


In [34]:
# Look up the human-readable description of the LOC entity label.
spacy.explain('LOC')

'Non-GPE locations, mountain ranges, bodies of water'

<h1>Display the NER results in an interactive way</h1>

In [39]:
# Visualise the entities of the Alaska text with displaCy's 'ent' style.
displacy.render(raw_text,style='ent')

<h1>NER on an ICC (International Cricket Council) paragraph</h1>

In [41]:
# Paragraph about the ICC chairman and president roles, kept as a plain string
# and then run through the pipeline.
# NOTE(review): the bracketed citation markers (e.g. "[8]", "[12]") are passed
# through unchanged and end up inside some entity spans in the output below.
icc_paragraph = """The Chairman heads the board of directors, and on 26 June 2014 Narayanaswami Srinivasan, the former president of Board of Control for Cricket in India, was announced as the first chairman of the council.[8] The role of ICC president became a largely honorary position after the establishment of the chairman role and other changes made to the ICC constitution in 2014. It has been claimed that the 2014 changes have handed control to the 'Big Three' nations of England, India and Australia.[9] The last ICC president was Zaheer Abbas,[10] who was appointed in June 2015 following the resignation of Mustafa Kamal in April 2015. When the post of ICC president was abolished in April 2016, Shashank Manohar, who replaced Srinivasan in October 2015, became the first independent elected chairman of the ICC.[11] Sanjog Gupta became the seventh CEO of the International Cricket Council on July 7, 2025, succeeding Jay Shah.[12] His role involves managing T20 league growth, sustaining cricket formats, promoting Olympic inclusion, and expanding global engagement.[13]"""
text = nlp(icc_paragraph)

text.ents

(26 June 2014,
 Narayanaswami Srinivasan,
 Board of Control for Cricket,
 India,
 first,
 the council.[8],
 2014,
 2014,
 the 'Big Three',
 England,
 India,
 Zaheer,
 June 2015,
 Mustafa Kamal,
 April 2015,
 April 2016,
 Shashank Manohar,
 Srinivasan,
 October 2015,
 first,
 Sanjog Gupta,
 seventh,
 the International Cricket Council,
 July 7, 2025,
 Jay Shah.[12,
 T20,
 Olympic)

In [50]:
# Print every entity found in the ICC paragraph alongside its predicted label.
# A plain loop is used because print() is called only for its side effect;
# collecting its None return values in a list serves no purpose.
for ent in text.ents:
    print(ent, ' ->', ent.label_)


26 June 2014  -> DATE
Narayanaswami Srinivasan  -> PERSON
Board of Control for Cricket  -> ORG
India  -> GPE
first  -> ORDINAL
the council.[8]  -> ORG
2014  -> DATE
2014  -> DATE
the 'Big Three'  -> ORG
England  -> GPE
India  -> GPE
Zaheer  -> PERSON
June 2015  -> DATE
Mustafa Kamal  -> PERSON
April 2015  -> DATE
April 2016  -> DATE
Shashank Manohar  -> PERSON
Srinivasan  -> ORG
October 2015  -> DATE
first  -> ORDINAL
Sanjog Gupta  -> PERSON
seventh  -> ORDINAL
the International Cricket Council  -> ORG
July 7, 2025  -> DATE
Jay Shah.[12  -> PERSON
T20  -> ORG
Olympic  -> CARDINAL


In [44]:
# Extract the entities labelled PERSON, preserving their order in the text.
persons = list(filter(lambda span: span.label_ == 'PERSON', text.ents))
persons

[Narayanaswami Srinivasan,
 Zaheer,
 Mustafa Kamal,
 Shashank Manohar,
 Sanjog Gupta,
 Jay Shah.[12]