# **Named Entity Recognition**

## Step 1: Import the necessary NLTK libraries and download required data

In [3]:
# All imports first, so the notebook's dependencies are visible at a glance.
import nltk
from nltk import ne_chunk, pos_tag
from nltk.tokenize import PunktSentenceTokenizer, word_tokenize

# Download the NLTK data packages this notebook relies on.  Each call is a
# no-op (apart from a status message) when the package is already present.
nltk.download('words')
nltk.download('punkt')
nltk.download('maxent_ne_chunker')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


## Step 2: Define a function to extract and print named entities

In [4]:
def get_named_entities(text):
    """Extract named entities from ``text`` and print them grouped by type.

    The text is split into sentences; each sentence is word-tokenized,
    part-of-speech tagged and run through NLTK's named-entity chunker.  For
    every sentence that contains at least one entity, one line per entity
    type is printed in the form ``TYPE - name1, name2``.  Types are printed
    in the order in which they first appear in the sentence.

    Args:
        text: A sentence or paragraph of plain text.

    Returns:
        None.  Results are written to standard output.
    """
    # Step 3: Tokenize the input text into sentences.
    # Use the pre-trained Punkt model through sent_tokenize.  Building
    # PunktSentenceTokenizer(text) would pass the input as *training* data,
    # i.e. learn a brand-new model from the very text being split, which is
    # unreliable for the short inputs this function typically receives.
    sentences = nltk.sent_tokenize(text)

    # Step 4: Loop through each sentence in the text
    for sentence in sentences:
        # Tokenize into words, then attach part-of-speech tags
        tagged = pos_tag(word_tokenize(sentence))

        # Perform named entity chunking (binary=False keeps the entity types)
        named_entities = ne_chunk(tagged, binary=False)

        # Steps 5-6: Collect entity names by type.  A plain dict keeps the
        # types in first-seen order, so the printed output is deterministic
        # (iterating a set of strings is not, across interpreter runs).
        named_entity_dict = {}
        for chunk in named_entities:
            # Entities come back as subtrees; ordinary tokens are plain tuples
            if isinstance(chunk, nltk.Tree):
                entity_name = ' '.join(token for token, tag in chunk.leaves())
                named_entity_dict.setdefault(chunk.label(), []).append(entity_name)

        # Step 7: Print the named entities by type
        for entity_type, entity_names in named_entity_dict.items():
            print(f'{entity_type} - {", ".join(entity_names)}')

## Step 8: Main program execution

In [5]:
# Entry point: runs when this code executes as the main module (the recorded
# output below shows this branch running inside the notebook).
if __name__ == "__main__":

    # Step 9: Prompt interactively for a sentence or paragraph to analyze.
    # NOTE: input() blocks until the user responds, so this cell cannot run
    # unattended.
    user_text = input("Enter a sentence or paragraph: ")

    # Step 10: Extract the named entities from the entered text and print
    # them grouped by entity type.
    get_named_entities(user_text)

Enter a sentence or paragraph: My name is Pooja. I am doing puja
PERSON - Pooja
