In [5]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.chunk import tree2conlltags

# Fetch the NLTK data packages this script needs, in order: the 'punkt'
# tokenizer, the perceptron POS tagger, the maxent NE chunker, and the
# 'words' corpus.
for resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(resource)

# Example input that the rest of the script analyzes
sentence = "Google is planning to open a new office in New York next year."

def analyze_ner(text):
    """Tokenize, POS-tag and NE-chunk *text* with NLTK.

    Args:
        text: The raw string to analyze.

    Returns:
        A ``(iob_tags, ne_tree)`` pair, where ``ne_tree`` is the result of
        ``ne_chunk`` and ``iob_tags`` is that tree converted by
        ``tree2conlltags`` into a flat form for easier processing.
    """
    # Pipeline: raw text -> tokens -> POS-tagged tokens -> chunk tree
    ne_tree = ne_chunk(pos_tag(word_tokenize(text)))
    return tree2conlltags(ne_tree), ne_tree

# Run the pipeline on the example sentence. `iob_tags` is iterated below as
# (word, POS tag, IOB tag) triples; `tree` is the chunk tree itself.
iob_tags, tree = analyze_ner(sentence)

# Human-readable description for each entity label reported further down
entity_descriptions = {
    'ORGANIZATION': 'Companies, institutions, agencies, etc.',
    'GPE': 'Geo-political entities like cities, states, countries',
    'DATE': 'Temporal expressions and dates'
}

print("Named Entity Recognition Analysis:\n")
print(f"Original sentence: {sentence}\n")
print("Identified Entities:")

# List every token that is part of a named entity, one line per token
for word, pos, tag in iob_tags:
    if tag == 'O':  # O means Outside, not part of named entity
        continue
    # Drop the two-character "B-"/"I-" prefix to get the bare entity label;
    # a tag without such a prefix is shown unchanged.
    entity_type = tag[2:] if tag.startswith(('B-', 'I-')) else tag
    print(f"{word:<15} - Tag: {tag:<10} (Entity Type: {entity_type})")

print("\nDetailed Analysis:")


def _store_entity(chunk, store):
    """Append the text of a finished entity chunk to ``store``.

    Args:
        chunk: List of (word, pos, tag) triples forming one entity; an empty
            list means there is nothing to record.
        store: Dict mapping entity type to the list of entity strings found
            so far; updated in place.
    """
    if not chunk:
        return
    # The entity type is the first tag of the chunk minus its "B-"/"I-" prefix
    entity_type = chunk[0][2][2:]
    entity_text = ' '.join(token for token, _, _ in chunk)
    store.setdefault(entity_type, []).append(entity_text)


# Group consecutive B-/I- tagged tokens into multi-word entities, keyed by type
entities = {}
current_entity = []
for word, pos, tag in iob_tags:
    if tag.startswith('B-'):
        # A new entity begins: close the one in progress, then start afresh
        _store_entity(current_entity, entities)
        current_entity = [(word, pos, tag)]
    elif tag.startswith('I-'):
        current_entity.append((word, pos, tag))
    else:
        # Any other tag ends whatever entity was being collected
        _store_entity(current_entity, entities)
        current_entity = []

# An entity that runs up to the very last token has no following tag to
# trigger the flush inside the loop, so record it here.
_store_entity(current_entity, entities)

# Role each known entity type plays in the analyzed sentence
entity_significance = {
    'ORGANIZATION': "Identifies the company involved in the action",
    'GPE': "Specifies the location where the action will take place",
    'DATE': "Indicates when the action is planned to occur",
}

# Print one summary section per entity type that was found
for entity_type, entity_list in entities.items():
    description = entity_descriptions.get(entity_type, "Other named entity")
    # Fall back to a generic note so the "Significance" line is always
    # completed (text plus newline), even for types without an entry above.
    significance = entity_significance.get(
        entity_type, "No specific role described for this entity type"
    )
    print(f"\n{entity_type}:")
    print(f"  Entities found: {', '.join(entity_list)}")
    print(f"  Description: {description}")
    print(f"  Significance: {significance}")

Named Entity Recognition Analysis:

Original sentence: Google is planning to open a new office in New York next year.

Identified Entities:
Google          - Tag: B-GPE      (Entity Type: GPE)
New             - Tag: B-GPE      (Entity Type: GPE)
York            - Tag: I-GPE      (Entity Type: GPE)

Detailed Analysis:

GPE:
  Entities found: Google, New York
  Description: Geo-political entities like cities, states, countries
  Significance: Specifies the location where the action will take place


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
