In [None]:
import nltk
from collections import defaultdict
import nltk
# Step 1: Download required NLTK resources
# The "_tab"/"_eng" resource names are the ones recent NLTK releases look up;
# the legacy names are still fetched so older installations keep working.
# 'punkt_tab' is what sent_tokenize/word_tokenize load on those recent
# releases -- with only 'punkt' present they raise LookupError.
for resource_id in (
    'maxent_ne_chunker_tab',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(resource_id)

# Step 2: Sample paragraph
# Assembled from adjacent string literals so that every line break and the
# trailing space before it are spelled out explicitly.
text = (
    "\n"
    "Satya Nadella, the CEO of Microsoft, unveiled a partnership with OpenAI to \n"
    "integrate advanced AI into Microsoft Office products. The announcement took \n"
    "place at the annual Build conference in Seattle, Washington. Meanwhile, Amazon \n"
    "is expanding its logistics network across Europe, with new centers opening in \n"
    "Paris and Madrid. The news was reported in March 2025 by The Wall Street Journal.\n"
)

# Step 3: Split the paragraph into sentences, break each sentence into word
# tokens, then attach a part-of-speech tag to every token
sentences = nltk.sent_tokenize(text)
tokenized_sentences = list(map(nltk.word_tokenize, sentences))
pos_tagged_sentences = list(map(nltk.pos_tag, tokenized_sentences))

# Step 4: Run the named-entity chunker over each tagged sentence
# (one chunk tree per sentence)
named_entities = list(map(nltk.ne_chunk, pos_tagged_sentences))

# Step 5: Group the recognised entities by their label
# entity_dict maps an entity label to the list of entity strings found,
# in order of appearance (duplicates are kept here).
entity_dict = defaultdict(list)

for sentence_tree in named_entities:
    for node in sentence_tree:
        # Only children exposing a `label` attribute are treated as entity
        # chunks; everything else is skipped.
        if not hasattr(node, 'label'):
            continue
        words = [word for word, _tag in node]
        entity_dict[node.label()].append(" ".join(words))

# Step 6: Display categorized named entities
print("\n=== Named Entities by Type ===\n")
for entity_type, names in entity_dict.items():
    print(f"{entity_type}:")
    # dict.fromkeys drops duplicates while keeping first-seen order; iterating
    # a set of strings instead can print in a different order on every run.
    for name in dict.fromkeys(names):
        print(f"  - {name}")
    print()
