In [12]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.tokenize import sent_tokenize
from IPython.display import display, HTML

# Fetch (or confirm as up to date) every NLTK data package this cell relies on
for _package in ('punkt', 'maxent_ne_chunker', 'averaged_perceptron_tagger', 'words'):
    nltk.download(_package)
del _package  # do not leave the loop variable behind as a notebook global

def highlight_entities(text):
    """Return an HTML page showing *text* with its named entities colored.

    The text is split into sentences with ``sent_tokenize``; each sentence is
    word-tokenized, POS-tagged and passed through ``ne_chunk``. Chunks labeled
    PERSON, GPE or ORGANIZATION are wrapped in a ``<span>`` whose CSS class is
    the label. Every other token, including entities carrying any other
    label, is emitted as plain text. Tokens are re-joined with single spaces,
    so the original spacing around punctuation is not preserved.

    Args:
        text: The raw text to analyze.

    Returns:
        A complete HTML document as a string: a stylesheet, a color legend,
        and one ``<p>`` element per sentence.
    """
    # Local import so this block does not need a new file-level import.
    from html import escape

    # (label, color, legend description). The chunk label doubles as the CSS
    # class name, so this one table drives the stylesheet, the legend and the
    # per-entity markup.
    entity_styles = (
        ("PERSON", "blue", "Name of a person"),
        ("GPE", "green", "Geopolitical entity or location"),
        ("ORGANIZATION", "red", "Name of an organization"),
    )
    highlighted_labels = {label for label, _, _ in entity_styles}

    parts = ["<html><head><style>"]
    parts.extend(
        f".{label} {{ color: {color}; font-weight: bold; }}"
        for label, color, _ in entity_styles
    )
    parts.append("</style></head><body>")

    # Color legend
    parts.append("<h2>Entity Highlights:</h2>")
    parts.extend(
        f"<p><span class='{label}'>{label}</span>: {description}</p>"
        for label, _, description in entity_styles
    )
    parts.append("<br>")  # was "</br>", which is not a valid HTML tag

    for sentence in sent_tokenize(text):
        pieces = []
        for chunk in ne_chunk(pos_tag(word_tokenize(sentence))):
            if not isinstance(chunk, nltk.Tree):
                # Plain (word, tag) pair outside any entity. Escape it so
                # characters such as '<' or '&' cannot break the markup.
                pieces.append(escape(chunk[0], quote=False))
                continue

            label = chunk.label()
            entity = escape(" ".join(leaf[0] for leaf in chunk), quote=False)
            if label in highlighted_labels:
                pieces.append(f"<span class='{label}'>{entity}</span>")
            else:
                # Entities with other labels (e.g. LOCATION, FACILITY) have no
                # color assigned, but their words must still appear in the
                # output instead of being silently dropped.
                pieces.append(entity)

        parts.append(f"<p>{' '.join(pieces)}</p>")

    parts.append("</body></html>")
    return "".join(parts)

# Example text: a short passage naming a person, companies and cities.
# Note: the literal starts and ends with a newline and contains literal
# double-quote characters; all of these are passed to highlight_entities().
text = """
"Elon Musk is the CEO of SpaceX and Tesla. The company SpaceX has launched several rockets into space.
In 2021, the company plans to send humans to Mars. Tesla, headquartered in Palo Alto, is known for its electric cars.
The city of Los Angeles has hosted several events organized by Tesla and SpaceX."
"""

# Build the HTML page (stylesheet, legend, one paragraph per sentence) for the example
html_summary = highlight_entities(text)

# Render the returned HTML string inline as the cell's output (intended for Colab)
display(HTML(html_summary))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
