1.	Write a Python program using NLTK to extract named entities from the sentence: "Apple Inc. is looking at buying U.K. startup for $1 billion."


In [None]:
import nltk
# Download only the NLTK data packages this notebook relies on (sentence and
# word tokenizer, POS tagger, NE chunker and the word list it needs) instead
# of the complete 'all' collection.  Legacy and newer package identifiers are
# both requested because the names NLTK looks up differ between releases.
for _package in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker',
    'maxent_ne_chunker_tab',
    'words',
):
    nltk.download(_package)

In [6]:
# 1. Extract named entities from the sentence
from nltk import word_tokenize, pos_tag, ne_chunk

sentence = "Apple Inc. is looking at buying U.K. startup for $1 billion."

# Pipeline: split into tokens, attach POS tags, then run the NE chunker.
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)
tree = ne_chunk(tags)

# Entity chunks appear as Tree children of the result; plain tagged tokens
# are (word, tag) tuples and are skipped.
named_entities = [
    " ".join(word for word, _ in subtree)
    for subtree in tree
    if isinstance(subtree, nltk.Tree)
]

print(named_entities)

['Apple', 'Inc.']


2.	Using NLTK, write a function that takes a list of sentences and returns a list of named entities found in each sentence.


In [7]:
# 2. Function to extract named entities from a list of sentences
def extract_named_entities_from_sentences(sentences):
    """Return, for every sentence, the list of named entities found in it.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list with one entry per input sentence; each entry is a list of
        entity strings (the words of each entity chunk joined by spaces).
    """
    def entities_in(sentence):
        # tokenize -> POS-tag -> NE-chunk, then keep only the Tree children,
        # which are the chunks the NE chunker produced.
        chunked = ne_chunk(pos_tag(word_tokenize(sentence)))
        return [
            " ".join(token for token, _ in node)
            for node in chunked
            if isinstance(node, nltk.Tree)
        ]

    return [entities_in(sentence) for sentence in sentences]

# Sample input: the call yields one entity list per sentence.
sentences = [
    "Apple Inc. is looking at buying U.K. startup for $1 billion.",
    "Barack Obama visited New York City last week.",
]
entities_per_sentence = extract_named_entities_from_sentences(sentences)
print(entities_per_sentence)

[['Apple', 'Inc.'], ['Barack', 'Obama', 'New York City']]


3.	Write a Python program that uses NLTK to extract and display all noun phrases from a given text.


In [8]:
# 3. Extract and display all noun phrases from a given text
from nltk import RegexpParser

def extract_noun_phrases(text):
    """Return the noun phrases found in ``text`` as a list of strings.

    The text is tokenized and POS-tagged, then chunked with a one-rule
    grammar: an optional determiner, any number of adjectives, and one or
    more nouns.  Nouns are matched with ``<NN.*>`` so that plural (``NNS``)
    and proper (``NNP``/``NNPS``) nouns are chunked as well as singular
    common nouns (``NN``).

    Args:
        text: The raw text to analyse.

    Returns:
        One space-joined string per NP chunk, in order of appearance.
    """
    tagged = pos_tag(word_tokenize(text))

    # <NN.*> rather than the bare <NN> tag: the bare tag would skip every
    # phrase headed by a plural or proper noun.
    chunker = RegexpParser("NP: {<DT>?<JJ>*<NN.*>+}")
    chunked = chunker.parse(tagged)

    return [
        " ".join(word for word, _ in node)
        for node in chunked
        if isinstance(node, nltk.Tree) and node.label() == 'NP'
    ]

# Sample run of the noun-phrase extractor.
text = "The quick brown fox jumped over the lazy dog."
noun_phrases = extract_noun_phrases(text)
print(noun_phrases)

['The quick brown fox', 'the lazy dog']


4.	Using NLTK, write a program to perform chunking on the sentence: "He reckons the current account deficit will narrow to only 8 billion." and display the chunked tree.


In [9]:
# 4. Chunking on the sentence and display the chunked tree
# Two-stage chunk grammar: NP first, then VP (a verb followed by any NP/PP/
# CLAUSE nodes).  The NP rule ends in a single <NN.*>, so a run of nouns is
# not grouped into one NP chunk.
grammar = """
  NP: {<DT>?<JJ>*<NN.*>}
  VP: {<VB.*><NP|PP|CLAUSE>*}
"""
parser = RegexpParser(grammar)

sentence = "He reckons the current account deficit will narrow to only 8 billion."
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Chunk the tagged tokens and draw the resulting tree as text.
tree = parser.parse(tags)
tree.pretty_print()

                                              S                                                               
   ___________________________________________|__________________________________________________________      
  |       |      |      |     |       |       |                         VP                               |    
  |       |      |      |     |       |       |        _________________|_____________________           |     
  |       |      |      |     |       |       |       |                 NP                    NP         VP   
  |       |      |      |     |       |       |       |         ________|__________           |          |     
He/PRP will/MD to/TO only/RB 8/CD billion/CD ./. reckons/VBZ the/DT current/JJ account/NN deficit/NN narrow/VB



5.	Write a Python function using NLTK that takes a sentence as input and returns all verb phrases (VP) present in the sentence.


In [10]:
# 5. Function to extract all verb phrases (VP) from a sentence
def extract_verb_phrases(sentence):
    """Return the VP chunks of ``sentence`` as space-joined strings.

    The sentence is tokenized, POS-tagged and chunked with a grammar whose
    only stage is ``VP``: one verb tag followed by any number of ``NP``,
    ``PP`` or ``CLAUSE`` nodes.  No stage in this grammar creates those
    nodes, so in practice each chunk holds a single verb token.

    Args:
        sentence: The sentence to analyse.

    Returns:
        A list with one string per top-level VP chunk, in sentence order.
    """
    tagged_words = pos_tag(word_tokenize(sentence))
    vp_chunker = RegexpParser("VP: {<VB.*><NP|PP|CLAUSE>*}")

    return [
        " ".join(word for word, _ in node)
        for node in vp_chunker.parse(tagged_words)
        if isinstance(node, nltk.Tree) and node.label() == 'VP'
    ]

# Sample run of the verb-phrase extractor.
sentence = "She has been working hard on the project."
verb_phrases = extract_verb_phrases(sentence)
print(verb_phrases)

['has', 'been', 'working']


6.	Write a Python program using NLTK to perform named entity recognition (NER) on a paragraph containing multiple sentences. Display the extracted named entities.


In [11]:
# 6. Named entity recognition (NER) on a paragraph with multiple sentences
paragraph = "Apple Inc. is planning to acquire U.K.-based startup for $1 billion. Microsoft is also interested in the deal."

# Split the paragraph into sentences first; each sentence is then tokenized,
# tagged and NE-chunked on its own.
sentences = nltk.sent_tokenize(paragraph)

named_entities = []
for sentence in sentences:
    tokens = word_tokenize(sentence)
    tags = pos_tag(tokens)
    tree = ne_chunk(tags)

    # Only Tree children are entity chunks; collect their words as one string.
    named_entities.extend(
        " ".join(word for word, _ in subtree)
        for subtree in tree
        if isinstance(subtree, nltk.Tree)
    )

print(named_entities)

['Apple', 'Inc.', 'Microsoft']


7.	Write a Python program to count the number of named entities of type GPE (Geopolitical Entity) in a given text.


In [15]:
# 7. Count the number of named entities of type GPE (Geopolitical Entity) in a given text
from nltk import word_tokenize, pos_tag, ne_chunk

def count_GPE_entities(text):
    """Count the named-entity chunks in ``text`` that are labelled 'GPE'.

    Args:
        text: Raw text to analyse (processed as a single token sequence).

    Returns:
        The number of top-level chunks whose label is 'GPE'.
    """
    chunked = ne_chunk(pos_tag(word_tokenize(text)))

    # Each entity chunk is a Tree child; tally those tagged as geopolitical.
    return sum(
        1
        for node in chunked
        if isinstance(node, nltk.Tree) and node.label() == 'GPE'
    )

# Sample run of the GPE counter.
text = "Barack Obama is from the United States, and he visited Canada."
gpe_total = count_GPE_entities(text)
print(gpe_total)

2


8.	Write a Python program to extract all organization names from a given text using NLTK's Named Entity Recognition (NER).


In [32]:
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.tree import Tree
def extract_organizations(text):
    """Return every chunk labelled 'ORGANIZATION' found in ``text``.

    The text is split into sentences, and each sentence is tokenized,
    POS-tagged and NE-chunked separately.

    Args:
        text: Raw text, possibly spanning several sentences.

    Returns:
        A list of organization strings (chunk words joined by spaces), in
        order of appearance.
    """
    found = []
    for sent in nltk.sent_tokenize(text):
        chunked = ne_chunk(pos_tag(word_tokenize(sent)))
        # Keep only the Tree children carrying the ORGANIZATION label.
        found.extend(
            " ".join(token for token, _ in node.leaves())
            for node in chunked
            if isinstance(node, Tree) and node.label() == 'ORGANIZATION'
        )
    return found

# Sample text with three company descriptions, one per line.
text = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California.
Google LLC is an American multinational technology company that specializes in Internet-related services and products.
Microsoft Corporation is an American multinational technology company with headquarters in Redmond, Washington.
"""

organizations = extract_organizations(text)

# Header first, then one organization per line (nothing more if none found).
print("Organizations found:")
if organizations:
    print("\n".join(organizations))

Organizations found:
Inc.
LLC
Corporation


9.	Write a Python program using NLTK to extract proper nouns (NNP) from a given sentence.


In [17]:
# 9. Extract proper nouns (NNP) from a given sentence
def extract_proper_nouns(sentence):
    """Return the words of ``sentence`` whose POS tag is exactly 'NNP'.

    Args:
        sentence: The sentence to analyse.

    Returns:
        The matching words, in sentence order.
    """
    proper_nouns = []
    for word, tag in pos_tag(word_tokenize(sentence)):
        if tag == 'NNP':
            proper_nouns.append(word)
    return proper_nouns

# Sample run of the proper-noun extractor.
sentence = "Barack Obama visited New York City last week."
proper_nouns = extract_proper_nouns(sentence)
print(proper_nouns)

['Barack', 'Obama', 'New', 'York', 'City']


10.	Write a Python program to extract noun phrases from a given text using a custom chunking grammar.


In [18]:
# 10. Extract noun phrases from a given text using a custom chunking grammar
from nltk import RegexpParser

def extract_noun_phrases_custom_grammar(text):
    """Chunk ``text`` with a custom NP grammar and return the phrases.

    The grammar accepts an optional determiner, any number of adjectives and
    one or more tags starting with ``NN``.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of noun-phrase strings, one per top-level NP chunk.
    """
    np_grammar = "NP: {<DT>?<JJ>*<NN.*>+}"
    tagged = pos_tag(word_tokenize(text))
    chunked = RegexpParser(np_grammar).parse(tagged)

    phrases = []
    for node in chunked:
        # Unchunked tokens are plain (word, tag) tuples; skip them.
        if not isinstance(node, nltk.Tree):
            continue
        if node.label() != 'NP':
            continue
        phrases.append(" ".join(w for w, _ in node))
    return phrases

# Sample run of the custom-grammar noun-phrase extractor.
text = "The quick brown fox jumped over the lazy dog."
custom_noun_phrases = extract_noun_phrases_custom_grammar(text)
print(custom_noun_phrases)

['The quick brown fox', 'the lazy dog']


11.	Write a Python program to extract verb phrases (VP) from a given sentence using a custom chunking grammar.


In [19]:
# 11. Extract verb phrases (VP) from a given sentence using a custom chunking grammar
def extract_verb_phrases_custom_grammar(sentence):
    """Chunk ``sentence`` with a custom VP grammar and return the phrases.

    The grammar's single ``VP`` stage matches a verb tag followed by any
    number of ``NP``/``PP``/``CLAUSE`` nodes.  Nothing in this grammar builds
    those nodes, so each chunk ends up holding one verb token.

    Args:
        sentence: The sentence to analyse.

    Returns:
        A list of strings, one per top-level VP chunk.
    """
    def is_vp(node):
        # Chunks are Tree children; bare tokens are tuples.
        return isinstance(node, nltk.Tree) and node.label() == 'VP'

    tagged = pos_tag(word_tokenize(sentence))
    chunked = RegexpParser("VP: {<VB.*><NP|PP|CLAUSE>*}").parse(tagged)

    return [" ".join(w for w, _ in node) for node in chunked if is_vp(node)]

# Sample run of the custom-grammar verb-phrase extractor.
sentence = "She is writing a letter to her friend."
custom_verb_phrases = extract_verb_phrases_custom_grammar(sentence)
print(custom_verb_phrases)

['is', 'writing']


12.	Write a Python program that extracts all named entities and classifies them into their respective categories (PERSON, ORGANIZATION, GPE, etc.).


In [20]:
# 12. Extract all named entities and classify them into their respective categories (PERSON, ORGANIZATION, GPE, etc.)
def classify_named_entities(text):
    """Group the named entities found in ``text`` by their NE label.

    Args:
        text: Raw text to analyse (processed as a single token sequence).

    Returns:
        A dict mapping each label to the list of entity strings carrying
        that label.  The keys 'PERSON', 'ORGANIZATION' and 'GPE' are always
        present (possibly with empty lists); any other label the chunker
        emits is added under its own key rather than being discarded.
    """
    tokens = word_tokenize(text)
    tags = pos_tag(tokens)
    tree = ne_chunk(tags)

    # Pre-seed the three common categories so callers can rely on them.
    classified_entities = {'PERSON': [], 'ORGANIZATION': [], 'GPE': []}

    for subtree in tree:
        # Unchunked tokens are plain (word, tag) tuples, not entities.
        if not isinstance(subtree, nltk.Tree):
            continue
        entity = " ".join(word for word, tag in subtree)
        # setdefault keeps entities with labels outside the seeded set.
        classified_entities.setdefault(subtree.label(), []).append(entity)

    return classified_entities

# Sample run of the entity classifier.
text = "Apple Inc. and Microsoft collaborated with Barack Obama."
entities_by_label = classify_named_entities(text)
print(entities_by_label)

{'PERSON': ['Apple', 'Microsoft', 'Barack Obama'], 'ORGANIZATION': ['Inc.'], 'GPE': []}


13.	Write a Python program to visualize named entities using ne_chunk from NLTK.

In [21]:
# 13. Visualize named entities using ne_chunk from NLTK
from nltk import ne_chunk, word_tokenize, pos_tag
import nltk

def visualize_named_entities(text):
    """Print a text drawing of the NE-chunked tree for ``text``.

    Args:
        text: Raw text to tokenize, POS-tag and NE-chunk.

    Returns:
        None; the tree is written out by ``pretty_print``.
    """
    ne_chunk(pos_tag(word_tokenize(text))).pretty_print()

# Sample sentence to draw.
text = "Apple Inc. is planning to acquire U.K.-based startup for $1 billion."
visualize_named_entities(text=text)

                                                                S                                                  
   _____________________________________________________________|___________________________________________        
  |         |         |       |            |           |        |     |   |       |       |    PERSON  ORGANIZATION
  |         |         |       |            |           |        |     |   |       |       |      |          |       
is/VBZ planning/VBG to/TO acquire/VB U.K.-based/JJ startup/NN for/IN $/$ 1/CD billion/CD ./. Apple/NNP   Inc./NNP  

