In [1]:
import nltk
from nltk import word_tokenize, pos_tag
from nltk.tree import Tree

# Download the tokenizer and POS-tagger models used by word_tokenize / pos_tag.
# Older NLTK releases load 'punkt' and 'averaged_perceptron_tagger'; NLTK 3.9+
# loads 'punkt_tab' and 'averaged_perceptron_tagger_eng' instead. Fetching both
# sets keeps the notebook runnable from a fresh kernel on either version.
for resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

def extract_noun_phrases(text, grammar="NP: {<DT>?<JJ>*<NN.*>+}"):
    """Extract noun phrases from ``text`` using a regular-expression chunker.

    The text is tokenized, POS-tagged, and chunked with ``grammar``; every
    top-level chunk labelled ``NP`` is returned as a space-joined string.

    Args:
        text: Raw text to analyse.
        grammar: Chunk grammar passed to ``nltk.RegexpParser``. The default
            matches an optional determiner, zero or more adjectives, and one
            or more nouns.

    Returns:
        A list of noun-phrase strings in order of appearance. The list is
        empty when ``text`` yields no tokens.
    """
    tokens = word_tokenize(text)
    if not tokens:
        # NLTK's regexp chunker prints "Warning: parsing empty text" when it is
        # handed an empty sentence; there is nothing to chunk, so stop here.
        return []

    tagged_tokens = pos_tag(tokens)
    chunk_tree = nltk.RegexpParser(grammar).parse(tagged_tokens)

    # Direct children of the root are either (word, tag) tuples (unchunked
    # tokens) or Tree objects (chunks); keep only the chunks labelled NP.
    return [
        " ".join(word for word, _tag in subtree.leaves())
        for subtree in chunk_tree
        if isinstance(subtree, Tree) and subtree.label() == "NP"
    ]

# Demo input: two short sentences to run through the extractor.
text = (
    "The quick brown fox jumps over the lazy dog. "
    "Apple Inc. is a big company."
)

# Collect the noun phrases; they are printed in the following cell.
noun_phrases = extract_noun_phrases(text)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\noele\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\noele\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
# Show the list of noun phrases produced by the previous cell.
print(
    "Noun Phrases:",
    noun_phrases,
)

Noun Phrases: ['The quick brown fox', 'the lazy dog', 'Apple Inc.', 'a big company']
