#### Demonstrating text pre-processing pipelines using POS tagging, NER, and polysemy

In [2]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
from nltk import pos_tag, ne_chunk

# Make sure every NLTK data package this script relies on is present locally.
# The packages are requested one at a time, in a fixed order.
for _resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
    'wordnet',
):
    nltk.download(_resource)

# Function for POS Tagging
def pos_tagging(tokens):
    """Tag each token with its part of speech.

    Thin wrapper around NLTK's ``pos_tag``.

    Args:
        tokens: The tokens to tag (the pipeline passes the output of
            ``word_tokenize``).

    Returns:
        Whatever ``pos_tag`` returns; the pipeline iterates it as
        ``(word, tag)`` pairs.
    """
    tagged_tokens = pos_tag(tokens)
    return tagged_tokens

# Function for Named Entity Recognition
def named_entity_recognition(pos_tags):
    """Chunk POS-tagged tokens into named entities.

    Thin wrapper around NLTK's ``ne_chunk``.

    Args:
        pos_tags: POS-tagged tokens, as produced by ``pos_tagging``.

    Returns:
        Whatever ``ne_chunk`` returns; the pipeline iterates it and treats
        elements exposing a ``label`` attribute as named entities.
    """
    entity_tree = ne_chunk(pos_tags)
    return entity_tree

# Function for Polysemy Detection
def find_polysemy(word):
    """Report whether WordNet lists more than one synset for ``word``.

    Args:
        word: The word to look up.

    Returns:
        True when the lookup yields two or more synsets, False otherwise.
    """
    sense_count = len(wn.synsets(word))
    return sense_count >= 2

# Function to run the complete text processing pipeline
def text_processing_pipeline(text):
    """Run tokenization, POS tagging, NER, and polysemy detection on ``text``.

    Each stage prints its results to stdout under its own heading; nothing
    is returned.

    Args:
        text: The raw text to analyse.
    """
    # Tokenize once; the tokens feed both the tagger and the polysemy check.
    tokens = word_tokenize(text)

    # --- Part-of-speech tags -------------------------------------------
    tagged = pos_tagging(tokens)
    print("POS Tagging:")
    for word, tag in tagged:
        print(f"{word}: {tag}")

    # --- Named entities ------------------------------------------------
    chunks = named_entity_recognition(tagged)
    print("\nNamed Entity Recognition:")
    for node in chunks:
        # Only nodes exposing ``label`` are reported; everything else is skipped.
        if not hasattr(node, 'label'):
            continue
        entity_text = ' '.join(leaf[0] for leaf in node)
        print(f"{entity_text}: {node.label()}")

    # --- Polysemy ------------------------------------------------------
    print("\nPolysemy Detection:")
    # filter() is lazy, so each token is checked and reported in turn,
    # once per occurrence in the text.
    for token in filter(find_polysemy, tokens):
        print(f"'{token}' has multiple meanings.")

# Example article text
# The sample contains a place name ("New York"), a person name ("Virat Kohli"),
# and common words that occur more than once ("lead", "project", "team"), so
# each stage of the pipeline has something to report.
article_text = """
The lead engineer for the new bridge project made a breakthrough in New York. 
The team is now looking at the potential impacts of their findings on the new soil. 
Meanwhile, Virat Kohli hit his double ton in sports news last night. 
The lead engineer of the project team is highly respected.
"""
# Run the text processing pipeline
# The pipeline prints its results to stdout and has no return value to capture.
text_processing_pipeline(article_text)

POS Tagging:
The: DT
lead: NN
engineer: NN
for: IN
the: DT
new: JJ
bridge: NN
project: NN
made: VBD
a: DT
breakthrough: NN
in: IN
New: NNP
York: NNP
.: .
The: DT
team: NN
is: VBZ
now: RB
looking: VBG
at: IN
the: DT
potential: JJ
impacts: NNS
of: IN
their: PRP$
findings: NNS
on: IN
the: DT
new: JJ
soil: NN
.: .
Meanwhile: RB
,: ,
Virat: NNP
Kohli: NNP
hit: VBD
his: PRP$
double: JJ
ton: NN
in: IN
sports: NNS
news: NN
last: JJ
night: NN
.: .
The: DT
lead: JJ
engineer: NN
of: IN
the: DT
project: NN
team: NN
is: VBZ
highly: RB
respected: VBN
.: .

Named Entity Recognition:
New York: GPE
Virat Kohli: PERSON

Polysemy Detection:
'lead' has multiple meanings.
'engineer' has multiple meanings.
'new' has multiple meanings.
'bridge' has multiple meanings.
'project' has multiple meanings.
'made' has multiple meanings.
'a' has multiple meanings.
'breakthrough' has multiple meanings.
'in' has multiple meanings.
'New' has multiple meanings.
'team' has multiple meanings.
'is' has multiple meanings.
'n

[nltk_data] Downloading package punkt to C:\Users\Shailendra
[nltk_data]     Kadre\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Shailendra Kadre\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\Shailendra Kadre\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to C:\Users\Shailendra
[nltk_data]     Kadre\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Shailendra
[nltk_data]     Kadre\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Code Snippet 6.5