#### Tutorial on Synonyms, Antonyms, Homophones, Homographs, Polysemy, and Hyponyms

In [38]:
# Imports the nltk library for text processing
import nltk

# Imports WordNet to access synonyms, antonyms, and word meanings.
from nltk.corpus import wordnet as wn

# Imports the function to tokenize text into words.
from nltk.tokenize import word_tokenize

In [None]:
# Downloads the WordNet lexical database.
nltk.download('wordnet')

# Downloads the Open multilingual WordNet package. 
# It is needed for some language-related tasks.
nltk.download('omw-1.4')

# Downloads the Punkt tokenizer models for sentence and word tokenization.
nltk.download('punkt')

In [40]:
# Mention the sample text.
article_text = """
The lead engineer in the new bridge project has made a breakthrough. 
The team is now looking at the potential impacts of their findings on the new soil. 
Meanwhile, a local cricket player hit his double ton in sports news last night. 
The lead engineer of the project team is highly respected.
"""

#### Tokenize the article

In [41]:
# Tokenizes the article text into individual words or tokens.
tokens = word_tokenize(article_text)

#### 1. Synonyms Detection

In [42]:
# Prints the header for synonym detection.
print("Synonyms Detection:")

# Iterates over the first 5 tokens.
for token in tokens[:5]:
    # Retrieves WordNet synsets for the token.
    synsets = wn.synsets(token)
    
    # Initializes a set to collect synonyms.
    synonyms = set()
    
    # Iterates over each synset to gather synonyms.
    for synset in synsets:
        for lemma in synset.lemmas():
            synonyms.add(lemma.name())
    
    # Prints the synonyms if found, otherwise, it indicates none were found.
    if synonyms:
        print(f"Synonyms for '{token}': {', '.join(synonyms)}")
    else:
        print(f"No synonyms found for '{token}'")

Synonyms Detection:
No synonyms found for 'The'
Synonyms for 'lead': wind, steer, lead-in, Pb, lede, jumper_lead, extend, trail, guide, moderate, head, booster_cable, precede, tip, result, jumper_cable, confidential_information, pass, principal, atomic_number_82, run, track, lead_story, take, pencil_lead, leading, tether, contribute, star, hint, direct, conduct, leave, conduce, lead, top, go, spark_advance, leash, chair
Synonyms for 'engineer': organise, mastermind, engineer, technologist, direct, orchestrate, applied_scientist, railroad_engineer, locomotive_engineer, engine_driver, organize
Synonyms for 'in': inch, IN, Indiana, In, inward, inwards, in, indium, atomic_number_49, Hoosier_State
No synonyms found for 'the'


#### 2. Antonyms Detection

In [43]:
# Prints the header for antonym detection.
print("\nAntonyms Detection:")

# Iterates over each token.
for token in tokens:
    # Retrieves WordNet synsets for the token.
    synsets = wn.synsets(token)
    
    # Initializes a set to collect antonyms.
    antonyms = set()
    
    # Iterates over each synset to gather antonyms.
    for synset in synsets:
        for lemma in synset.lemmas():
            # Checks for antonyms and adds them to the set.
            if lemma.antonyms():
                antonyms.update(ant.name() for ant in lemma.antonyms())
    
    # Prints the antonyms if found.
    if antonyms:
        print(f"Antonyms for '{token}': {', '.join(antonyms)}")


Antonyms Detection:
Antonyms for 'lead': follow, deficit
Antonyms for 'new': worn, old
Antonyms for 'has': refuse, lack, abstain
Antonyms for 'made': unmake, unmade, break
Antonyms for 'is': differ
Antonyms for 'looking': back
Antonyms for 'potential': actual
Antonyms for 'findings': lose
Antonyms for 'on': off
Antonyms for 'new': worn, old
Antonyms for 'soil': clean
Antonyms for 'local': express, national, general
Antonyms for 'hit': miss
Antonyms for 'double': multivalent, single, univalent
Antonyms for 'last': first
Antonyms for 'night': day
Antonyms for 'lead': follow, deficit
Antonyms for 'is': differ
Antonyms for 'respected': disesteem, disrespect


#### 3. Homophones Detection (alternate approach)

In [44]:
print("\nHomophones Detection:")
# Homophones are generally detected using phonetic algorithms or external libraries like `fuzzy` or `PyDictionary`. 
# Here we might use a simple custom method based on pronunciation or external data sources.

# Defines a function to check if two words are homophones based on a simplified rule.
def is_homophone(word1, word2):
    return word1.lower() == word2.lower() and word1 != word2

# Example list of homophone pairs.
homophones_list = [('lead', 'led'), ('bare', 'bear'), ('pair', 'pear')]

# Iterates over each token.
for token in tokens:
    # Finds homophones for the current token based on the example list.
    homophones = [pair[1] for pair in homophones_list if pair[0] == token.lower()]
    
    # Prints homophones if found.
    if homophones:
        print(f"Homophones for '{token}': {', '.join(homophones)}")


Homophones Detection:
Homophones for 'lead': led
Homophones for 'lead': led


#### 4. Homographs Detection

In [46]:
# Prints the header for homograph detection.
print("\nHomographs Detection:")

# Iterates over each token.
for token in tokens:
    # Retrieves WordNet synsets for the token.
    synsets = wn.synsets(token)
    
    # Initializes a set to collect part-of-speech tags.
    pos_tags = set()
    
    # Adds POS tags for each synset to the set.
    for synset in synsets:
        pos_tags.add(synset.pos())
    
    # Prints if the token has multiple POS tags, indicating it is a homograph.
    if len(pos_tags) > 1:
        print(f"'{token}' is a homograph with POS tags: {', '.join(pos_tags)}")


Homographs Detection:
'lead' is a homograph with POS tags: v, n
'engineer' is a homograph with POS tags: v, n
'in' is a homograph with POS tags: r, s, n
'new' is a homograph with POS tags: s, a, r
'bridge' is a homograph with POS tags: v, n
'project' is a homograph with POS tags: v, n
'has' is a homograph with POS tags: v, n
'made' is a homograph with POS tags: v, s, a
'team' is a homograph with POS tags: v, n
'now' is a homograph with POS tags: r, n
'looking' is a homograph with POS tags: v, s, n
'potential' is a homograph with POS tags: s, a, n
'impacts' is a homograph with POS tags: v, n
'findings' is a homograph with POS tags: v, n
'on' is a homograph with POS tags: a, r
'new' is a homograph with POS tags: s, a, r
'soil' is a homograph with POS tags: v, n
'Meanwhile' is a homograph with POS tags: r, n
'local' is a homograph with POS tags: a, n
'cricket' is a homograph with POS tags: v, n
'hit' is a homograph with POS tags: v, n
'double' is a homograph with POS tags: a, r, n, s, v


#### 5. Polysemy Detection

In [47]:
# Prints the header for polysemy detection.
print("\nPolysemy Detection:")

# Iterates over each token.
for token in tokens:
    # Retrieves WordNet synsets for the token.
    synsets = wn.synsets(token)
    
    # Prints if the token has more than one synset, indicating multiple meanings.
    if len(synsets) > 1:
        print(f"'{token}' has multiple meanings.")


Polysemy Detection:
'lead' has multiple meanings.
'engineer' has multiple meanings.
'in' has multiple meanings.
'new' has multiple meanings.
'bridge' has multiple meanings.
'project' has multiple meanings.
'has' has multiple meanings.
'made' has multiple meanings.
'a' has multiple meanings.
'breakthrough' has multiple meanings.
'team' has multiple meanings.
'is' has multiple meanings.
'now' has multiple meanings.
'looking' has multiple meanings.
'at' has multiple meanings.
'potential' has multiple meanings.
'impacts' has multiple meanings.
'findings' has multiple meanings.
'on' has multiple meanings.
'new' has multiple meanings.
'soil' has multiple meanings.
'Meanwhile' has multiple meanings.
'a' has multiple meanings.
'local' has multiple meanings.
'cricket' has multiple meanings.
'player' has multiple meanings.
'hit' has multiple meanings.
'double' has multiple meanings.
'ton' has multiple meanings.
'in' has multiple meanings.
'sports' has multiple meanings.
'news' has multiple mean

#### 6. Hyponyms of a given word

In [48]:
# Defines a function to find hyponyms for a given word.
def find_hyponyms(word):
    # Initializes a set to collect hyponyms.
    hyponyms = set()
    
    # Retrieves WordNet synsets for the given word.
    synsets = wn.synsets(word)
    
    # Iterates over each synset to find hyponyms.
    for synset in synsets:
        for hyponym in synset.hyponyms():
            for lemma in hyponym.lemmas():
                # Adds hyponyms to the set.
                hyponyms.add(lemma.name())
    
    # Returns the set of hyponyms.
    return hyponyms

#### 7. Categorize the article based on hyponyms of given categories

In [50]:
# Defines a function to categorize terms in the text based on hyponyms of given categories.
def categorize_article(text, category_terms):
    # Tokenizes and lowercases the text.
    tokens = word_tokenize(text.lower())
    
    # Initializes a dictionary to store categorized terms.
    categorized_terms = {term: [] for term in category_terms}
    
    # Iterates over each token.
    for token in tokens:
        # Checks each category term for matching hyponyms.
        for term in category_terms:
            hyponyms = find_hyponyms(term)
            if token in hyponyms:
                # Appends the token to the corresponding category.
                categorized_terms[term].append(token)
    
    # Returns the dictionary of categorized terms.
    return categorized_terms

# Categorizes terms in the article text based on specified general categories.
categories = categorize_article(article_text, ["scientist", "research", "sports", "team", "player"])

# Print out the categorized terms or a message if none are found.
for category, items in categories.items():
    if items:
        print(f"Category '{category}': {', '.join(items)}")
    else:
        print(f"Category '{category}': No matching terms found")

Category 'scientist': No matching terms found
Category 'research': No matching terms found
Category 'sports': No matching terms found
Category 'team': No matching terms found
Category 'player': lead, lead


Code Snippet 6.4