# NLTK Complete Guide - Section 12: WordNet

This notebook covers:
- What is WordNet?
- Synsets (Synonym Sets)
- Semantic Relations
- Word Similarity
- Practical Applications

In [None]:
import nltk

# Fetch the data packages this notebook reads. quiet=True is passed to keep
# the download log out of the cell output.
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# Imported under the short alias `wn`, which every later cell uses.
from nltk.corpus import wordnet as wn

## 12.1 What is WordNet?

**WordNet** is a lexical database of English:
- Words grouped into **synsets** (synonym sets)
- Synsets connected by semantic relations
- Covers nouns, verbs, adjectives, adverbs

In [None]:
# Fetch every sense recorded for the word, then describe each one
synsets = wn.synsets('dog')

print("Synsets for 'dog': {}".format(len(synsets)))
print(50 * "-")

for syn in synsets:
    # One call per sense; the leading "\n" keeps a blank line between senses
    print(
        f"\n{syn.name()}",
        f"  POS: {syn.pos()}",
        f"  Definition: {syn.definition()}",
        f"  Examples: {syn.examples()}",
        sep="\n",
    )

## 12.2 Synset Structure

Synset name format: `word.pos.nn`
- **word**: lemma
- **pos**: n (noun), v (verb), a (adjective), s (satellite adjective), r (adverb)
- **nn**: sense number

In [None]:
# Fetch a single sense directly by its full "word.pos.nn" identifier
dog = wn.synset('dog.n.01')

# Build the five report lines first, then emit them in one go
print("\n".join([
    f"Synset: {dog}",
    f"Name: {dog.name()}",
    f"POS: {dog.pos()}",
    f"Definition: {dog.definition()}",
    f"Examples: {dog.examples()}",
]))

In [None]:
# List the lemmas attached to the `dog` synset fetched in the previous cell
print("Lemmas in {}:".format(dog.name()))
for lemma in dog.lemmas():
    print("  " + lemma.name())

In [None]:
# Restrict the lookup to one part of speech at a time
word = 'run'

print(f"Synsets for '{word}':")

# (heading, POS constant, max senses to show); None means "show all of them"
for heading, pos_tag, limit in (("Nouns", wn.NOUN, None), ("Verbs", wn.VERB, 5)):
    print(f"\n{heading}:")
    for s in wn.synsets(word, pos=pos_tag)[:limit]:
        # Definitions are clipped to 50 characters and always followed by "..."
        print("  {}: {}...".format(s.name(), s.definition()[:50]))

## 12.3 Synonyms and Antonyms

In [None]:
def get_synonyms(word):
    """Collect the lemma names of every synset returned for ``word``.

    Underscores in lemma names are replaced with spaces. The result is a
    set, so each name appears once and no ordering is implied; it is empty
    when the lookup returns no synsets.
    """
    return {
        lemma.name().replace('_', ' ')
        for sense in wn.synsets(word)
        for lemma in sense.lemmas()
    }

def get_antonyms(word):
    """Collect the antonym names attached to any lemma of any synset of ``word``.

    Underscores in the antonym names are replaced with spaces. The result
    is a set, so each name appears once and no ordering is implied; it is
    empty when no lemma reports an antonym.
    """
    return {
        opposite.name().replace('_', ' ')
        for sense in wn.synsets(word)
        for lemma in sense.lemmas()
        for opposite in lemma.antonyms()
    }

In [None]:
# Show a handful of synonyms and antonyms for a few common adjectives
words = ['happy', 'good', 'fast', 'big']

print("Synonyms and Antonyms")
print("=" * 60)

for word in words:
    # The helpers return sets, and set iteration order for strings can change
    # from one kernel start to the next. Sorting before slicing keeps the
    # displayed subset identical on every Restart & Run All.
    syns = sorted(get_synonyms(word))
    ants = sorted(get_antonyms(word))

    print(f"\n{word.upper()}")
    print(f"  Synonyms: {', '.join(syns[:8])}")
    print(f"  Antonyms: {', '.join(ants[:5]) if ants else 'None found'}")

## 12.4 Semantic Relations

In [None]:
# Hypernyms: the more general concepts directly above a synset
dog = wn.synset('dog.n.01')

print("Hypernyms of {} (is-a):".format(dog.name()))
for hyper in dog.hypernyms():
    print("  {}: {}".format(hyper.name(), hyper.definition()))

In [None]:
# Hyponyms: the more specific concepts directly below a synset.
# Only the first ten are shown, each definition clipped to 40 characters.
print("\nHyponyms of " + dog.name() + " (types of):")
for hypo in dog.hyponyms()[:10]:
    print("  {}: {}...".format(hypo.name(), hypo.definition()[:40]))

In [None]:
# Print every hypernym chain for `dog` as an indented tree, one chain per block.
# NOTE(review): NLTK appears to return each path root-first, so the first line
# printed is the most general concept. Confirm against the rendered output.
print("Hypernym path from {} to root:".format(dog.name()))
print(50 * "-")

for path in dog.hypernym_paths():
    depth = 0
    for syn in path:
        # Two spaces of indentation per level of the chain
        print(depth * '  ' + "└─ " + syn.name())
        depth += 1
    print()

In [None]:
# Meronyms: what a concept is composed of (parts) or made from (substances)
car = wn.synset('car.n.01')

print("Parts of {}:".format(car.name()))

# Each entry pairs a heading with the relation method that supplies its rows
for heading, relation in (
    ("\nPart meronyms (components):", car.part_meronyms),
    ("\nSubstance meronyms (made of):", car.substance_meronyms),
):
    print(heading)
    for related in relation():
        print("  " + related.name())

In [None]:
# Holonyms: the wholes that a concept is a part of
wheel = wn.synset('wheel.n.01')

print("{} is part of:".format(wheel.name()))
for holo in wheel.part_holonyms():
    print("  " + holo.name())

## 12.5 Word Similarity

In [None]:
# Path similarity (0 to 1, based on shortest path)
dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
car = wn.synset('car.n.01')
tree = wn.synset('tree.n.01')

print("Path Similarity (0-1):")
print(40 * "-")

# (label, left synset, right synset) for each comparison, in display order
for label, left, right in (
    ("dog - cat", dog, cat),
    ("dog - car", dog, car),
    ("dog - tree", dog, tree),
    ("cat - car", cat, car),
):
    print(f"{label}: {left.path_similarity(right):.3f}")

In [None]:
# Wu-Palmer similarity (based on depth in taxonomy).
# Reuses the dog/cat/car/tree synsets created in the path-similarity cell.
print("\nWu-Palmer Similarity:")
print(40 * "-")

for label, other in (("dog - cat", cat), ("dog - car", car), ("dog - tree", tree)):
    print("{}: {:.3f}".format(label, dog.wup_similarity(other)))

In [None]:
# Lowest common hypernym: the most specific ancestor two synsets share
print("\nLowest Common Hypernyms:")
print(40 * "-")

pairs = [(dog, cat), (dog, car), (cat, tree)]

for first, second in pairs:
    print("{} & {}:".format(first.name(), second.name()))
    # The call returns a list, so every shared ancestor is printed
    for shared in first.lowest_common_hypernyms(second):
        print("  → {}: {}...".format(shared.name(), shared.definition()[:40]))

## 12.6 Similarity Matrix

In [None]:
def similarity_matrix(words, pos=wn.NOUN):
    """Build a square Wu-Palmer similarity table for ``words``.

    Each word is represented by the first synset returned for it under
    ``pos``. A word with no synset, or a pair whose similarity comes back
    falsy (including None), contributes 0 to the table.

    Returns a list of rows; row i, column j holds the score of
    words[i] against words[j].
    """
    def first_sense(word):
        # First synset for the word under the requested POS, or None
        candidates = wn.synsets(word, pos=pos)
        return candidates[0] if candidates else None

    def score(left, right):
        # Missing synsets and falsy similarities both collapse to 0
        if not (left and right):
            return 0
        return left.wup_similarity(right) or 0

    senses = [first_sense(word) for word in words]
    return [[score(left, right) for right in senses] for left in senses]

In [None]:
# Render the similarity table with right-aligned, 10-character-wide columns
words = ['dog', 'cat', 'car', 'truck', 'tree', 'flower']
matrix = similarity_matrix(words)

print("Word Similarity Matrix (Wu-Palmer)")
print(60 * "=")

# Header row: an empty corner cell followed by one column label per word
print(f"{'':>10}" + "".join(f"{label:>10}" for label in words))

# Body: each row starts with its word, then the scores to two decimals
for word, row in zip(words, matrix):
    print(f"{word:>10}" + "".join(f"{value:>10.2f}" for value in row))

## 12.7 Verb Relations

In [None]:
# Verb entailments: actions that necessarily accompany another action
walk = wn.synset('walk.v.01')
eat = wn.synset('eat.v.01')
sleep = wn.synset('sleep.v.01')

print("Verb Entailments (if X then Y):")
print(40 * "-")

verbs = [walk, eat, sleep]
for v in verbs:
    entails = v.entailments()
    if not entails:
        # Verbs with no recorded entailment are left out of the listing
        continue
    print("\n{} entails:".format(v.name()))
    for e in entails:
        print("  → " + e.name())

In [None]:
# Verb frames: the frame strings reported by each lemma of a verb synset
give = wn.synset('give.v.01')

print("Verb frames for {}:".format(give.name()))
for lemma in give.lemmas():
    print("\nLemma: " + lemma.name())
    for frame in lemma.frame_strings():
        print("  {}".format(frame))

## 12.8 WordNet Utility Class

In [None]:
class WordNetExplorer:
    """Utility class for WordNet exploration.

    All methods are static and query the module-level ``wn`` corpus reader,
    so they can be called on the class or on an instance.
    """

    # Accepted ``measure`` names mapped to the synset method that computes them.
    _MEASURES = {
        'wup': 'wup_similarity',
        'path': 'path_similarity',
        'lch': 'lch_similarity',
    }

    @staticmethod
    def lookup(word, pos=None):
        """Look up all senses of a word.

        Args:
            word: The word to look up.
            pos: Optional part-of-speech filter forwarded to ``wn.synsets``.

        Returns:
            A list with one dict per synset, holding the keys ``synset``,
            ``pos``, ``definition``, ``examples`` and ``lemmas``.
        """
        return [
            {
                'synset': syn.name(),
                'pos': syn.pos(),
                'definition': syn.definition(),
                'examples': syn.examples(),
                'lemmas': [lemma.name() for lemma in syn.lemmas()],
            }
            for syn in wn.synsets(word, pos=pos)
        ]

    @staticmethod
    def synonyms(word):
        """Get all synonyms.

        Returns:
            An alphabetically sorted list of the distinct lemma names found
            in every synset of ``word``, with underscores replaced by spaces.
        """
        names = {
            lemma.name().replace('_', ' ')
            for syn in wn.synsets(word)
            for lemma in syn.lemmas()
        }
        # A bare list(set) comes out in a run-dependent order, which makes
        # slices such as synonyms(word)[:5] irreproducible. Sort instead.
        return sorted(names)

    @staticmethod
    def antonyms(word):
        """Get all antonyms.

        Returns:
            An alphabetically sorted list of the distinct antonym names
            reported by any lemma of any synset of ``word``, with
            underscores replaced by spaces.
        """
        names = {
            ant.name().replace('_', ' ')
            for syn in wn.synsets(word)
            for lemma in syn.lemmas()
            for ant in lemma.antonyms()
        }
        # Sorted for the same reproducibility reason as synonyms().
        return sorted(names)

    @staticmethod
    def similarity(word1, word2, measure='wup'):
        """Calculate similarity between the first senses of two words.

        Args:
            word1: First word.
            word2: Second word.
            measure: One of ``'wup'``, ``'path'`` or ``'lch'``.

        Returns:
            The score from the selected synset method, or None when either
            word has no synsets, or when ``measure`` is ``'lch'`` and the
            two first senses have different parts of speech.

        Raises:
            ValueError: If ``measure`` is not a recognised name.
        """
        # Reject unknown measures up front. Falling through to an implicit
        # None would look the same as "word not found".
        method_name = WordNetExplorer._MEASURES.get(measure)
        if method_name is None:
            raise ValueError(
                f"Unknown similarity measure {measure!r}; "
                f"expected one of {sorted(WordNetExplorer._MEASURES)}"
            )

        syns1 = wn.synsets(word1)
        syns2 = wn.synsets(word2)

        if not syns1 or not syns2:
            return None

        s1, s2 = syns1[0], syns2[0]

        # The lookups above are not POS-filtered, so the two first senses can
        # belong to different parts of speech. lch_similarity is not defined
        # for that case, so report "no score" rather than erroring out.
        if measure == 'lch' and s1.pos() != s2.pos():
            return None

        return getattr(s1, method_name)(s2)

    @staticmethod
    def hypernym_tree(word, depth=3):
        """Get the hypernym tree of the word's first sense up to ``depth``.

        Args:
            word: The word whose first synset is the root of the tree.
            depth: Number of hypernym levels to expand. A negative value
                never reaches the cutoff, so expansion continues until
                synsets with no hypernyms are reached.

        Returns:
            None when the word has no synsets. Otherwise a nested dict with
            ``name`` and ``definition`` keys, plus a ``hypernyms`` list of
            child dicts on every node above the depth cutoff.
        """
        syns = wn.synsets(word)
        if not syns:
            return None

        def get_tree(syn, remaining):
            node = {'name': syn.name(), 'definition': syn.definition()}
            if remaining == 0:
                # Cutoff reached: this node carries no 'hypernyms' key
                return node
            node['hypernyms'] = [get_tree(h, remaining - 1) for h in syn.hypernyms()]
            return node

        return get_tree(syns[0], depth)

In [None]:
# Exercise each WordNetExplorer helper once
explorer = WordNetExplorer()

# Lookup: show the first three senses, definitions clipped to 50 characters
print("Looking up 'bank':")
for sense in explorer.lookup('bank')[:3]:
    print("  {}: {}...".format(sense['synset'], sense['definition'][:50]))

# Synonyms/Antonyms
happy_synonyms = explorer.synonyms('happy')[:5]
happy_antonyms = explorer.antonyms('happy')
print(f"\nSynonyms of 'happy': {happy_synonyms}")
print(f"Antonyms of 'happy': {happy_antonyms}")

# Similarity (default measure), formatted to three decimals
print("\nSimilarity (dog, cat): {:.3f}".format(explorer.similarity('dog', 'cat')))
print("Similarity (dog, car): {:.3f}".format(explorer.similarity('dog', 'car')))

## Summary

| Method | Description |
|--------|-------------|
| `wn.synsets(word)` | Get all synsets for word |
| `wn.synset('dog.n.01')` | Get specific synset |
| `syn.definition()` | Get definition |
| `syn.examples()` | Get usage examples |
| `syn.lemmas()` | Get lemmas (synonyms) |
| `syn.hypernyms()` | More general terms |
| `syn.hyponyms()` | More specific terms |
| `syn.part_meronyms()` | Parts/components |
| `lemma.antonyms()` | Antonyms |
| `syn.wup_similarity(syn2)` | Wu-Palmer similarity |
| `syn.path_similarity(syn2)` | Path-based similarity |