In [1]:
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize, pos_tag

In [90]:
def least_similar(synsets):
    """Return the pair of synsets with the highest Wu-Palmer similarity.

    NOTE: despite its name, this function keeps the pair with the LARGEST
    ``wup_similarity`` score.  The name is left unchanged so existing
    callers keep working.

    Every unordered pair ``(synsets[i], synsets[j])`` with ``i < j`` is
    scored with ``synsets[i].wup_similarity(synsets[j])``.  Pairs whose
    score is ``None`` are ignored; on ties the first pair encountered wins.

    Args:
        synsets: sequence of objects exposing ``wup_similarity(other)``.

    Returns:
        A 2-tuple ``(s1, s2)`` holding the best-scoring pair.

    Raises:
        ValueError: if no pair could be scored (fewer than two synsets, or
            every pairwise similarity is ``None``).
    """
    best_score = None
    best_pair = None
    for i in range(len(synsets)):
        for j in range(i + 1, len(synsets)):
            this_similarity = synsets[i].wup_similarity(synsets[j])
            if this_similarity is None:
                continue
            # Start from "nothing seen yet" rather than 0 so that a pair
            # scoring exactly 0 is still a valid result.
            if best_score is None or this_similarity > best_score:
                best_score = this_similarity
                best_pair = (synsets[i], synsets[j])
    if best_pair is None:
        raise ValueError(
            'Need at least two synsets with a defined wup_similarity; got %d synset(s)'
            % len(synsets))
    return best_pair

def best_corresponding_pos(synset):
    """Pick a word from the synset's definition using Wu-Palmer similarity.

    The definition is tokenized and POS-tagged.  A token is a "same-POS"
    candidate when its tag contains the marker for the synset's part of
    speech ('NN' for nouns, 'V' for verbs, 'JJ' for adjectives).  Each such
    token is lemmatized, looked up in WordNet under the synset's POS, and
    the first synset found is scored against ``synset`` with
    ``wup_similarity``.  As long as no same-POS candidate has been accepted,
    every other token is scored the same way (without a POS restriction) as
    a fallback.

    In both groups the token with the LOWEST score is kept; ties keep the
    earlier token.

    NOTE(review): the input word itself is not filtered out of the
    candidates, and the lowest (not highest) similarity wins -- confirm
    that this is the intended selection rule.

    Args:
        synset: a WordNet synset.

    Returns:
        ``(found_same_pos, similarity, word)``.  ``found_same_pos`` is True
        when a same-POS token was accepted; otherwise the fallback result
        is returned.  If nothing could be scored, ``similarity`` is the
        initial sentinel ``2`` and ``word`` is ``None``.
    """
    synset_pos = synset.pos()
    tokens = word_tokenize(synset.definition())

    # Substring looked for in each token's tag.  Any other part of speech
    # gets the placeholder 'z' (presumably never part of a real tag, which
    # would send every token to the fallback branch -- TODO confirm).
    tag_marker = {wn.NOUN: 'NN', wn.VERB: 'V', wn.ADJ: 'JJ'}.get(synset_pos, 'z')

    lemmatizer = WordNetLemmatizer()

    def score_first_sense(candidates):
        # Only the first synset of the lemma is considered; None when the
        # lemma has no synsets at all.
        if not candidates:
            return None
        return synset.wup_similarity(candidates[0])

    # (lowest score so far, token that produced it)
    same_pos = (2, None)
    other_pos = (2, None)

    for token, tag in pos_tag(tokens):
        if tag_marker in tag:
            lemma = lemmatizer.lemmatize(token, pos=synset_pos)
            score = score_first_sense(wn.synsets(lemma, pos=synset_pos))
            if score is not None and score < same_pos[0]:
                same_pos = (score, token)
        elif same_pos[1] is None:
            # Fallback candidates are only collected until the first
            # same-POS token has been accepted.
            lemma = lemmatizer.lemmatize(token)
            score = score_first_sense(wn.synsets(lemma))
            if score is not None and score < other_pos[0]:
                other_pos = (score, token)

    # Fall back to the other-POS result when no same-POS word was found.
    found_same_pos = same_pos[1] is not None
    chosen = same_pos if found_same_pos else other_pos
    return (found_same_pos,) + chosen

def first_corresponding_pos(synset):
    """Return the first word of the synset's definition with a matching POS.

    The definition is tokenized and POS-tagged.  The first token whose tag
    contains the marker for the synset's part of speech ('NN' for nouns,
    'V' for verbs, 'JJ' for adjectives) is returned.  If there is none, the
    first token whose tag contains the alternative marker is returned
    instead ('JJ' for noun synsets, 'NN' for everything else).

    Args:
        synset: a WordNet synset whose ``pos()`` is 'n', 'v', 'a' or 's'
            (satellite adjective, treated as an adjective).

    Returns:
        ``(word, definition)`` where ``definition`` is the synset's
        definition string and ``word`` is the selected token, or ``None``
        when neither marker matched any token.

    Raises:
        ValueError: if the synset has any other part of speech.
    """
    this_pos = synset.pos()
    definition = synset.definition()
    def_token = word_tokenize(definition)

    # WordNet POS -> substring expected in the tagger's tag.  WordNet files
    # some adjectives under 's' (satellite adjective); they are tagged like
    # ordinary adjectives, so they share the 'JJ' marker.
    tag_markers = {'n': 'NN', 'v': 'V', 'a': 'JJ', 's': 'JJ'}
    if this_pos not in tag_markers:
        raise ValueError('Input synset must be a Noun, Verb, or Adjective')
    pos_token = tag_markers[this_pos]

    # in case same pos does not exist
    alt_pos = 'JJ' if this_pos == 'n' else 'NN'

    first_alt = None
    for token, tag in pos_tag(def_token):
        if pos_token in tag:
            return token, definition
        # Test the tag string, not the (token, tag) tuple: tuple membership
        # only matches exact equality, so tags such as 'NNS' or 'JJR' were
        # never recognised as fallbacks.
        if first_alt is None and alt_pos in tag:
            first_alt = token
    return first_alt, definition
        
def get_two_senses(seed_word):
    """Pick two senses of ``seed_word`` and describe each by a definition word.

    All WordNet synsets of ``seed_word`` are handed to ``least_similar``;
    each synset of the pair it returns is then mapped through
    ``first_corresponding_pos``.

    Returns:
        A 2-tuple with the ``first_corresponding_pos`` result for each
        synset of the pair, in pair order.
    """
    sense_a, sense_b = least_similar(wn.synsets(seed_word))
    # Disabled alternative: use best_corresponding_pos(sense)[2] per sense.
    return first_corresponding_pos(sense_a), first_corresponding_pos(sense_b)

def traverse_wn(word):
    """Step from ``word`` to a word taken from one of its WordNet definitions.

    The synsets of ``word`` are visited in the order WordNet returns them
    and ``first_corresponding_pos`` is applied to each.  The first result
    whose word is present and differs from ``word`` is returned.

    Args:
        word: the word to look up in WordNet.

    Returns:
        The ``(next_word, definition)`` tuple produced by
        ``first_corresponding_pos`` for the first usable synset, or ``None``
        when no synset yields a usable word.
    """
    # best word
#     min_similarity_other_pos = 2
#     min_similarity = 2
#     best_other_pos = None
#     best_pos = None
#     for synset in wn.synsets(word):
#         is_same_pos, this_similarity, this_pos = best_corresponding_pos(synset)
#         if is_same_pos and this_similarity < min_similarity:
#             min_similarity = this_similarity
#             best_pos = this_pos
#         elif not is_same_pos and this_similarity < min_similarity_other_pos:
#             min_similarity_other_pos = this_similarity
#             best_other_pos = this_pos
#     if best_pos is None:
#         return best_other_pos
#     return best_pos

    # first word
    for synset in wn.synsets(word):
        try:
            result = first_corresponding_pos(synset)
        except ValueError:
            # first_corresponding_pos rejects parts of speech it does not
            # support; skip those senses instead of aborting the traversal.
            continue
        # The helper returns a (word, definition) tuple, so the word has to
        # be unpacked before it is compared with the input.  Comparing the
        # whole tuple with `word` is always "different" and made the loop
        # return on the first synset unconditionally.
        candidate, definition = result
        if candidate is not None and candidate != word:
            return candidate, definition
    return None
        
def five_word_algorithm(seed_word):
    """Build a five-entry chain around two senses of ``seed_word``.

    The two centre entries come from ``get_two_senses``.  The chain is then
    extended twice to the left (``traverse_wn`` of the left centre word,
    then of that result's word) and once to the right (``traverse_wn`` of
    the right centre word).

    Returns:
        A 5-tuple ``(outer_left, inner_left, centre_left, centre_right,
        outer_right)`` of the values returned by those helpers.

    Raises:
        TypeError: if ``traverse_wn`` returns ``None`` for the inner-left
            entry, because its first element is needed for the next step.
    """
    centre_left, centre_right = get_two_senses(seed_word)
    inner_left = traverse_wn(centre_left[0])
    outer_left = traverse_wn(inner_left[0])
    outer_right = traverse_wn(centre_right[0])
    return outer_left, inner_left, centre_left, centre_right, outer_right

def print_five_words(seed_word):
    """Print the five-word chain for ``seed_word`` on one line.

    Only the first element of each chain entry is shown.  Entries are joined
    with '->', except the two centre entries, which are underlined and
    joined by a bold '~~>' using ANSI escape sequences.
    """
    underline, bold, reset = '\033[4m', '\033[1m', '\033[0m'
    outer_left, inner_left, centre_left, centre_right, outer_right = (
        entry[0] for entry in five_word_algorithm(seed_word))
    line = (outer_left + '->' + inner_left + '->'
            + underline + centre_left + reset
            + bold + '~~>' + reset
            + underline + centre_right + reset
            + '->' + outer_right)
    print(line)

In [91]:
# Show the full chain for 'chicken'; each entry is a (word, definition) tuple.
five_word_algorithm('chicken')

(('vertebrates',
  'warm-blooded egg-laying vertebrates characterized by feathers and forelimbs modified as wings'),
 ('bird',
  'a domesticated gallinaceous bird thought to be descended from the red jungle fowl'),
 ('fowl',
  'a domestic fowl bred for flesh or eggs; believed to have been developed from the red jungle fowl'),
 ('person', 'a person who lacks confidence, is irresolute and wishy-washy'),
 ('human', 'a human being'))

In [76]:
# first word: print the chain for each sample seed word, one per line
for seed in ('chicken', 'fan', 'Bat', 'Battery', 'Trip', 'Cook', 'Straw', 'Fiddle'):
    print_five_words(seed)

vertebrates->bird->[4mfowl[0m[1m~~>[0m[4mperson[0m->human
person->follower->[4mdevotee[0m[1m~~>[0m[4mfollower[0m->person
sound->noise->[4mracket[0m[1m~~>[0m[4mclub[0m->team
property->number->[4mgroup[0m[1m~~>[0m[4munit[0m->division
document->act->[4mjourney[0m[1m~~>[0m[4mlight[0m->physics
kind->make->[4mprepare[0m[1m~~>[0m[4mapplying[0m->put
structure->buildings->[4mplant[0m[1m~~>[0m[4mconsisting[0m->originate
power->influence->[4mmanipulate[0m[1m~~>[0m[4mfalsify[0m->make


In [56]:
# best word
# NOTE(review): the output recorded for this cell has a lower execution
# count than the cell defining the helpers, so it presumably came from an
# earlier "best word" version of them (see the commented-out code in
# traverse_wn / get_two_senses).  This cell's code is identical to the
# "first word" cell, so on a fresh kernel both print the same chains.
for seed in ('chicken', 'fan', 'Bat', 'Battery', 'Trip', 'Cook', 'Straw', 'Fiddle'):
    print_five_words(seed)

person->names->[4mflesh[0m[1m~~>[0m[4mconfidence[0m->state
organisms->biology->[4msports[0m[1m~~>[0m[4madmirer[0m->team
biology->sports->[4mracket[0m[1m~~>[0m[4mclub[0m->golf
operate->pedal->[4mguns[0m[1m~~>[0m[4mcatcher[0m->baseball
dance->ball->[4mreturn[0m[1m~~>[0m[4mlight[0m->sensation
make->spoken->[4mprepare[0m[1m~~>[0m[4mapplying[0m->convey
plants->crop->[4mfodder[0m[1m~~>[0m[4mseed[0m->tournament
piece->performance->[4mor[0m[1m~~>[0m[4mfalsify[0m->falsifying


In [92]:
# List every WordNet synset for 'chicken'.
wn.synsets('chicken')

[Synset('chicken.n.01'),
 Synset('chicken.n.02'),
 Synset('wimp.n.01'),
 Synset('chicken.n.04'),
 Synset('chicken.s.01')]

In [93]:
# Definition of the 'wimp.n.01' synset, one of the synsets listed for 'chicken'.
wn.synset('wimp.n.01').definition()

'a person who lacks confidence, is irresolute and wishy-washy'

In [79]:
# Print each synset of 'Fiddle' followed by its definition and a blank line.
for synset in wn.synsets('Fiddle'):
    print(synset, synset.definition(), '', sep='\n')

Synset('violin.n.01')
bowed stringed instrument that is the highest member of the violin family; this instrument has four strings and a hollow body and an unfretted fingerboard and is played with a bow

Synset('fiddle.v.01')
avoid (one's assigned duties)

Synset('fiddle.v.02')
commit fraud and steal from one's employer

Synset('fiddle.v.03')
play the violin or fiddle

Synset('fiddle.v.04')
play on a violin

Synset('toy.v.02')
manipulate manually or in one's mind or imagination

Synset('tamper.v.01')
play around with or alter or falsify, usually secretively or dishonestly

Synset('tinker.v.03')
try to fix or mend



In [84]:
# Wu-Palmer similarity of toy.v.02 against a verb synset and a noun synset
# taken from the 'Fiddle' listing.
for other_name in ('tamper.v.01', 'violin.n.01'):
    print(wn.synset('toy.v.02').wup_similarity(wn.synset(other_name)))

0.75
0.11764705882352941
