In [18]:
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize, pos_tag

In [67]:
def least_similar(synsets):
    # returns entries with highest wup similarity score
    the_max = 0
    for i in range(len(synsets)):
        for j in range(i + 1, len(synsets)):
            this_similarity = synsets[i].wup_similarity(synsets[j])
            if this_similarity is not None and this_similarity > the_max:
                the_max = this_similarity
                s1 = synsets[i]
                s2 = synsets[j]
    return (s1, s2)

def best_corresponding_pos(synset):
    # inspects definition and returns most similar word with
    # same pos as input word excluding the input word
    # right now only works for nouns, verbs, adjectives
    this_pos = synset.pos()
    def_token = word_tokenize(synset.definition())
    
    if  this_pos == wn.NOUN:
        pos_token = 'NN'
    elif this_pos == wn.VERB:
        pos_token = 'V'
    elif this_pos == wn.ADJ:
        pos_token = 'JJ'
    else:
        raise ValueError('Input synset must be a Noun, Verb, or Adjective')
        
    lm = WordNetLemmatizer()
    max_similarity = -1
    best_corr_pos = None
    
    for tagged_word in pos_tag(def_token):
        if pos_token in tagged_word[1]:
            lemma = lm.lemmatize(tagged_word[0], pos=synset.pos())
            other_synsets = wn.synsets(lemma, pos=synset.pos())
            if len(other_synsets) > 0:
                # just pick the first synset
                this_similarity = synset.wup_similarity(other_synsets[0])
                if this_similarity > max_similarity:
                    max_similarity = this_similarity
                    best_corr_pos = tagged_word[0]
    return best_corr_pos

def first_corresponding_pos(synset):
    # inspects definition and returns first word with same pos
    # as input word
    # right now only works for nouns, verbs, adjectives
    this_pos = synset.pos()
    def_token = word_tokenize(synset.definition())
    
    if this_pos == 'n':
        pos_token = 'NN'
    elif this_pos == 'v':
        pos_token = 'V'
    elif this_pos == 'a':
        pos_token = 'JJ'
    else:
        raise ValueError('Input synset must be a Noun, Verb, or Adjective')
    
    for tagged_word in pos_tag(def_token):
        if pos_token in tagged_word[1]:
            return tagged_word[0]
        
def get_two_senses(seed_word):
    synsets = wn.synsets(seed_word)
    pair = least_similar(synsets)
    return best_corresponding_pos(pair[0]), best_corresponding_pos(pair[1])
#     return first_corresponding_pos(pair[0]), first_corresponding_pos(pair[1])

def traverse_wn(word):
    # traverses wn synsets for word and returns first
    # word in definition of synsets with same pos
    for synset in wn.synsets(word):
        first_pos = best_corresponding_pos(synset)
#         first_pos = first_corresponding_pos(synset)
        if first_pos is not None and first_pos != word:
            return first_pos
        
def five_word_algorithm(seed_word):
    word_c, word_d = get_two_senses(seed_word)
    word_b = traverse_wn(word_c)
    word_a = traverse_wn(word_b)
    word_e = traverse_wn(word_d)
    return word_a, word_b, word_c, word_d, word_e

def print_five_words(seed_word):
    words = five_word_algorithm(seed_word)
    print(words[0] + '->' + words[1] + '->\033[4m' + words[2] + 
          '\033[0m\033[1m~~>\033[0m\033[4m' + words[3] + '\033[0m->' + words[4])

In [3]:
# first word
print_five_words('chicken')
print_five_words('fan')
print_five_words('Bat')
print_five_words('Battery')
print_five_words('Trip')

flesh->bird->[4mfowl[0m[1m~~>[0m[4mperson[0m->body
person->follower->[4mdevotee[0m[1m~~>[0m[4mfollower[0m->person
sound->noise->[4mracket[0m[1m~~>[0m[4mclub[0m->team
property->number->[4mgroup[0m[1m~~>[0m[4munit[0m->division
document->act->[4mjourney[0m[1m~~>[0m[4mlight[0m->radiation


In [68]:
# best word
print_five_words('chicken')
print_five_words('fan')
print_five_words('Bat')
print_five_words('Battery')
print_five_words('Trip')

substance->food->[4mfowl[0m[1m~~>[0m[4mperson[0m->body
person->follower->[4mdevotee[0m[1m~~>[0m[4mfollower[0m->person
buildings->plants->[4msquash[0m[1m~~>[0m[4mball[0m->round
existence->entities->[4mgroup[0m[1m~~>[0m[4munit[0m->quantity
document->act->[4mjourney[0m[1m~~>[0m[4mtread[0m->step


In [69]:
print_five_words('Cook')
print_five_words('Straw')

kind->make->[4mprepare[0m[1m~~>[0m[4mapplying[0m->make
person->substance->[4mfiber[0m[1m~~>[0m[4mstem[0m->form


In [70]:
print_five_words('Fiddle')

AttributeError: 'NoneType' object has no attribute 'lower'

In [71]:
get_two_senses('Fiddle')

(None, 'falsify')

In [74]:
least_similar(wn.synsets('Fiddle'))

(Synset('toy.v.02'), Synset('tamper.v.01'))

In [76]:
wn.synset('toy.v.02').definition()

"manipulate manually or in one's mind or imagination"

In [77]:
pos_tag(word_tokenize(wn.synset('toy.v.02').definition()))

[('manipulate', 'NN'),
 ('manually', 'RB'),
 ('or', 'CC'),
 ('in', 'IN'),
 ('one', 'CD'),
 ("'s", 'POS'),
 ('mind', 'NN'),
 ('or', 'CC'),
 ('imagination', 'NN')]