In [12]:
#!/usr/bin/env python

from gensim.models import Word2Vec
from gensim.models.keyedvectors import KeyedVectors

# Load Google's pre-trained Word2Vec model
model = KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True) 
# has plural and upper/lower case, and even bigrams (e.g., taxpayer_dollars; vast_sums)

0.43215370794910785

In [94]:
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
wnl = WordNetLemmatizer()
sstm = SnowballStemmer("english")

In [20]:

# flex word2vec's muscles
model.doesnt_match("man woman child kitchen".split())
model.doesnt_match("france england germany berlin".split())
model.doesnt_match("paris berlin london austria".split())
model.most_similar("amsterdam")

# Consider a two-person task with a signaler and a receiver (similar to the TV gameshow 'Password'):
# The signalers were told that they would be playing a word-guessing game in which 
# they would have to think of one-word signals that would help someone guess their items. 
# They were talked through an example: if the item was 'dog', then a good signal would be 
# 'puppy' since most people given 'puppy' would probably guess 'dog'.

# sender thinks bank, says money
# receiver think cash
model.most_similar("bank") # .69 robber, .67 robbery, robbers, security, agency ..
model.most_similar("money") # .55 dollars, .55 profit, .54 cash
print model.most_similar("cash") # .69 capitalize, .54 money, sell, debt, tax


model['money']

print model.similarity("hot","cold") # .20
print model.similarity("hot","warm") # .14



[(u'money', 0.6151220798492432), (u'Cash', 0.5354235172271729), (u'cows_BCG_matrix', 0.5002977848052979), (u'funds', 0.48987293243408203), (u'marketable_securities', 0.473971962928772), (u'US1bn', 0.46651774644851685), (u'cash_flow', 0.4662739634513855), (u'sweepstakes_winnings', 0.46608874201774597), (u'shortstop_Pedro_Ciriaco', 0.46578019857406616), (u'$', 0.4635189175605774)]
0.46021386896123756
0.43215370794910785


In [147]:
def is_valid(word, password):
    """ check whether the hint is a valid hint
    
    word must not be part of another word, or contain hyphens/be more than two words"""
    lowerword = word.encode('ascii', errors='ignore').lower()
    lowerpass = password.encode('ascii', errors='ignore').lower()
    if lowerword == lowerpass:
        return False
    for char in '-_ ':
        if char in lowerword:
            return False
    if sstm.stem(lowerword)==sstm.stem(lowerpass):
        return False
    if wnl.lemmatize(lowerword) == wnl.lemmatize(lowerpass):
        return False
    return True

In [138]:
def send_word(secret, already_sent):
    """ Simulate a player trying to give hints.
    
    player tries words most similar to the password, if that fails, 
    words similar to the words similar to the password
    
    secret: the password to be guessed by the other player
    already_sent: list of words that have already been sent
    
    returns: a hint to the password, or None if it tried all words."""
    
    similars = model.most_similar(secret)  #find similar words
    for i in similars:
        if is_valid(i[0], secret):  # check if hint can be given
            if i[0] not in already_sent:
                return i[0]
            
    # words most similar to the password have failed
    sim_to_similars = model.most_similar(similars[0][0])  # find words similar
    for i in similars[1:]:                                # to words similar
        sim_to_similars += model.most_similar(i[0])       # to the password
    for i in sim_to_similars:  # send the words if they are valid
        if is_valid(i[0], secret):
            if i[0] not in already_sent:
                return i[0]
    return None  # no words were correct hints, system gives up

In [139]:
def receive_word(hint):
    """ simulate the player that has to guess the password.
    
    try to find the word most similar to the hint. check if that hint
    is valid, otherwise try a different word.
    
    TODO: improve guesses based on previous words."""
    similars = model.most_similar(hint)  # get most similar words to the hint
    for sim in similars:
        if is_valid(sim[0], hint):  # check whether word is valid
            return sim[0]
    return similars[0][0]  # if none of the words are valid, return the first guess

In [None]:
passwords = ['cut', 'ice', 'stamp', 'self', 'snail', 'now',
'bed', 'night', 'needle', 'scratch', 'bank', 'joke', 'king', 'salt', 'good', 'washer', 'east', 'nail', 'bulb', 'lost']

for pw in passwords:
    print "The password is {}".format(pw)
    play_password(pw)
    
    
    

The password is cut
slash cut
Found in 1 guesses
The password is ice
icy snowy
snow snowfall
icesheets icesheet
Vatnajökull Vatnajokull
megacryometeor midlatitudes
snowy wintry
frigid chilly
wintry wintery
icey icy
slushy icy
frosty chilly
chilly frigid
Icy Slushy
slippery slippy
thaws melts
refreezes recrystalizes
melts evaporates
snowfall snow
snows snowfall
snowstorm blizzard
snowfalls snow
sleet snow
the method failed
The password is stamp
semipostal stamps
commemoratives stamps
cachets stamps
sheetlet stamps
imprinted etched
affixed attached
embossed engraved
emblazoned stenciled
stencilled emblazoned
inscribed engraved
stenciled emblazoned
printed typeset
engraved inscribed
preaddressed prepaid_postage
Coin Numismatic
Philatelic Philately
Banknotes Coinage
Philately Philatelic
the method failed
The password is self
narcissistic egotistical
selfconfidence humilty
narcissism egotism
Henrickson Budke
Gillispie Calipari
Turgeon Gadowsky
egotistical egocentric
egocentric egotistical
n

In [117]:
def play_password(password):
    """ method simulating a game of password
    
    for a given password, one player, that knows the password
    gives hints, the other player has to guess the password based
    on that word. After 100 tries it stops."""
    send_words = []
    for guessnr in range(100):
        send = send_word(password, send_words)
        send_words.append(send)
        if send is None:
            print "the method failed"
            break
        print send, receive_word(send)
        if password == receive_word(send):
            print "Found in {} guesses".format(guessnr+1)
            break
    print "the method failed"