In [53]:
import gensim
from nltk.corpus import wordnet as wn
from nltk.metrics.distance import edit_distance

# Leftover alternative source for the vectors (a pruned word2vec text sample located
# via `find`, presumably nltk.data.find -- it is not imported in this notebook).
# word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
# Load word vectors from the binary word2vec-format file 'gigaword.bin' (path is
# relative to the working directory). `model` is the module-level object that
# NishyBot2.find_similars queries with `most_similar`.
model = gensim.models.KeyedVectors.load_word2vec_format('gigaword.bin', binary=True)

In [54]:
class NishyBot2:
    """Score candidate hint words against four groups of board words.

    The good / bad / okay / assassin grouping suggests a Codenames-style board.
    For every board word the bot stores the list of neighbour words returned by
    the module-level ``model``; a candidate hint is then scored by where it
    appears in those lists (see ``count_matches`` and ``score``).
    """

    # Number of neighbours requested from the model for each board word.
    TOP_N = 3000
    # A hint is dropped when edit_distance(hint, word) / len(word) is below this
    # value for any good word, i.e. when it is nearly the same string.
    SAMENESS_THRESHOLD = 0.5
    # Weights applied to the okay and assassin match counts in ``score``.
    OKAY_PENALTY = 0.5
    ASSASSIN_PENALTY = 5

    @staticmethod
    def find_similars(words):
        """Return one neighbour list per entry of ``words``, in the same order.

        Each list holds the first element of every item returned by
        ``model.most_similar(positive=[word], topn=TOP_N)``. A word for which
        the lookup raises ``KeyError`` gets an empty list and a printed notice.
        """
        similars = []
        for word in words:
            try:
                neighbours = model.most_similar(positive=[word], topn=NishyBot2.TOP_N)
                similars.append([entry[0] for entry in neighbours])
            except KeyError:
                similars.append([])
                print('No similar words found for', word)
        return similars

    @staticmethod
    def count_matches(wordlists, test):
        """Sum, over every list containing ``test``, of ``len(list) - position``.

        An earlier position therefore contributes more; a list that does not
        contain ``test`` contributes nothing, so the result is 0 when no list
        contains it.
        """
        total = 0
        for wordlist in wordlists:
            try:
                # Single scan: index() both finds the word and gives its position.
                position = wordlist.index(test)
            except ValueError:
                continue
            total += len(wordlist) - position
        return total

    def __init__(self, good, bad, okay, assassin):
        """Store the four word groups and look up the neighbour lists of each.

        Every argument is a sequence of words. The neighbour lists are kept in
        ``good_similars``, ``bad_similars``, ``okay_similars`` and
        ``assassin_similars``, aligned index-for-index with the input groups.
        """
        self.good = good
        self.bad = bad
        self.okay = okay
        self.assassin = assassin

        self.good_similars = NishyBot2.find_similars(good)
        self.bad_similars = NishyBot2.find_similars(bad)
        self.okay_similars = NishyBot2.find_similars(okay)
        self.assassin_similars = NishyBot2.find_similars(assassin)

    def good_matches(self, test):
        """Return the good words whose neighbour list contains ``test``."""
        return [
            word
            for word, similar in zip(self.good, self.good_similars)
            if test in similar
        ]

    def score(self, test):
        """Return the weighted score of ``test`` as a hint.

        The good match count is reduced by the bad count, by ``OKAY_PENALTY``
        times the okay count and by ``ASSASSIN_PENALTY`` times the assassin
        count.
        """
        good_count = NishyBot2.count_matches(self.good_similars, test)
        bad_count = NishyBot2.count_matches(self.bad_similars, test)
        okay_count = NishyBot2.count_matches(self.okay_similars, test)
        assassin_count = NishyBot2.count_matches(self.assassin_similars, test)

        return (
            good_count
            - bad_count
            - NishyBot2.OKAY_PENALTY * okay_count
            - NishyBot2.ASSASSIN_PENALTY * assassin_count
        )

    def score_all(self, wordset):
        """Return a dict mapping each word of ``wordset`` to its score.

        Keys are inserted in descending score order; ties keep the iteration
        order of ``wordset`` because the sort is stable.
        """
        scored = [(word, self.score(word)) for word in wordset]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return dict(scored)

    def _resembles_good_word(self, hint):
        """Tell whether ``hint`` is nearly the same string as some good word."""
        # Empty good words are skipped: the ratio below would divide by zero.
        return any(
            word and edit_distance(hint, word) / len(word) < NishyBot2.SAMENESS_THRESHOLD
            for word in self.good
        )

    def score_all_pruned(self, wordset):
        """Like ``score_all``, minus hints that are unusable.

        A hint is removed when ``wn.synsets`` returns nothing for it, or when
        its edit distance to any good word, divided by that word's length, is
        below ``SAMENESS_THRESHOLD``. The descending score order is kept.
        """
        scores = self.score_all(wordset)

        # Iterate over a snapshot of the keys because entries are deleted in the loop.
        for hint in list(scores):
            if not wn.synsets(hint) or self._resembles_good_word(hint):
                del scores[hint]

        return scores


In [55]:
def pregame(good, bad, okay, assassin):
    """Print a sequence of hints that together cover the good words.

    Each round builds a NishyBot2 for the good words not yet covered, pools
    the neighbours of those words as hint candidates, takes the top entry of
    the pruned ranking, prints ``hint [matched good words]`` and removes the
    matched words. The loop ends when every good word is covered, or when no
    candidate survives pruning (in which case the uncovered words are printed).

    Args:
        good: iterable of words the hints should point at.
        bad, okay, assassin: word lists passed through to NishyBot2 unchanged.

    Returns:
        None; the results are printed.
    """
    remaining = list(good)
    while remaining:
        bot = NishyBot2(remaining, bad, okay, assassin)

        # Candidate hints: every neighbour of every remaining good word.
        candidates = set()
        for similar in bot.good_similars:
            candidates.update(similar)

        ranked = bot.score_all_pruned(candidates)

        # The ranking is ordered best-first; it can be empty when the remaining
        # words have no neighbours or every candidate was pruned.
        hint = next(iter(ranked), None)
        if hint is None:
            print('No hint found for', remaining)
            break

        matches = bot.good_matches(hint)
        print(hint, matches)
        remaining = [word for word in remaining if word not in matches]


In [57]:
# Earlier board, kept for reference: the assignments further down replace it.
# NOTE(review): 'New' and 'York' are two separate entries here because the
# original string was split on spaces -- confirm whether "New York" was meant
# to be a single board word.
good = ['sack', 'makeup', 'bottle', 'cuckoo', 'cast', 'cone', 'jockey', 'America']
bad = ['nut', 'Russia', 'fog', 'break', 'spider', 'bear', 'rip', 'tube', 'plane']
okay = ['Christmas', 'pool', 'Beijing', 'trip', 'New', 'York', 'fever', 'peanut']
assassin = ['link']

# Board in effect once this cell has run.
good = 'giant thumb nail lock plane ship cell state capital'.split()
bad = 'Aztec court chocolate space snow'.split()
okay = 'shop genius ambulance button heart pupil vet'.split()
assassin = ['microscope']

In [58]:
# Run the hint search on the current board; pregame prints one
# "hint [matched good words]" line per round.
pregame(good, bad, okay, assassin)

wrench ['thumb', 'nail', 'lock']
city ['state', 'capital']
freighter ['plane', 'ship']
microchip ['giant', 'cell']


In [26]:
# Build a scorer for whatever board is currently bound to good/bad/okay/assassin.
n = NishyBot2(good, bad, okay, assassin)

# Pool the neighbours of all good words into one candidate set and report its size.
s = set().union(*n.good_similars)
print(len(s))

# Rank and prune the candidates, then show the 20 best as
# (hint, good words it matches, score).
sc = n.score_all_pruned(s)
[(hint, n.good_matches(hint), value) for hint, value in list(sc.items())[:20]]

22261


[('spoiled', ['sack', 'makeup', 'cast'], 5746.0),
 ('actor', ['cast', 'jockey'], 5723.0),
 ('satchel', ['sack', 'bottle'], 5358.0),
 ('bag', ['sack', 'bottle'], 5296.0),
 ('turnout', ['makeup', 'cast'], 5132.0),
 ('bathrobe', ['sack', 'makeup', 'bottle'], 5041.0),
 ('actress', ['cast', 'jockey'], 4993.0),
 ('bags', ['sack', 'bottle'], 4979.5),
 ('lipstick', ['makeup', 'bottle'], 4974.0),
 ('bustier', ['makeup', 'bottle'], 4857.0),
 ('lipsticks', ['makeup', 'bottle'], 4838.5),
 ('Spoilt', ['makeup', 'cast'], 4780.0),
 ('suntan', ['makeup', 'bottle'], 4733.5),
 ('sequin', ['makeup', 'bottle'], 4733.0),
 ('wig', ['makeup', 'bottle'], 4726.0),
 ('duffel', ['sack', 'bottle'], 4718.0),
 ('strongbox', ['sack', 'bottle'], 4697.0),
 ('crate', ['sack', 'bottle'], 4657.0),
 ('typecasting', ['makeup', 'cast'], 4633.0),
 ('impersonator', ['makeup', 'jockey'], 4595.0)]

In [27]:
# Check how strongly 'spoiled' is tied to the bad words; count_matches returns 0
# only when the word appears in none of the bad words' neighbour lists.
NishyBot2.count_matches(n.bad_similars, 'spoiled')

0

In [36]:
# 0-based position of 'spoiled' in the neighbour list of the good word 'cast'.
# Requires 'cast' to be in the current `good` list (it is in the first board
# only); list.index raises ValueError otherwise.
n.good_similars[good.index('cast')].index('spoiled')

78

In [ ]:
"""
tour: ['concert', 'play', 'trip']
wrestle: ['play', 'tag']
wrestle: ['play', 'tag']
robe: ['dress', 'chair']

('claw', ['giant', 'thumb', 'nail', 'lock'])
('freighter', ['plane', 'ship'])
('republic', ['state', 'capital'])
('apoptosis', ['cell'])
('protruding', ['giant', 'thumb', 'lock'])

[('claw', ['giant', 'thumb', 'nail', 'lock']),
 ('gunwale', ['thumb', 'nail', 'lock', 'plane', 'ship']),
 ('protruding', ['giant', 'thumb', 'nail', 'lock']),
 ('shoelace', ['thumb', 'nail', 'lock']),
 ('mortise', ['thumb', 'nail', 'lock']),
 ('bandsaw', ['thumb', 'nail', 'lock']),
 ('long-handled', ['thumb', 'nail', 'lock']),
 ('vise', ['thumb', 'nail', 'lock']),
 ('shackle', ['thumb', 'nail', 'lock']),
 ('tenon', ['thumb', 'nail', 'lock']),
 ('ferrule', ['thumb', 'nail', 'lock']),
 ('pliers', ['thumb', 'nail', 'lock']),
 ('screws', ['thumb', 'nail', 'lock']),
 ('wrists', ['thumb', 'nail', 'lock']),
 ('claws', ['giant', 'thumb', 'nail']),
 ('halyard', ['thumb', 'nail', 'lock', 'ship']),
 ('scissors', ['thumb', 'nail', 'lock']),
 ('dowel', ['thumb', 'nail', 'lock']),
 ('forepaws', ['giant', 'thumb', 'nail']),
 ('dowels', ['thumb', 'nail', 'lock'])]
"""

