In [214]:
import gensim
from nltk.corpus import wordnet as wn
from nltk.metrics.distance import edit_distance

# Load pre-trained word2vec vectors from a binary file in the working directory.
# `model` is a module-level global that NishyBot2.find_similars reads directly,
# so this cell must run before any NishyBot2 is constructed.
# NOTE(review): the path is relative and the file is presumably large; confirm
# where the notebook expects to be launched from.
model = gensim.models.KeyedVectors.load_word2vec_format('gigaword-nocase-26.bin', binary=True)

In [239]:
class NishyBot2:
    """Rank candidate hint words against four groups of board words.

    For every board word the constructor fetches the ``TOP_N`` nearest
    neighbours from the module-level ``model``. A candidate hint is then
    rewarded for appearing (early) in the neighbour lists of ``good`` words and
    penalised for appearing in the lists of ``bad``, ``okay`` and ``assassin``
    words.

    Attributes:
        good, bad, okay, assassin: the word lists passed to the constructor.
        good_similars, bad_similars, okay_similars, assassin_similars:
            one neighbour list per word, aligned by index with the matching
            word list (an empty list for words the model does not know).
    """

    # Number of nearest neighbours requested from the model per board word.
    TOP_N = 2000
    # Not read anywhere inside this class; kept so existing references still work.
    SAMENESS_THRESHOLD = 0

    # Penalty multipliers used by score(); the values match the previously
    # hard-coded ones (1, 0.5 and 3).
    BAD_WEIGHT = 1
    OKAY_WEIGHT = 0.5
    ASSASSIN_WEIGHT = 3

    @staticmethod
    def find_similars(words):
        """Return, for each word, the list of its ``TOP_N`` most similar words.

        Reads the module-level ``model``. A word that makes
        ``model.most_similar`` raise ``KeyError`` gets an empty list and a
        printed notice, so the result always has the same length as ``words``.
        """
        similars = []
        for word in words:
            try:
                neighbours = model.most_similar(positive=[word], topn=NishyBot2.TOP_N)
            except KeyError:
                similars.append([])
                print('No similar words found for', word)
            else:
                # most_similar yields (word, similarity) pairs; keep the word only.
                similars.append([pair[0] for pair in neighbours])
        return similars

    @staticmethod
    def count_matches(wordlists, test):
        """Sum a rank-weighted hit score for ``test`` over ``wordlists``.

        Each list that contains ``test`` contributes ``len(list) - index``, so
        an earlier position is worth more. Lists without ``test`` contribute 0.
        """
        count = 0
        for wordlist in wordlists:
            try:
                position = wordlist.index(test)
            except ValueError:
                # `test` is not in this list.
                continue
            count += len(wordlist) - position
        return count

    def __init__(self, good, bad, okay, assassin):
        """Store the four word lists and precompute their neighbour lists."""
        self.good = good
        self.bad = bad
        self.okay = okay
        self.assassin = assassin

        self.good_similars = NishyBot2.find_similars(good)
        self.bad_similars = NishyBot2.find_similars(bad)
        self.okay_similars = NishyBot2.find_similars(okay)
        self.assassin_similars = NishyBot2.find_similars(assassin)

    def good_matches(self, test):
        """Return the good words whose neighbour list contains ``test``."""
        return [
            word
            for word, similar in zip(self.good, self.good_similars)
            if test in similar
        ]

    def score(self, test):
        """Return the score of ``test`` as a hint (higher is better).

        Computed as the good count minus the weighted bad, okay and assassin
        counts, each obtained with ``count_matches``.
        """
        good_count = NishyBot2.count_matches(self.good_similars, test)
        bad_count = NishyBot2.count_matches(self.bad_similars, test)
        okay_count = NishyBot2.count_matches(self.okay_similars, test)
        assassin_count = NishyBot2.count_matches(self.assassin_similars, test)

        return (
            good_count
            - self.BAD_WEIGHT * bad_count
            - self.OKAY_WEIGHT * okay_count
            - self.ASSASSIN_WEIGHT * assassin_count
        )

    def score_all(self, wordset):
        """Score every word in ``wordset``.

        Returns a dict mapping word -> score whose insertion order is by
        descending score (ties keep the iteration order of ``wordset``).
        """
        ranked = sorted(
            ((word, self.score(word)) for word in wordset),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return dict(ranked)

    def score_all_pruned(self, wordset):
        """Like ``score_all`` but with unusable hints removed.

        A hint is dropped when WordNet has no synsets for it (treated as not a
        real word), or when it contains, or is contained in, one of the good
        words. The surviving entries keep their descending-score order.
        """
        scores = self.score_all(wordset)

        # Iterate over a snapshot of the keys because entries are deleted.
        for hint in list(scores):
            not_a_word = len(wn.synsets(hint)) == 0
            # NOTE(review): only `self.good` is checked here; hints overlapping
            # bad/okay/assassin words are not pruned - confirm that is intended.
            too_similar = any(word in hint or hint in word for word in self.good)
            if not_a_word or too_similar:
                del scores[hint]

        return scores


In [240]:
def pregame(good, bad, okay, assassin):
    """Print a sequence of hints that together cover the ``good`` words.

    Repeatedly builds a ``NishyBot2`` for the good words not yet covered,
    takes the top-scoring pruned hint, prints it with the good words it
    matches, and removes those words before the next round. ``good`` itself is
    not modified. Returns ``None``.

    If no candidate hint survives pruning (for example when the remaining good
    words have empty neighbour lists), the remaining words are reported and
    the loop stops instead of raising ``IndexError``.
    """
    remaining = good.copy()
    while len(remaining) != 0:
        bot = NishyBot2(remaining, bad, okay, assassin)

        # Candidate hints: every neighbour of every remaining good word.
        candidates = set()
        for similar in bot.good_similars:
            candidates.update(similar)

        scores = bot.score_all_pruned(candidates)
        if not scores:
            print('No hint found for', remaining)
            break

        # score_all_pruned returns entries in descending-score order.
        hint = next(iter(scores))
        matches = bot.good_matches(hint)
        print(hint, matches)
        # The hint comes from the good words' neighbour lists, so `matches`
        # is non-empty and `remaining` shrinks on every round.
        remaining = [word for word in remaining if word not in matches]


In [241]:
# Active board: four space-separated word groups, each turned into a list of
# lower-cased words (presumably to match an uncased model vocabulary - confirm).
good = [w.lower() for w in 'sack makeup bottle cuckoo cast cone jockey America'.split(' ')]
bad = [w.lower() for w in 'nut Russia fog break spider bear rip tube plane'.split(' ')]
okay = [w.lower() for w in 'Christmas pool Beijing trip nyc fever peanut'.split(' ')]
assassin = [w.lower() for w in 'link'.split(' ')]

# good = 'spot,blade,chain,record,magician,jeweler,fiddle,apple'.lower().split(',')
# bad = 'wonderland,Newton,glacier,pig,spy,lead,mess,duck,stable'.lower().split(',')
# okay = 'India,millionaire,rainbow,razor,bridge,polo,Notre,Dame'.lower().split(',')
# assassin = 'ice,cream'.lower().split(',')

# good = ['giant', 'thumb', 'nail', 'lock','plane', 'ship','cell','state', 'capital']
# bad = ['Aztec', 'court','chocolate','space','snow']
# okay = ['shop','genius','ambulance','button','heart','pupil','vet']
# assassin = ['microscope']

In [242]:
# Print one hint per round, each followed by the good words it covers,
# until every good word has been covered.
pregame(good, bad, okay, assassin)

ensemble ['makeup', 'cuckoo', 'cast']
burlap ['sack', 'bottle', 'cone']
helmsman ['jockey', 'america']


In [232]:
# Exploration: show the 20 best pruned hints for the full board as
# (hint, good words it matches, score).
# NOTE(review): the saved output below lists words such as 'magician' that
# belong to a commented-out board, and this cell's execution count (232) is
# lower than the board cell's (241) - the output is stale; re-run to refresh.
n = NishyBot2(good, bad, okay, assassin)

# Candidate hints: every neighbour of every good word.
s = set()
for similar in n.good_similars:
    s.update(similar)
print(len(s))

sc = n.score_all_pruned(s)
# Slice before building the rows so good_matches runs for 20 hints only,
# not for every candidate.
[(hint, n.good_matches(hint), points) for hint, points in list(sc.items())[:20]]

14038


[('musician', ['magician', 'jeweler', 'fiddle'], 5049.0),
 ('store', ['chain', 'jeweler', 'apple'], 4851.0),
 ('retailer', ['chain', 'jeweler', 'apple'], 4822.0),
 ('violinist', ['magician', 'jeweler', 'fiddle'], 4708.0),
 ('pianist', ['magician', 'jeweler', 'fiddle'], 4583.0),
 ('expertly', ['blade', 'magician', 'fiddle'], 4370.0),
 ('spinning', ['blade', 'magician', 'fiddle'], 4211.0),
 ('talbots', ['chain', 'jeweler', 'apple'], 4081.0),
 ('performer', ['spot', 'record', 'magician'], 3984.0),
 ('virtuoso', ['magician', 'fiddle'], 3960.0),
 ('processor', ['blade', 'chain', 'apple'], 3930.0),
 ('hotel', ['spot', 'chain', 'jeweler'], 3917.0),
 ('swivel', ['spot', 'blade', 'fiddle'], 3826.0),
 ('painter', ['magician', 'jeweler'], 3799.0),
 ('percussionist', ['magician', 'fiddle'], 3747.0),
 ('grocer', ['chain', 'jeweler'], 3741.0),
 ('harpist', ['magician', 'jeweler', 'fiddle'], 3741.0),
 ('boutique', ['chain', 'jeweler'], 3737.0),
 ('vendor', ['chain', 'jeweler', 'apple'], 3709.0),
 ('l

In [224]:
# Scratch check: rank-weighted count of 'spoiled' across the bad words'
# neighbour lists; 0 means it appears in none of them.
NishyBot2.count_matches(n.bad_similars, 'spoiled')

0

In [227]:
# Scratch check: 0-based position of 'encase' in the neighbour list of 'cone'.
# Assumes `n` was built from the current `good` list so the indices line up -
# verify after re-running cells out of order.
n.good_similars[good.index('cone')].index('encase')

1