In [7]:
import gensim
from nltk.corpus import wordnet as wn

# Word vectors stored in word2vec binary format; the path is relative, so the
# file has to sit in the notebook's working directory.
MODEL_PATH = 'gigaword-nocase-26.bin'
model = gensim.models.KeyedVectors.load_word2vec_format(MODEL_PATH, binary=True)

In [8]:
class NishyBot2:
    """Rank candidate hint words against four groups of board words.

    The groups are ``good``, ``bad``, ``okay`` and ``assassin`` (presumably the
    card types of a Codenames-style game -- confirm). For every board word the
    bot stores its nearest neighbours according to the module-level ``model``;
    a candidate hint then scores points for appearing near good words and loses
    points for appearing near the other groups.
    """

    TOP_N = 2000  # neighbours requested per board word; read on every find_similars call
    SAMENESS_THRESHOLD = 0  # not referenced by any code visible in this notebook

    # Weights applied to the non-good groups in score(); good words have weight 1.
    BAD_PENALTY = 1
    OKAY_PENALTY = 0.5
    ASSASSIN_PENALTY = 3

    @staticmethod
    def find_similars(words):
        """Return one ``{neighbour: rank}`` dict per entry of ``words``.

        ``rank`` is the position of the neighbour in the list returned by
        ``model.most_similar`` (0 is the first entry; gensim lists the most
        similar word first). A word the model does not know yields an empty
        dict and a printed notice, so the result always lines up index-for-index
        with ``words``.
        """
        similars = []
        for word in words:
            try:
                neighbours = model.most_similar(positive=[word], topn=NishyBot2.TOP_N)
            except KeyError:
                # Keep the element type consistent (dict) so callers can use
                # dict operations on every entry.
                similars.append({})
                print('No similar words found for', word)
            else:
                similars.append({entry[0]: rank for rank, entry in enumerate(neighbours)})
        return similars

    @staticmethod
    def count_matches(wordlists, test):
        """Return the rank-weighted number of neighbour maps that contain ``test``.

        Each map containing ``test`` contributes ``len(map) - rank``, so an
        earlier-ranked neighbour counts for more. Returns 0 when no map
        contains ``test``.
        """
        return sum(
            len(wordlist) - wordlist[test]
            for wordlist in wordlists
            if test in wordlist
        )

    def __init__(self, good, bad, okay, assassin):
        """Store the four word groups and precompute their neighbour maps."""
        self.good = good
        self.bad = bad
        self.okay = okay
        self.assassin = assassin

        self.good_similars = NishyBot2.find_similars(good)
        self.bad_similars = NishyBot2.find_similars(bad)
        self.okay_similars = NishyBot2.find_similars(okay)
        self.assassin_similars = NishyBot2.find_similars(assassin)

    def good_matches(self, test):
        """Return the good words whose neighbour map contains ``test``."""
        return [
            word
            for word, similar in zip(self.good, self.good_similars)
            if test in similar
        ]

    def score(self, test):
        """Return the score of ``test`` as a hint: good overlap minus weighted penalties."""
        good_count = NishyBot2.count_matches(self.good_similars, test)
        bad_count = NishyBot2.count_matches(self.bad_similars, test)
        okay_count = NishyBot2.count_matches(self.okay_similars, test)
        assassin_count = NishyBot2.count_matches(self.assassin_similars, test)

        # The weights (and TOP_N) are hand-picked; the original author noted
        # they could be tuned automatically.
        return (
            good_count
            - self.BAD_PENALTY * bad_count
            - self.OKAY_PENALTY * okay_count
            - self.ASSASSIN_PENALTY * assassin_count
        )

    def score_all(self, wordset):
        """Return ``{word: score}`` for ``wordset``, ordered best score first.

        Ties are broken alphabetically so the ordering does not depend on the
        iteration order of ``wordset`` (set order changes between kernels).
        """
        scored = [(word, self.score(word)) for word in wordset]
        scored.sort(key=lambda pair: (-pair[1], pair[0]))
        return dict(scored)

    def score_all_pruned(self, wordset):
        """Like ``score_all``, minus hints that are unusable.

        A hint is dropped when WordNet has no synset for it (treated as "not a
        real word") or when it contains, or is contained in, one of the good
        words.
        """
        # NOTE(review): only self.good is checked for substring overlap; hints
        # overlapping bad/okay/assassin words are kept -- confirm this is intended.
        scores = self.score_all(wordset)
        for hint in list(scores):
            not_a_word = len(wn.synsets(hint)) == 0
            if not_a_word or any(word in hint or hint in word for word in self.good):
                del scores[hint]
        return scores


In [9]:
def pregame(good, bad, okay, assassin):
    """Greedily print hints until every good word has been covered.

    Each round builds a NishyBot2 for the good words still uncovered, takes the
    best-scoring pruned hint, prints it together with the good words it
    matches, and removes those words from the pool. Stops early (with a
    message) if no candidate hint survives pruning, instead of raising
    IndexError on an empty ranking. Returns None.
    """
    remaining = list(good)
    while remaining:
        bot = NishyBot2(remaining, bad, okay, assassin)

        # Candidate hints: every neighbour of at least one remaining good word.
        candidates = set().union(*bot.good_similars)
        ranked = bot.score_all_pruned(candidates)
        if not ranked:
            print('No usable hint found for', remaining)
            break

        hint = next(iter(ranked))  # ranked is ordered best score first
        matches = bot.good_matches(hint)
        print(hint, matches)
        if not matches:
            # Defensive: a hint that covers nothing would never shrink the pool.
            break
        remaining = [word for word in remaining if word not in matches]


In [10]:
# good = 'sack makeup bottle cuckoo cast cone jockey America'.lower().split(' ')
# bad = 'nut Russia fog break spider bear rip tube plane'.lower().split(' ')
# okay = 'Christmas pool Beijing trip nyc fever peanut'.lower().split(' ')
# assassin = 'link'.lower().split(' ')

# good = 'spot,blade,chain,record,magician,jeweler,fiddle,apple'.lower().split(',')
# bad = 'wonderland,Newton,glacier,pig,spy,lead,mess,duck,stable'.lower().split(',')
# okay = 'India,millionaire,rainbow,razor,bridge,polo,Notre,Dame'.lower().split(',')
# assassin = 'ice,cream'.lower().split(',')

# Active board: one whitespace-separated string per group, split into a list.
good = 'giant thumb nail lock plane ship cell state capital'.split()
bad = 'aztec court chocolate space snow'.split()
okay = 'shop genius ambulance button heart pupil vet'.split()
assassin = 'microscope'.split()

In [11]:
# Run the greedy hint loop on the active word lists; each printed line is a
# hint followed by the good words it covers.
pregame(good, bad, okay, assassin)


stick ['thumb', 'nail', 'lock']
nation ['giant', 'state', 'capital']
freighter ['plane', 'ship']
phone ['cell']


In [12]:
# Score every candidate hint against the full board and show the 20 best.
n = NishyBot2(good, bad, okay, assassin)

# Candidate hints: every word that neighbours at least one good word.
s = set().union(*n.good_similars)
print(len(s))

sc = n.score_all_pruned(s)
# sc is ordered best score first; slice before calling good_matches so it runs
# only for the 20 rows that are displayed.
[(x, n.good_matches(x), sc[x]) for x in list(sc)[:20]]

15732


[('stick', ['thumb', 'nail', 'lock'], 5482.5),
 ('meanwhile', ['giant', 'lock', 'state', 'capital'], 5445.0),
 ('nation', ['giant', 'state', 'capital'], 5351.0),
 ('seal', ['giant', 'nail', 'lock'], 5201.0),
 ('hold', ['lock', 'state', 'capital'], 5124.0),
 ('loose', ['thumb', 'nail', 'lock'], 5030.0),
 ('apparently', ['nail', 'lock', 'plane', 'ship', 'cell', 'capital'], 5024.0),
 ('hand', ['giant', 'thumb', 'nail', 'lock', 'capital'], 4888.5),
 ('bruise', ['thumb', 'nail', 'lock'], 4815.0),
 ('already', ['giant', 'state', 'capital'], 4747.0),
 ('boot', ['thumb', 'nail', 'lock'], 4692.5),
 ('saw', ['thumb', 'nail', 'lock'], 4587.0),
 ('bank', ['giant', 'state', 'capital'], 4361.0),
 ('punch', ['thumb', 'nail', 'lock'], 4293.0),
 ('grab', ['thumb', 'nail', 'lock'], 4151.0),
 ('yank', ['thumb', 'nail', 'lock'], 4137.0),
 ('instead', ['giant', 'thumb', 'lock', 'state', 'capital'], 4113.5),
 ('propellor', ['thumb', 'plane', 'ship'], 4105.0),
 ('clutch', ['thumb', 'nail', 'lock'], 4074.5),


In [101]:
# Rebuild the candidate pool: all neighbours of all good words.
s = {word for similar in n.good_similars for word in similar}

# Neighbours common to the good words at positions 1, 2 and 8
# (presumably 'thumb', 'nail' and 'capital' if `good` is the active list above -- confirm).
set.intersection(*(set(n.good_similars[i].keys()) for i in (1, 2, 8)))

{'hand'}

In [74]:
# Re-run the greedy hint loop for several neighbourhood sizes.
ns = [100, 500, 1000, 2000, 3000, 5000]

# The loop variable is deliberately not called `n`: that name holds the
# NishyBot2 instance other cells rely on. TOP_N is class-level state, so put
# the previous value back once the sweep is done (even if a run fails).
default_top_n = NishyBot2.TOP_N
try:
    for top_n in ns:
        print(top_n)
        NishyBot2.TOP_N = top_n
        pregame(good, bad, okay, assassin)
        print()
finally:
    NishyBot2.TOP_N = default_top_n

100
virtuoso ['magician', 'fiddle']
store ['chain']
jeweller ['jeweler']
berth ['spot']
history ['record']
curved ['blade']
ipod ['apple']

500
virtuoso ['magician', 'fiddle']
store ['chain', 'jeweler']
season ['spot', 'record']
mower ['blade']
ipod ['apple']

1000
musician ['magician', 'jeweler', 'fiddle']
lead ['spot', 'record']
talbots ['chain', 'apple']
rotor ['blade']

2000
musician ['magician', 'jeweler', 'fiddle']
processor ['blade', 'chain', 'apple']
lead ['spot', 'record']

3000
store ['chain', 'jeweler', 'apple']
expertly ['blade', 'magician', 'fiddle']
lead ['spot', 'record']

5000
store ['chain', 'jeweler', 'apple']
spinning ['spot', 'blade', 'magician', 'fiddle']
all-time ['record']


In [224]:
# Rank-weighted overlap between 'spoiled' and the neighbour maps of the bad
# words; 0 means 'spoiled' is a neighbour of none of them.
# NOTE(review): `n` must be a NishyBot2 instance here, not an int left over
# from a loop variable.
NishyBot2.count_matches(n.bad_similars, 'spoiled')

0

In [227]:
# Rank of 'encase' among the neighbours of 'cone' (0 = first neighbour returned).
# find_similars stores each neighbour list as a {word: rank} dict, so the rank
# is a key lookup; dicts have no .index().
# NOTE(review): 'cone' only appears in the first commented-out word set; with
# the active `good` list good.index('cone') raises ValueError.
n.good_similars[good.index('cone')]['encase']

1