In [246]:
import gensim
from nltk.corpus import wordnet as wn
from nltk.tag import pos_tag

# Load word2vec-format vectors from a local binary file. The resulting
# module-level `model` is what NishyBot2.find_similars queries via
# `model.most_similar`.
model = gensim.models.KeyedVectors.load_word2vec_format('gigaword-nocase-26.bin', binary=True)

In [318]:
class NishyBot2:
    """Pick one-word hints that sit close to the ``good`` words in embedding space.

    For every board word the bot stores a ``{neighbour: rank}`` dict built from
    ``model.most_similar``. A hint is rewarded for appearing in the ``good``
    dicts and penalised for appearing in the ``bad``, ``okay`` and ``assassin``
    dicts.

    The class relies on the module-level names ``model`` (word vectors),
    ``wn`` (WordNet) and ``pos_tag`` (part-of-speech tagger).
    """

    # M goes from 1 to 5, score difference goes from 1/9 to 9
    SAMENESS_THRESHOLD = 0
    # Part-of-speech tags a hint may carry: adjectives, nouns and verbs.
    ALLOWED_POS = ['JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']

    @staticmethod
    def get_risk_from_score(my_score, their_score):
        """Map the two remaining-word counts to a risk value.

        Returns ``1 - (2 * their_score - my_score + 7) / 24``.
        """
        norm = 1 - (2 * their_score - my_score + 7) / 24
        return norm

    @staticmethod
    def get_synonyms(word):
        """Return the purely alphabetic lemma names of ``word``'s first synset.

        ``word`` itself is excluded. An empty tuple is returned when WordNet
        has no synset for ``word`` (previously this raised IndexError).
        """
        synsets = wn.synsets(word)
        if not synsets:
            return ()
        return tuple(
            name for name in synsets[0].lemma_names()
            if name != word and name.isalpha()
        )

    def __init__(self, good, bad, okay, assassin, risk=-1):
        """Store the four word lists and precompute their neighbour ranks.

        Args:
            good: words the hint should point at.
            bad: words whose neighbours are penalised with weight 1.
            okay: words whose neighbours are penalised with weight 0.5.
            assassin: words whose neighbours are penalised with weight 3.
            risk: scales ``M`` and ``TOP_N``; the default ``-1`` derives it
                from ``len(good)`` and ``len(bad)``.
        """
        self.good = good
        self.bad = bad
        self.okay = okay
        self.assassin = assassin

        if risk == -1:
            risk = NishyBot2.get_risk_from_score(len(good), len(bad))

        self.M = 1 + 4 * risk
        # Number of neighbours fetched per word; grows with risk.
        self.TOP_N = int(self.M * 1000)
        print(self.TOP_N)

        self.good_similars = self.find_similars(good)
        self.bad_similars = self.find_similars(bad)
        self.okay_similars = self.find_similars(okay)
        self.assassin_similars = self.find_similars(assassin)

    def find_similars(self, words):
        """Return one ``{neighbour: rank}`` dict per entry of ``words``.

        A word missing from the model's vocabulary falls back to the first of
        its WordNet synonyms that the model knows. If none resolves, a message
        is printed and an empty dict is stored, so the result always has the
        same length and order as ``words`` and ``good_matches`` can pair the
        two by position.
        """
        def find_similar(word):
            return {x[0]: i for i, x in enumerate(model.most_similar(positive=[word], topn=self.TOP_N))}

        similars = []
        for word in words:
            try:
                similars.append(find_similar(word))
                continue
            except KeyError:
                pass

            # Only consult WordNet once the direct lookup has failed.
            for synonym in NishyBot2.get_synonyms(word):
                try:
                    similars.append(find_similar(synonym))
                    break
                except KeyError:
                    continue
            else:
                print('No similar words found for', word)
                similars.append({})  # placeholder keeps indices aligned with `words`

        return similars

    def count_matches(self, wordlists, test):
        """Sum ``f(rank)`` over every dict in ``wordlists`` that contains ``test``."""
        return sum(self.f(wordlist[test]) for wordlist in wordlists if test in wordlist)

    def f(self, x):
        """Weight for a neighbour at rank ``x``.

        Decreases from ``TOP_N`` at rank 0 to 0 at rank ``TOP_N``; the
        steepness is set by ``10 ** M``.
        """
        k = self.TOP_N
        m = 10 ** self.M
        return k * (1 / (x + m) - 1 / (k + m)) / (1 / m - 1 / (k + m))

    def good_matches(self, test):
        """Return the ``good`` words whose neighbour dict contains ``test``."""
        return [
            word for word, similar in zip(self.good, self.good_similars)
            if test in similar
        ]

    def score(self, test):
        """Score ``test`` as a hint: good matches minus weighted penalties."""
        good_count = self.count_matches(self.good_similars, test)
        bad_count = self.count_matches(self.bad_similars, test)
        okay_count = self.count_matches(self.okay_similars, test)
        assassin_count = self.count_matches(self.assassin_similars, test)

        return good_count - bad_count - 0.5 * okay_count - 3 * assassin_count  # maybe use a gan to optimize these parameters + TOP_N? not many things to optimize...

    def score_all(self, wordset):
        """Return ``{word: score}`` for ``wordset``, ordered best score first."""
        ranked = sorted(((x, self.score(x)) for x in wordset), key=lambda x: x[1], reverse=True)
        return dict(ranked)

    def _is_usable_hint(self, hint, lowered_good):
        """True when ``hint`` is a WordNet word, has an allowed POS tag and
        neither contains nor is contained in any lower-cased ``good`` word."""
        if len(wn.synsets(hint)) == 0:  # if it's not a real word
            return False
        if pos_tag([hint])[0][1] not in NishyBot2.ALLOWED_POS:  # only allow nouns, adj, and verbs
            return False
        lhint = hint.lower()
        # if it's too similar to an existing word
        return not any(lword in lhint or lhint in lword for lword in lowered_good)

    def score_all_pruned(self, wordset):
        """Like ``score_all`` but with unusable hints removed; order is kept."""
        lowered_good = [word.lower() for word in self.good]
        return {
            hint: value
            for hint, value in self.score_all(wordset).items()
            if self._is_usable_hint(hint, lowered_good)
        }


In [322]:
def pregame(good, bad, okay, assassin):
    """Print a sequence of hints that together cover the ``good`` words.

    Each round builds a NishyBot2 for the words still uncovered, prints the
    top-scoring pruned hint with the words it matches, and drops those words.
    The loop stops early, with a message, when no candidate hint survives
    pruning (for example when the remaining words have no neighbours);
    previously that case raised IndexError.

    Args:
        good: iterable of words to cover; it is not modified.
        bad, okay, assassin: passed through to NishyBot2 unchanged.
    """
    remaining = list(good)
    while remaining:
        bot = NishyBot2(remaining, bad, okay, assassin)

        # Candidate hints are all neighbours of the remaining words.
        candidates = set()
        for similar in bot.good_similars:
            candidates.update(similar)

        ranked = bot.score_all_pruned(candidates)
        if not ranked:
            print('No hint found for', remaining)
            break

        hint = next(iter(ranked))  # dict is ordered best score first
        matches = bot.good_matches(hint)
        print(hint, matches)
        remaining = [word for word in remaining if word not in matches]


def game(good, bad, okay, assassin):
    """Return ``(hint, matched_good_words)`` for the best-scoring pruned hint."""
    bot = NishyBot2(good, bad, okay, assassin)

    # Every neighbour of any good word is a candidate hint.
    pool = {word for similar in bot.good_similars for word in similar}

    ranked = bot.score_all_pruned(pool)
    # print([(x, bot.good_matches(x), ranked[x]) for x in list(ranked.keys())][:3])
    best = list(ranked)[0]
    return best, bot.good_matches(best)

In [323]:
# Earlier boards, kept commented out for reference.
# good = 'sack makeup bottle cuckoo cast cone jockey America'.lower().split(' ')
# bad = 'nut Russia fog break spider bear rip tube plane'.lower().split(' ')
# okay = 'Christmas pool Beijing trip nyc fever peanut'.lower().split(' ')
# assassin = 'link'.lower().split(' ')

good = ['spot', 'blade', 'chain', 'record', 'magician', 'jeweler', 'fiddle', 'apple']
bad = ['wonderland', 'newton', 'glacier', 'pig', 'spy', 'lead', 'mess', 'duck', 'stable']
okay = ['india', 'millionaire', 'rainbow', 'razor', 'bridge', 'polo', 'notre', 'dame']
assassin = ['ice', 'cream']

# good = ['giant', 'thumb', 'nail', 'lock','plane', 'ship','cell','state', 'capital']
# bad = ['aztec', 'court','chocolate','space','snow']
# okay = ['shop','genius','ambulance','button','heart','pupil','vet']
# assassin = ['microscope']

# good = 'apple,sister,river,einstein,brazil,garden,china,bench,tip'.split(',')
# bad = 'ray,spider,king,arthur,rail,paste,cover,octopus'.split(',')
# okay = 'roll,magazine,worm,bucket,golf,vacuum,scientist'.split(',')
# assassin = ['code']

# These assignments run last, so they replace the lists bound above.
good = ['straddle']
bad = ['glasses', 'leaf']
okay = ['tail', 'lion', 'spike', 'inch', 'ivory', 'suit', 'cross']
assassin = ['knight']

In [325]:
# NOTE(review): `bad` is passed in the `good` slot and `good` in the `bad`
# slot, so this asks for a hint covering the `bad` list — confirm intended.
game(bad, good, okay, assassin)

3833
[('showy', ['glasses', 'leaf'], 6088.870720898425), ('lustrous', ['glasses', 'leaf'], 5717.364611194678), ('luxuriant', ['glasses', 'leaf'], 5675.911710805372)]


('showy', ['glasses', 'leaf'])

In [185]:
# NOTE(review): `bad` is passed in the `good` slot and `good` in the `bad`
# slot, so the hint sequence covers the `bad` list — confirm intended.
pregame(bad, good, okay, assassin)

No similar words found for glasses


KeyboardInterrupt: 

In [113]:
# Pool every neighbour of every good word.
s = set()
for similar in n.good_similars:
    for word in similar:
        s.add(word)

# Neighbours shared by the good words at positions 1, 2 and 8.
set(n.good_similars[1]) & set(n.good_similars[2]) & set(n.good_similars[8])

IndexError: list index out of range

In [159]:
# Run pregame once per candidate neighbour-list size and print each hint sequence.
# NOTE(review): NishyBot2.__init__ assigns self.TOP_N on every instance, which
# shadows the class attribute set below, so with the class as defined in this
# notebook the sweep value is never read — the captured output presumably came
# from an earlier class version; confirm before relying on it.
# NOTE(review): the loop variable `n` rebinds the name other cells use for a
# NishyBot2 instance.
ns = [100, 500, 1000, 2000, 3000, 5000]
for n in ns:
    print(n)
    NishyBot2.TOP_N = n
    pregame(good, bad, okay, assassin)
    print()

100
india ['brazil', 'china']
tributary ['river']
dugout ['bench']
lawn ['garden']
corner ['tip']
brother ['sister']
relativity ['einstein']
ipod ['apple']

500
india ['brazil', 'china']
substitute ['bench', 'tip']
pond ['river', 'garden']
sibling ['sister']
relativity ['einstein']
ipod ['apple']

1000
india ['brazil', 'china']
substitute ['bench', 'tip']
terrace ['river', 'garden']
sibling ['sister']
relativity ['einstein']
ipod ['apple']

2000
india ['brazil', 'china']
substitute ['bench', 'tip']
lake ['river', 'garden']
relativity ['einstein']
sibling ['sister']
ipod ['apple']

3000
india ['brazil', 'china']
substitute ['bench', 'tip']
lake ['river', 'garden']
relativity ['einstein']
sibling ['sister']
ipod ['apple']

5000
india ['brazil', 'china']
substitute ['bench', 'tip']
terrace ['river', 'garden']
freud ['einstein']
sibling ['sister']
ipod ['apple']


In [224]:
# count_matches is an instance method (it calls self.f), so it must be called
# on the bot instance; calling it through the class left `test` unbound.
n.count_matches(n.bad_similars, 'spoiled')

0

In [227]:
# Each entry of good_similars is a {neighbour: rank} dict, so the rank of
# 'encase' among the neighbours of 'cone' is read by key; dicts have no .index().
n.good_similars[good.index('cone')]['encase']

1

In [129]:
from nltk.tag import pos_tag

# Tag a single word to see which part-of-speech label the tagger assigns it.
pos_tag(['possibly'])

[('possibly', 'RB')]