<a href="https://colab.research.google.com/github/ThomKirwanEvans/CodenamesAI/blob/master/Codenames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Codenames AI


In [1]:
# Imports
import tensorflow_hub as hub
from matplotlib import pyplot as plt
import numpy as np
import os
import requests
import itertools
import seaborn as sns
sns.set()

# helper functions
# calc distances
def calc_dist(set1,set2,metric='manhattan'):
    """Pairwise distances between the rows of two embedding matrices.

    Parameters
    ----------
    set1 : ndarray of shape (n1, d)
    set2 : ndarray of shape (n2, d)
    metric : {'manhattan', 'euclidean'}, optional
        'manhattan' (default) sums the absolute coordinate differences.
        This is what this function has always computed: the original
        expression took the square root of each squared coordinate
        *before* summing, which is the L1 distance even though it was
        labelled "euclidean". The default is kept so existing scores do
        not change. 'euclidean' gives the true L2 distance
        (square root of the summed squares).

    Returns
    -------
    ndarray of shape (n1, n2), float64
        Entry [i, j] is the distance between set1[i] and set2[j].

    Raises
    ------
    ValueError
        If `metric` is not one of the supported names.
    """
    if metric not in ('manhattan', 'euclidean'):
        raise ValueError("metric must be 'manhattan' or 'euclidean', got %r" % (metric,))

    dmat = np.zeros((set1.shape[0],set2.shape[0]))
    # Work one row of set1 at a time so the temporary is (n2, d) rather
    # than a full (n1, n2, d) broadcast.
    for i in range(set1.shape[0]):
        diff = set1[i,:]-set2
        if metric == 'euclidean':
            dmat[i,:] = ((diff**2).sum(axis=1))**0.5
        else:
            dmat[i,:] = np.abs(diff).sum(axis=1)
    return dmat

def remove_words(dists,set_words,clue_words,neg=False):
    """Rule out clue candidates that are themselves words on the board.

    Every position of `dists` whose clue word appears in `set_words` is
    overwritten with a sentinel so it can never win a min/max search.

    Parameters
    ----------
    dists : ndarray of shape (len(clue_words),)
        One value per clue word. Modified in place.
    set_words : iterable of str
        Words that must not be offered as clues.
    clue_words : sequence of str
        Clue vocabulary, aligned index-for-index with `dists`.
    neg : bool, optional
        If True write -1e6, otherwise write +1e6 (default).

    Returns
    -------
    ndarray
        The same `dists` object that was passed in.
    """
    sentinel = -1e6 if neg else 1e6
    banned = set(set_words)
    # A single pass masks *every* occurrence of a banned word; list.index
    # would only find the first one if the vocabulary contains duplicates.
    for ix, word in enumerate(clue_words):
        if word in banned:
            dists[ix] = sentinel
    return dists



  import pandas.util.testing as tm


# Word lists
Download (or upload) a word list of candidate clues and do some basic preprocessing.

In [2]:
# get clue words
url = 'https://raw.githubusercontent.com/first20hours/google-10000-english/master/20k.txt' # 20k common words
#url = 'https://raw.githubusercontent.com/dwyl/english-words/master/words.txt' # 479k english words, too many really
#url = 'https://www.mit.edu/~ecprice/wordlist.10000' # 10k words
#url = 'https://raw.githubusercontent.com/imsky/wordlists/master/ipsum/hipster.txt'
#url = 'https://raw.githubusercontent.com/imsky/wordlists/master/ipsum/corporate.txt'
#url = 'https://raw.githubusercontent.com/imsky/wordlists/master/adjectives/character.txt'
#url = 'https://raw.githubusercontent.com/imsky/wordlists/master/adjectives/colors.txt'

# While this is True the list is re-downloaded on every run; set it to
# False to reuse an existing clues.txt.
force_update = True
if not(os.path.exists('clues.txt')) or force_update:
    print('Downloading clues')
    r = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of saving the error page as a word list
    r.raise_for_status()
    with open('clues.txt', 'wb') as f:
        f.write(r.content)

# read every word file, one lower-cased word per line
files = ['clues']
raw_words = []
for F in files:
    with open(F + '.txt', 'r', encoding='utf-8') as file:
        raw_words += [line.strip().lower() for line in file]

# Keep a word only if it:
#  - is longer than 3 characters (this also drops blank lines safely),
#  - does not end in 's' (a really simple plural filter, to avoid
#    'fighters' being a clue for 'fighter'),
#  - is not hyphenated.
clue_words = [
    w for w in raw_words
    if len(w) > 3 and not w.endswith('s') and '-' not in w
]

print(len(clue_words),'clues in list')

Downloading clues
14006 clues in list


# Calculate Embeddings
Download a few embedding models from TF Hub.

In [3]:
# Load the embedding models from TF Hub.
# USE is the model assigned to `embed` for both teams and for the guesser.
USE = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
# NOTE(review): SWIVEL is loaded here but is not assigned to `embed` in any
# of the visible cells - confirm whether it is still needed.
SWIVEL = hub.load("https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1")
# Alternative models, currently disabled:
#W2V = hub.load("https://tfhub.dev/google/Wiki-words-250/2")
#NNLM = hub.load("https://tfhub.dev/google/nnlm-en-dim128/2")

Set the current game words and whose turn it is here. Delete words from the lists as they are removed from the game.

In [6]:
TURN = 'blue'
# colours refer to those used in the game
RED = ['jam','light','olympus','model','jack','pitch','belt','mass','egypt']
BLUE = ['grass','apple','kangaroo','note','novel','tablet','kid','spell']
grey_words = ['round','button','crane','wind','sink','superhero','shop']
black_words = ['suit']

# Each team could use a different embedding model for a bit of variety;
# at the moment both branches select USE.
if TURN == 'red':
    your_words = RED
    opp_words = BLUE
    embed = USE
else:
    your_words = BLUE
    opp_words = RED
    embed = USE


game_words = your_words + opp_words + grey_words + black_words


def _embed_or_empty(words, dim):
    """Embed `words`; an empty list gives a (0, dim) array without calling the model."""
    if len(words) == 0:
        return np.zeros((0, dim))
    return embed(words).numpy()


def _nearest_dist(clue_vecs, word_vecs):
    """Distance from each clue to its closest word in `word_vecs`.

    Returns zeros when `word_vecs` has no rows, so an emptied list (e.g.
    all grey words already guessed) adds a constant to the score instead
    of raising on a zero-size np.min reduction.
    """
    if word_vecs.shape[0] == 0:
        return np.zeros(clue_vecs.shape[0])
    return np.min(calc_dist(clue_vecs, word_vecs), axis=1)


# calculate all embeddings
clue_embed = embed(clue_words).numpy()
embed_dim = clue_embed.shape[1]
your_embed = embed(your_words).numpy()
opp_embed = _embed_or_empty(opp_words, embed_dim)
grey_embed = _embed_or_empty(grey_words, embed_dim)
black_embed = _embed_or_empty(black_words, embed_dim)


# find distances
# For your own words take the max over words: the distance from each clue
# to the *furthest* word it would have to link. Board words are masked out
# so they cannot be chosen as clues.
clue_your_full = calc_dist(clue_embed,your_embed)
clue_your = remove_words(np.max(clue_your_full,axis=1),game_words,clue_words)
# For the words to avoid take the min over words: the distance from each
# clue to the *nearest* word in that set.
clue_opp = _nearest_dist(clue_embed, opp_embed)
clue_grey = _nearest_dist(clue_embed, grey_embed)
clue_black = _nearest_dist(clue_embed, black_embed)


In [8]:
# Find the best clue for 1 to N words
num_your = your_embed.shape[0]
# Base score from the words to avoid - it does not depend on which of your
# words are targeted, so it only needs to be calculated once.
base = clue_opp*2 + clue_grey/2 + clue_black**0.5
# Cap the base score at its 95th percentile so being extremely far from the
# words to avoid cannot dominate the score.
mx = np.percentile(base,95)
base[base>mx] = mx
best_ixs = []
link_to = []
for n_tries in range(1,num_your+1):
    # Reset per group size so nothing carries over from the previous size
    _best = -np.inf
    best_answer = ''
    best_ix = None
    link_words = ()

    for comb in itertools.combinations(range(num_your), n_tries):
        # Distance from each clue to the furthest word of this target set,
        # with board words masked out as possible clues.
        _clue_your = remove_words(np.max(clue_your_full[:,list(comb)],axis=1),game_words,clue_words)
        # Higher is better: far from the words to avoid, close to the targets
        dists = base -_clue_your*3

        cand_ix = dists.argmax()
        cand_val = dists[cand_ix]
        # Compare unrounded scores; rounding is for display only
        if cand_val > _best:
            _best = cand_val
            best_ix = cand_ix
            link_words = comb
            best_val = np.round(cand_val,2)
            best_word = clue_words[cand_ix]
            target_words = '-'.join(list(np.array(your_words)[list(comb)]))
            best_answer = ' '.join([target_words,str(best_val),best_word,str(best_ix)])
    print(best_answer)
    best_ixs += [best_ix]
    link_to += [link_words]


note 22.16 notice 457
note-novel 1.5 revised 2042
novel-kid-spell -1.15 beloved 7176
apple-novel-kid-spell -2.86 adolescent 6496
apple-novel-tablet-kid-spell -3.99 scholastic 11622
grass-apple-kangaroo-tablet-kid-spell -5.76 pony 7190
grass-apple-kangaroo-novel-tablet-kid-spell -6.6 diabetic 7562
grass-apple-kangaroo-note-novel-tablet-kid-spell -7.57 hound 11498


# Player AI
Much simpler - either use the game words defined above or write them in here

In [10]:
# Embedding model used by the guesser
embed = USE
# The clue that was given, and how many board words it is meant to link
clue_word = ['enterprise']
N = 2

# Every word still on the board, regardless of which team owns it
game_words = your_words + opp_words + grey_words + black_words

single_clue_embed = embed(clue_word).numpy()
game_embed = embed(game_words).numpy()
# Distance from each board word to the clue (there is a single clue column,
# so the min over axis 1 just flattens the result)
clue_game = calc_dist(game_embed,single_clue_embed).min(axis=1)
# List a couple of guesses beyond the N that were asked for
N = N + 2
topN = np.argsort(clue_game)[:N]
# Closest board words first
for board_ix in topN:
    print(game_words[board_ix],np.round(clue_game[board_ix],2))



apple 20.57
model 21.17
suit 21.33
superhero 21.93
