#### Get text from board screenshot

In [38]:
import easyocr
from PIL import Image, ImageFilter
import numpy as np

In [39]:
#Set team color and sc file name
team_color="R"
sc_file_name = "sc_7.png"

In [40]:
# opens screenshot
img = Image.open(sc_file_name)
img_arr = np.array(img)

In [41]:
# OCR
reader = easyocr.Reader(['en'])
results = reader.readtext(img_arr)

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [42]:
#fix the text from easyocr
board_0_fix = [i.replace("0","O") for _,i,_ in results]

from spellchecker import SpellChecker

spell = SpellChecker()

board = [spell.correction(w).upper() for w in board_0_fix]
board

['CONDUCTOR',
 'COURT',
 'QUEEN',
 'CLIFF',
 'RING',
 'CONTRACT',
 'ENGINE',
 'CHANGE',
 'DOCTOR',
 'CAT',
 'BILL',
 'STREAM',
 'KITE',
 'COCONUT',
 'CAP',
 'COLD',
 'RAY',
 'HITCHHIKER',
 'BOOM',
 'MARBLE',
 'SEED',
 'AGENT',
 'ZEBRA',
 'FRANCE',
 'PITCH']

In [43]:
#get good word list
img = img.convert('RGB')
l,w = img.size
l_unit = l//5
w_unit = w//5
#get array of square images in board
def get_sqr(x,y,img):
    return img.crop((x*l_unit,y*w_unit,(x+1)*l_unit,(y+1)*w_unit))
sqrs: list[Image.Image] = []
for i in range(25):
    sqrs.append(get_sqr(i%5,i//5,img))
sqrs_colors = []

for sqr in sqrs:
    #get dominant color
    if len(reader.readtext(np.array(sqr)))==0:
        continue
    r,g,b = np.array(sqr).mean(axis=(0,1))
    if r-b>100:
        sqrs_colors.append("R")
    elif b-r>100:
        sqrs_colors.append("B")
    elif abs(r-b)<10:
        sqrs_colors.append("F")
    else:
        sqrs_colors.append("W")


In [44]:
#get good word list
good_words = []
for i in range(len(board)):
    if sqrs_colors[i]==team_color:
        good_words.append(board[i])
good_words

#get bad word list
bad_words = list(set(board)-set(good_words))
good_words, bad_words

(['CHANGE', 'CAT', 'KITE', 'COCONUT', 'COLD', 'BOOM', 'MARBLE', 'PITCH'],
 ['ENGINE',
  'DOCTOR',
  'CLIFF',
  'QUEEN',
  'CONTRACT',
  'HITCHHIKER',
  'RING',
  'ZEBRA',
  'BILL',
  'COURT',
  'SEED',
  'CONDUCTOR',
  'FRANCE',
  'AGENT',
  'CAP',
  'RAY',
  'STREAM'])

#### get word embeddings for each word in board

In [45]:
#import Libraries
from sentence_transformers import SentenceTransformer
import numpy as np
from unidecode import unidecode

In [46]:
#test getting word embeddings
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [47]:
#helpful functions
def norm(x):
    x = x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-9)
    return x
def embed(words):
    w = [unidecode(w.lower().replace("_"," ").strip()) for w in words]
    v = model.encode(w, normalize_embeddings=True)
    return w, v

In [48]:
#get top 50k words for our hint vocab
i = 0
vocab = set()
with open("google-10000-english-usa.txt") as f:
    for line in f:
        if line.strip().isalpha():
            vocab.add(line.strip().lower())
        i+=1
        if i>=70000:
            break
vocab = [unidecode(w.lower().replace("_"," ").strip()) for w in vocab]
vocab = set(vocab)



In [None]:
#save or load vocab embeddings
import pickle

def save_vocab_embeddings(fname):
    filename = fname
    vocab_obj = embed(vocab)
    with open(filename, 'wb') as file:
        # Use pickle.dump() to write the list to the file
        pickle.dump(vocab_obj, file)
        
def load_vocab_embeddings(fname):
    filename = fname
    with open(filename, 'rb') as file:
        # Use pickle.load() to read the list from the file
        vocab_obj = pickle.load(file)
    return vocab_obj
#save_vocab_embeddings("google_word_embeddings.pkl")

In [57]:
#get board, good, bad and vocab word embeddings
_,board_embeddings = embed(board)
_,good_words_embeddings = embed(good_words)
_,bad_words_embeddings = embed(bad_words)
#vocab,vocab_embeddings = load_vocab_embeddings("google_word_embeddings.pkl")
vocab,vocab_embeddings = load_vocab_embeddings("vocab_embeddings.pkl")

In [58]:
#remove board words from vocab
mask = ~np.isin(vocab,[i.lower() for i in board])
print("Removed: ",len(vocab)-mask.sum())
vocab = np.array(vocab)[mask].tolist()
vocab_embeddings = vocab_embeddings[mask]

Removed:  23


In [59]:
#find centroid of good words
centroid = good_words_embeddings.mean(axis=0,keepdims=True)
centroid = norm(centroid)
centroid

array([[-1.51653392e-02,  6.27794340e-02, -2.53230166e-02,
         4.27070484e-02, -6.24649860e-02, -1.45067926e-02,
         1.30562037e-01,  2.05520671e-02,  2.68272925e-02,
        -1.34205469e-03,  3.12098060e-02, -9.87825319e-02,
        -4.05647084e-02,  1.38952620e-02,  2.30840724e-02,
         2.05444749e-02, -1.59344214e-04, -8.40741023e-03,
        -1.04819022e-01, -2.03982908e-02, -4.22155634e-02,
         1.11057512e-01, -1.98554602e-02,  3.59184816e-02,
        -2.18729880e-02,  6.54373765e-02, -3.64763476e-02,
         3.90573815e-02, -7.71450810e-03, -1.37572646e-01,
        -7.12372269e-03,  5.58719300e-02,  3.33362557e-02,
         2.01537227e-03, -6.47518560e-02, -1.47552323e-02,
        -1.71499234e-02,  1.34375738e-02,  5.74421212e-02,
         4.07887623e-03, -1.45846955e-03, -8.65355358e-02,
         3.48013081e-02,  2.64546387e-02,  2.61521731e-02,
         4.41160426e-02,  2.62327902e-02,  3.14460881e-02,
         7.50530809e-02,  7.48924259e-03, -8.92616361e-0

In [74]:
#Scoring similarity
 
sim_good = vocab_embeddings @ good_words_embeddings.T                   # [V, |G|]
sim_bad_max = vocab_embeddings @ bad_words_embeddings.T.max(axis=1)     # [V]
sim_bad_mean = vocab_embeddings @ bad_words_embeddings.T.mean(axis=1)   # [V]

lambda1, lambda2 = 1.0, 0.25
score = sim_good.mean(axis=1) - lambda1*sim_bad_max - lambda2*sim_bad_mean

idx = np.argsort(-score)[:50]
candidates = [(vocab[i], float(score[i])) for i in idx]
candidates



[('dissimilarity', 0.07369671016931534),
 ('inconsistencies', 0.07282659411430359),
 ('cataclysm', 0.06765158474445343),
 ('shirked', 0.06709793210029602),
 ('indiscriminately', 0.06506812572479248),
 ('responsive', 0.06417123228311539),
 ('variability', 0.06280888617038727),
 ('fluttering', 0.06073378771543503),
 ('voluptuousness', 0.059847142547369),
 ('mildness', 0.05762895196676254),
 ('meanings', 0.05734650790691376),
 ('crunching', 0.05680336803197861),
 ('inflaming', 0.05256607383489609),
 ('disapprobation', 0.05091044306755066),
 ('fluctuation', 0.05034840852022171),
 ('figurative', 0.049643754959106445),
 ('dampness', 0.049103520810604095),
 ('upbraiding', 0.047848913818597794),
 ('deprecatingly', 0.04759148508310318),
 ('picnics', 0.04742325469851494),
 ('deviations', 0.04687386378645897),
 ('vagueness', 0.04569637030363083),
 ('indescribable', 0.04535086452960968),
 ('cope', 0.045253295451402664),
 ('trembling', 0.045196983963251114),
 ('misanthropy', 0.044733501970767975),


In [75]:
def score_choices_order(choices,good_words=good_words):
    k = 0
    for c,_ in choices:
        if c in good_words:
            k+=1
        else:
            break
    return k
def get_choices_from_hint(hint,k=25):
    hint_embedding = model.encode(hint, normalize_embeddings=True).reshape(1,-1)
    sims = (board_embeddings @ hint_embedding.T).ravel()
    return [(board[i], float(sims[i])) for i in np.argsort(-sims)][:k]
def score_candidates(candidates=candidates):
    k_arr = []
    for w,_ in candidates:
        k_arr.append(score_choices_order(get_choices_from_hint(w)))
    return k_arr



In [76]:
#pick best candidate
def pick_best_candidate():
    candidate_scores = score_candidates()
    np.argmax(candidate_scores)
    candidates[np.argmax(score_candidates())][0]
    return candidates[np.argmax(score_candidates())][0],max(candidate_scores)
# pick_best_candidate()
pick_best_candidate()

('fluttering', 5)

In [77]:
#get choices from hint
h,k = pick_best_candidate()
get_choices_from_hint(h)



[('KITE', 0.42257681488990784),
 ('PITCH', 0.2883622646331787),
 ('CAT', 0.2805821895599365),
 ('COLD', 0.2404635101556778),
 ('BOOM', 0.23172815144062042),
 ('ZEBRA', 0.22945865988731384),
 ('STREAM', 0.22625955939292908),
 ('SEED', 0.22466379404067993),
 ('RING', 0.22420530021190643),
 ('QUEEN', 0.21881583333015442),
 ('ENGINE', 0.21773207187652588),
 ('COCONUT', 0.21378174424171448),
 ('MARBLE', 0.21198558807373047),
 ('CONTRACT', 0.1903490126132965),
 ('CHANGE', 0.18641580641269684),
 ('CLIFF', 0.1753355860710144),
 ('HITCHHIKER', 0.17176111042499542),
 ('CONDUCTOR', 0.16202419996261597),
 ('COURT', 0.16193248331546783),
 ('DOCTOR', 0.16114285588264465),
 ('RAY', 0.16012291610240936),
 ('BILL', 0.15813156962394714),
 ('FRANCE', 0.14022505283355713),
 ('AGENT', 0.137843057513237),
 ('CAP', 0.04342864453792572)]