#### Get text from board screenshot

In [49]:
import easyocr
from PIL import Image, ImageFilter
import numpy as np

In [50]:
#Set team color and sc file name
team_color="R"
sc_file_name = "sc_9.png"

In [51]:
# opens screenshot
img = Image.open(sc_file_name)
img_arr = np.array(img)

In [52]:
# OCR
reader = easyocr.Reader(['en'])
results = reader.readtext(img_arr)

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [53]:
#fix the text from easyocr
board_0_fix = [i.replace("0","O") for _,i,_ in results]

from spellchecker import SpellChecker

spell = SpellChecker()

board = [spell.correction(w).upper() for w in board_0_fix]
board

['UNIFORM',
 'EGYPT',
 'BOOM',
 'ANTARCTICA',
 'AUSTRALIA',
 'HITCHHIKER',
 'CIRCLE',
 'BLACKBERRY',
 'CZECH',
 'GLASS',
 'ROCK',
 'HURRICANE',
 'HONEY',
 'HARBOR',
 'PISTOL',
 'VIOLIN',
 'MOLE',
 'PASS',
 'CALF']

In [54]:
#get good word list
img = img.convert('RGB')
l,w = img.size
l_unit = l//5
w_unit = w//5
#get array of square images in board
def get_sqr(x,y,img):
    return img.crop((x*l_unit,y*w_unit,(x+1)*l_unit,(y+1)*w_unit))
sqrs: list[Image.Image] = []
for i in range(25):
    sqrs.append(get_sqr(i%5,i//5,img))
sqrs_colors = []

for sqr in sqrs:
    #get dominant color
    if len(reader.readtext(np.array(sqr)))==0:
        continue
    r,g,b = np.array(sqr).mean(axis=(0,1))
    if r-b>100:
        sqrs_colors.append("R")
    elif b-r>100:
        sqrs_colors.append("B")
    elif abs(r-b)<10:
        sqrs_colors.append("F")
    else:
        sqrs_colors.append("W")


In [55]:
#get good word list
good_words = []
for i in range(len(board)):
    if sqrs_colors[i]==team_color:
        good_words.append(board[i])
good_words

#get bad word list
bad_words = list(set(board)-set(good_words))
good_words, bad_words

(['HITCHHIKER', 'BLACKBERRY', 'MOLE'],
 ['CZECH',
  'PISTOL',
  'HARBOR',
  'VIOLIN',
  'CALF',
  'HONEY',
  'CIRCLE',
  'ROCK',
  'EGYPT',
  'ANTARCTICA',
  'GLASS',
  'PASS',
  'UNIFORM',
  'AUSTRALIA',
  'BOOM',
  'HURRICANE'])

#### get word embeddings for each word in board

In [56]:
#import Libraries
from sentence_transformers import SentenceTransformer
import numpy as np
from unidecode import unidecode

In [57]:
#test getting word embeddings
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [58]:
#helpful functions
def norm(x):
    x = x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-9)
    return x
def embed(words):
    w = [unidecode(w.lower().replace("_"," ").strip()) for w in words]
    v = model.encode(w, normalize_embeddings=True)
    return w, v

In [59]:
#get top 50k words for our hint vocab
i = 0
vocab = set()
with open("words_100k.txt") as f:
    for line in f:
        if line.strip().isalpha():
            vocab.add(line.strip().lower())
        i+=1
        if i>=70000:
            break
vocab = [unidecode(w.lower().replace("_"," ").strip()) for w in vocab]
vocab = set(vocab)



In [60]:
#save or load vocab embeddings
import pickle

def save_vocab_embeddings():
    filename = "vocab_embeddings.pkl"
    vocab_obj = embed(vocab)
    with open(filename, 'wb') as file:
        # Use pickle.dump() to write the list to the file
        pickle.dump(vocab_obj, file)
        
def load_vocab_embeddings():
    filename = "vocab_embeddings.pkl"
    with open(filename, 'rb') as file:
        # Use pickle.load() to read the list from the file
        vocab_obj = pickle.load(file)
    return vocab_obj


In [61]:
#get board, good, bad and vocab word embeddings
_,board_embeddings = embed(board)
_,good_words_embeddings = embed(good_words)
_,bad_words_embeddings = embed(bad_words)
vocab,vocab_embeddings = load_vocab_embeddings()

In [62]:
#remove board words from vocab
mask = ~np.isin(vocab,[i.lower() for i in board])
print("Removed: ",len(vocab)-mask.sum())
vocab = np.array(vocab)[mask].tolist()
vocab_embeddings = vocab_embeddings[mask]

Removed:  18


In [63]:
#find centroid of good words
centroid = good_words_embeddings.mean(axis=0,keepdims=True)
centroid = norm(centroid)
centroid

array([[-1.70844086e-02,  3.28077041e-02, -9.96492710e-03,
         3.29651982e-02, -2.60916855e-02, -6.25237823e-02,
         1.66657060e-01,  6.16772659e-02,  7.17411423e-03,
        -1.57453008e-02,  5.09533100e-02,  1.43058887e-02,
        -2.79744691e-03,  1.20121939e-02, -6.20053634e-02,
         1.15855401e-02,  4.23150733e-02, -1.74682364e-02,
        -3.52504626e-02, -3.35720442e-02, -1.30977824e-01,
         5.67573942e-02,  3.08938790e-02,  2.04335134e-02,
        -6.61909804e-02,  3.98715250e-02,  9.20761004e-02,
         5.30911274e-02, -9.23054516e-02, -1.17117770e-01,
        -2.63161259e-03,  5.99673875e-02,  2.18393784e-02,
         4.40487899e-02, -6.79206625e-02, -5.48422001e-02,
        -6.29690960e-02,  5.61131462e-02,  1.05403522e-02,
        -3.01615074e-02, -4.71010283e-02, -1.02344319e-01,
         2.86004543e-02,  6.54741451e-02, -2.09768992e-02,
        -6.41470030e-02, -1.92880370e-02,  3.68529111e-02,
         5.87033480e-02,  1.33585380e-02,  5.78915142e-0

In [64]:
#Scoring similarity
 
sim_good = vocab_embeddings @ good_words_embeddings.T                   # [V, |G|]
sim_bad_max = vocab_embeddings @ bad_words_embeddings.T.max(axis=1)     # [V]
sim_bad_mean = vocab_embeddings @ bad_words_embeddings.T.mean(axis=1)   # [V]

lambda1, lambda2 = 1.0, 0.25
score = sim_good.mean(axis=1) - lambda1*sim_bad_max - lambda2*sim_bad_mean

idx = np.argsort(-score)[:50]
candidates = [(vocab[i], float(score[i])) for i in idx]
candidates



[('moles', 0.10924854874610901),
 ('molest', 0.09962919354438782),
 ('nanon', 0.08455446362495422),
 ('gwynplaine', 0.07764489203691483),
 ('mutations', 0.0760386511683464),
 ('hedgehog', 0.07237169146537781),
 ('kindles', 0.06526632606983185),
 ('gnomes', 0.06229487061500549),
 ('carcases', 0.059284478425979614),
 ('shopman', 0.058759160339832306),
 ('anker', 0.05683840438723564),
 ('molemmat', 0.054599568247795105),
 ('pawnbroker', 0.052610598504543304),
 ('immobiles', 0.04941490292549133),
 ('celerity', 0.04798945412039757),
 ('roderigo', 0.04651390388607979),
 ('mice', 0.04568588361144066),
 ('splendors', 0.045368097722530365),
 ('townsman', 0.04472794756293297),
 ('haphazard', 0.04378090053796768),
 ('immobile', 0.04269792139530182),
 ('hogsheads', 0.04248522222042084),
 ('supercargo', 0.0419965460896492),
 ('incorporeal', 0.04158192500472069),
 ('miteinander', 0.041553810238838196),
 ('maailman', 0.04128439724445343),
 ('eitelkeit', 0.04120418801903725),
 ('grossvater', 0.0404190

In [65]:
def score_choices_order(choices,good_words=good_words):
    k = 0
    for c,_ in choices:
        if c in good_words:
            k+=1
        else:
            break
    return k
def get_choices_from_hint(hint,k=25):
    hint_embedding = model.encode(hint, normalize_embeddings=True).reshape(1,-1)
    sims = (board_embeddings @ hint_embedding.T).ravel()
    return [(board[i], float(sims[i])) for i in np.argsort(-sims)][:k]
def score_candidates(candidates=candidates):
    k_arr = []
    for w,_ in candidates:
        k_arr.append(score_choices_order(get_choices_from_hint(w)))
    return k_arr



In [66]:
#pick best candidate
def pick_best_candidate():
    candidate_scores = score_candidates()
    np.argmax(candidate_scores)
    candidates[np.argmax(score_candidates())][0]
    return candidates[np.argmax(score_candidates())][0],max(candidate_scores)
# pick_best_candidate()
pick_best_candidate()

('mutations', 2)

In [None]:
#get choices from hint
h,k = pick_best_candidate()
get_choices_from_hint(h,2)



[('MOLE', 0.27580273151397705),
 ('HITCHHIKER', 0.17183254659175873),
 ('PISTOL', 0.15680620074272156),
 ('BLACKBERRY', 0.15599700808525085),
 ('GLASS', 0.14699983596801758),
 ('CIRCLE', 0.14400619268417358),
 ('BOOM', 0.13913880288600922),
 ('ROCK', 0.125931978225708),
 ('CALF', 0.11227866262197495),
 ('HURRICANE', 0.10929937660694122),
 ('PASS', 0.10829814523458481),
 ('ANTARCTICA', 0.09119860082864761),
 ('UNIFORM', 0.08480668067932129),
 ('EGYPT', 0.07337921857833862),
 ('CZECH', 0.06541817635297775),
 ('HONEY', 0.05778663977980614),
 ('HARBOR', 0.051817089319229126),
 ('AUSTRALIA', 0.04731404408812523),
 ('VIOLIN', 0.015002928674221039)]