In [2]:
from copy import deepcopy
from itertools import product
import logging
import random

import pandas as pd
import numpy as np
from tabulate import tabulate
from termcolor import colored
from tqdm import tqdm


from word_grid import WordGrid, Direction



MIN_WORD_LEN = 3

In [3]:

def get_logger():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.ERROR)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    return logger

logger = get_logger()

In [4]:
puzzle = WordGrid((11,17))
puzzle

-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -

In [5]:
word_index = pd.read_csv("data/word_index.csv", encoding='utf-8')
dictionary = word_index[word_index["lang_code"] == "en"]
dictionary["word"] = dictionary["word"].astype(str)
dictionary = dictionary[dictionary["len"] >= MIN_WORD_LEN]
dictionary = dictionary[dictionary["len"] <= max(puzzle.shape)]
dictionary = dictionary[~dictionary["word"].str.contains(r"[0-9-]")]
print(len(dictionary))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dictionary["word"] = dictionary["word"].astype(str)


1169246


In [6]:
puzzle.reset()
while not puzzle.add_word((1,2), Direction.ACROSS, dictionary["word"].sample(1).item()):
    pass
print(puzzle)

-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  c  a  b  a  l  l  e  r  i  a  l  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -


In [7]:
def get_candidates(puzzle: WordGrid, position: tuple, direction: Direction, blacklist: list):

    candidates = dictionary[dictionary["word"].apply(lambda w: puzzle.validate_word(position, direction, w))]
    candidates = candidates[~candidates["word"].isin(blacklist)]
        
    return candidates
    

In [8]:
puzzle.reset()

n = 0
seed = random.randint(0, 1000)
target_n = 40
random.seed(seed)
direction = Direction.DOWN
word_list = []
snapshots = []
positions = {
    Direction.DOWN: {pos: [] for pos in product(range(puzzle.shape[1]), range(puzzle.shape[0] - MIN_WORD_LEN + 1))},
    Direction.ACROSS: {pos: [] for pos in product(range(puzzle.shape[1] - MIN_WORD_LEN + 1), range(puzzle.shape[0]))}
}
pbar = tqdm(total=target_n)
while n < target_n:
    if len(positions[direction]) == 0:
        if len(positions[Direction.flip(direction)]) == 0:
            break
        direction = Direction.flip(direction)
    
    position = random.choice(list(positions[direction]))
    
    blacklist = positions[direction][position] + word_list
    candidates = get_candidates(puzzle, position, direction, blacklist)

    if len(candidates) == 0:
        positions[direction].pop(position, None)
        continue
    
    try:
        weights = np.log(np.log(dictionary.freq.fillna(1)) + 1) + dictionary.len
        word = candidates.word.sample(1, weights=weights, random_state=seed).item()
    except:
        word = candidates.word.sample(1, random_state=12).item()
    
    pbar.update(1)
    pbar.set_description(f"word: {word}, pos: {position}, dir: {direction.name.lower()}, cnd: {len(candidates)}, slots {len(positions[Direction.DOWN])}d {len(positions[Direction.ACROSS])}a", refresh=True)
    
    if puzzle.add_word(position, direction, word):
        snapshots.append(({"position": position, "direction": direction, "word": word}, deepcopy(puzzle)))
        if len(positions[Direction.flip(direction)]) > 0:
            direction = Direction.flip(direction)

        positions[direction].pop(position, None)    
        word_list.append(word)
        n += 1
    else:
        positions[direction][position].append(word)
        logger.info(f"Can't place word {word} at {position}")

print(word_list)
print(puzzle)

word: hea, pos: (3, 1), dir: down, cnd: 245, slots 86d 44a:  92%|█████████▎| 37/40 [18:50<03:35, 71.80s/it]             

['Wyly', 'down bad', 'Ratledge', 'Cagneys', 'MbS', 'virginlike', 'thunder run', 'Dains', 'igtg', 'Sanju', 'Chavoyas', 'bhp', 'likkle', 'doskpop', 'xpost', 'lienal', 'Vale', 'Alavi', 'orf', 'Emm', 'yalla', 'yows', 'Bengi', 'yiked', 'neek', 'Bokyi', 'rvv', 'DSU', 'fkn', 'Ga.', 'induna', 'Cyd', 'arr', 'exserts', 'uji', 'tbch', 'hea']
-  -  -  -  c  y  d  -  a  l  a  v  i  -  m  t  x
t  b  c  h  -  -  d  s  u  -  g  a  .  -  b  h  p
-  e  h  e  -  e  m  m  -  i  -  l  -  -  s  u  o
r  n  a  a  n  -  -  c  a  g  n  e  y  s  -  n  s
a  g  v  -  e  x  s  e  r  t  s  -  a  -  f  d  t
t  i  o  l  e  -  v  i  r  g  i  n  l  i  k  e  -
l  -  y  i  k  e  d  -  -  -  -  -  l  n  n  r  r
e  -  a  k  -  -  d  o  w  n     b  a  d  -     v
d  o  s  k  p  o  p  -  y  o  w  s  -  u  u  r  v
g  r  -  l  i  e  n  a  l  -  -  s  a  n  j  u  -
e  f  -  e  -  b  o  k  y  i  -  -  d  a  i  n  s


In [9]:
for params, grid in snapshots:
    position, direction, word = list(params.values())
    print(f"word: {word}, pos: {position}, dir: {direction.name.lower()}")
    print(grid)

word: Wyly, pos: (8, 7), dir: down
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  w  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  y  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  l  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  y  -  -  -  -  -  -  -  -
word: down bad, pos: (6, 7), dir: across
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  d  o  

In [10]:
def custom_print(puzzle: WordGrid):
    to_print = []
    for chars, states in zip(puzzle.puzzle, puzzle.state):
        data = []
        for char, state in zip(chars, states):
            if state & Direction.ACROSS.value and state & Direction.DOWN.value:
                color = "magenta"
            elif state & Direction.ACROSS.value:
                color = "blue"
            elif state & Direction.DOWN.value:
                color = "yellow"
            else:
                color = "white"
            data.append(colored(char, color))
        to_print.append(data)
    print(tabulate(to_print))

In [11]:
custom_print(puzzle)

-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
[97m-[0m  [97m-[0m  [97m-[0m  [97m-[0m  [34mc[0m  [34my[0m  [34md[0m  [97m-[0m  [34ma[0m  [34ml[0m  [34ma[0m  [35mv[0m  [34mi[0m  [97m-[0m  [33mm[0m  [33mt[0m  [33mx[0m
[34mt[0m  [35mb[0m  [35mc[0m  [35mh[0m  [97m-[0m  [97m-[0m  [34md[0m  [34ms[0m  [34mu[0m  [97m-[0m  [34mg[0m  [35ma[0m  [34m.[0m  [97m-[0m  [35mb[0m  [35mh[0m  [35mp[0m
[97m-[0m  [33me[0m  [33mh[0m  [33me[0m  [97m-[0m  [34me[0m  [34mm[0m  [34mm[0m  [97m-[0m  [33mi[0m  [97m-[0m  [33ml[0m  [97m-[0m  [97m-[0m  [33ms[0m  [33mu[0m  [33mo[0m
[33mr[0m  [33mn[0m  [33ma[0m  [33ma[0m  [33mn[0m  [97m-[0m  [97m-[0m  [34mc[0m  [35ma[0m  [35mg[0m  [34mn[0m  [35me[0m  [35my[0m  [34ms[0m  [97m-[0m  [33mn[0m  [33ms[0m
[33ma[0m  [33mg[0m  [33mv[0m  [97m-[0m  [35me[0m  [34mx[0m  [34ms[0m  [34me[0m  [35mr[0m  [35mt[0m  [34ms[0m  [97m-

In [12]:
puzzle.validate_word((5, 2), Direction.DOWN, "mad")

False

In [13]:
dictionary

Unnamed: 0.1,Unnamed: 0,word,index,len,lang_code,pos,freq,cats,trans
0,0,abdominothoracic,2,16,en,adj,,,
1,1,abb wool,3,8,en,noun,,,
2,2,abearance,4,9,en,noun,,,
3,3,abbreviatory,5,12,en,adj,,,"['nl', 'nl', 'pt', 'pt']"
4,4,abandonedly,6,11,en,adv,,,
...,...,...,...,...,...,...,...,...,...
2814415,2814415,in my opinion,9645544,13,en,phrase,,,
2814416,2814416,copulate with,9645545,13,en,verb,,,
2814418,2814418,personal quality,9645550,16,en,noun,,,
2814419,2814419,physical property,9645551,17,en,noun,,,
