In [223]:
from enum import Enum
from itertools import product
import logging
from logging import Logger
import random

import pandas as pd
import numpy as np
from tabulate import tabulate
from termcolor import colored
from tqdm import tqdm



MIN_WORD_LEN = 3

In [224]:

def get_logger():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.ERROR)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    return logger

logger = get_logger()

In [225]:
class Direction(Enum):
    NONE = 0
    ACROSS = 1
    DOWN = 2
    BLOCKED = 4
    
    @classmethod
    def flip(cls, direction):
        if direction == cls.ACROSS:
            return cls.DOWN
        elif direction == cls.DOWN:
            return cls.ACROSS
        else:
            return direction

In [226]:
class WordGrid:
    def __init__(self, shape: tuple, logger: Logger = None, verbose=False) -> None:
        self.puzzle = np.full(shape, '-', dtype=np.str_)
        self.shape = np.array(self.puzzle.shape)
        self.state = np.zeros(shape, dtype=np.int8)
        self.verbose = verbose
        self.logger = logger

    def __str__(self) -> str:
        return str(tabulate(self.puzzle, tablefmt="plain"))
    
    def __repr__(self) -> str:
        return str(self)
    
    def flip(self):
        self.puzzle = self.puzzle.T
        self.shape = self.shape[::-1]
        self.state = self.state.T
    
    def reset(self) -> None:
        self.puzzle[:] = '-'
        self.state[:] = 0
        
    def validate_word(self, x: int, y: int, direction: Direction, word: str) -> bool:
        if len(word) + x > self.puzzle.shape[0]:
            # Word is too long for where it is placed
            if self.verbose:
                self.logger.warning(f"Cannot place word of length {len(word)}, '{word}' at {(x, y)}")
            return False
        elif (self.state[x:x + len(word), y] & direction.value).any():
            # Word is overlapping with an other word in the same direction
            if self.verbose:
                self.logger.warning(f"Word overlap detected while trying to place '{word}' at {(x, y)}")
            return False
        elif x - 1 > 0 and self.state[x - 1, y] != Direction.NONE.value:
            # There is a letter just before the beginning of the word
            if self.verbose:
                self.logger.warning(f"Word interference detected while trying to place '{word}' at {(x, y)}")
            return False
        elif x + len(word) + 1 < self.shape[0] and self.state[x + len(word) + 1, y] != Direction.NONE.value:
            # There is a letter just after the beginning of the word
            if self.verbose:
                self.logger.warning(f"Word interference detected while trying to place '{word}' at {(x, y)}")
            return False
        
        return True


    def add_word(self, position: tuple, direction: Direction, word: str) -> bool:
        
        if direction == Direction.ACROSS:
            self.flip()
            x, y = position
        else:
            y, x = position

        is_valid = self.validate_word(x, y, direction, word)
        
        if is_valid:
            self.puzzle[x:x + len(word), y] = list(word.lower())
            self.state[x:x + len(word), y] |= direction.value
        
        if direction == Direction.ACROSS:
            self.flip()
        
        return is_valid


    def get_letters(self, position: tuple, direction: Direction, length: int):
        y, x = position
        letters = []
        
        if direction == Direction.ACROSS:
            self.flip()
            x, y = position
        else:
            y, x = position
            
        for i, letter in enumerate(self.puzzle[x:x + length, y]):
            if letter == '-':
                continue
            letters.append((i, letter))
        
        if direction == Direction.ACROSS:
            self.flip()
        
        return letters


    def get_letter(self, position: tuple):
        return self.puzzle[position[1], position[0]].decode()

In [227]:
puzzle = WordGrid((5,10))
puzzle

-  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -

In [228]:
word_index = pd.read_csv("data/word_index.csv", encoding='utf-8')
dictionary = word_index[word_index["lang_code"] == "en"]
dictionary["word"] = dictionary["word"].astype(str)
dictionary = dictionary[dictionary["len"] >= MIN_WORD_LEN]
dictionary = dictionary[dictionary["len"] <= max(puzzle.shape)]
dictionary = dictionary[~dictionary["word"].str.contains(r"[0-9]")]
print(len(dictionary))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dictionary["word"] = dictionary["word"].astype(str)


726021


In [240]:
"who" in sorted(dictionary["word"].to_list())

True

In [230]:
while not puzzle.add_word((1,2), Direction.DOWN, dictionary["word"].sample(1).item()):
    pass
print(puzzle)
puzzle.reset()

-  -  -  -  -  -  -  -  -  -
-  -  -  -  -  -  -  -  -  -
-  l  -  -  -  -  -  -  -  -
-  a  -  -  -  -  -  -  -  -
-  b  -  -  -  -  -  -  -  -


In [236]:
def get_candidates(puzzle: WordGrid, position: tuple, direction: Direction, blacklist: list):
    if direction == Direction.ACROSS:
        x, y = position
        max_len = puzzle.shape[1] - position[0]
        puzzle.flip()
    else:
        max_len = puzzle.shape[0] - position[1]
        y, x = position

    candidates = dictionary[dictionary["word"].apply(lambda w: puzzle.validate_word(x, y, direction, w))]
    
    if direction == Direction.ACROSS:
        puzzle.flip()
    
    candidates = candidates[~candidates["word"].isin(blacklist)]
        
    letters = puzzle.get_letters(position, direction, max_len)
    for index, letter in letters:
        candidates = candidates[candidates["word"].str[index] == letter]
        
    return candidates
    

In [246]:
n = 0
direction = Direction.DOWN
word_list = []
puzzle.reset()
positions = {
    Direction.DOWN: {pos: [] for pos in product(range(puzzle.shape[1]), range(puzzle.shape[0] - MIN_WORD_LEN))},
    Direction.ACROSS: {pos: [] for pos in product(range(puzzle.shape[1] - MIN_WORD_LEN), range(puzzle.shape[0]))}
}
random.seed(12)
pbar = tqdm()
while n < 12:
    if len(positions[direction]) == 0:
        if len(positions[Direction.flip(direction)]) == 0:
            break
        direction = Direction.flip(direction)
    
    position = random.choice(list(positions[direction]))
    
    blacklist = positions[direction][position] + word_list
    candidates = get_candidates(puzzle, position, direction, blacklist)

    if len(candidates) == 0:
        positions[direction].pop(position, None)
        continue
    
    try:
        word = candidates["word"].sample(1, weights=candidates.freq).item()
    except:
        word = candidates["word"].sample(1).item()
    
    pbar.update(n)
    #pbar.set_description(f"word: {word}, pos: {position}, dir: {direction.name.lower()}, cnd: {len(candidates)}, slots {len(positions[Direction.DOWN])}", refresh=True)
    print(f"word: {word}, pos: {position}, dir: {direction.name.lower()}, cnd: {len(candidates)}, slots {len(positions[Direction.DOWN])}d {len(positions[Direction.ACROSS])}a")
    
    if puzzle.add_word(position, direction, word):
        if len(positions[Direction.flip(direction)]) > 0:
            direction = Direction.flip(direction)

        positions[direction].pop(position, None)    
        word_list.append(word)
        n += 1
        #print(puzzle)
    else:
        positions[direction][position].append(word)
        logger.info(f"Can't place word {word} at {position}")

print(word_list)
print(puzzle)

66it [21:56, 19.94s/it]
0it [00:00, ?it/s]

word: new, pos: (7, 1), dir: down, cnd: 48458, slots 20d 35a


1it [00:03,  2.50s/it]

word: those, pos: (3, 2), dir: across, cnd: 38210, slots 20d 35a


3it [00:03,  1.09it/s]

word: well, pos: (8, 0), dir: down, cnd: 100054, slots 20d 35a


6it [00:08,  1.26s/it]

word: more, pos: (1, 4), dir: across, cnd: 579324, slots 20d 34a


10it [00:09,  1.46it/s]

word: its, pos: (6, 0), dir: down, cnd: 4282, slots 20d 34a


15it [00:13,  1.24it/s]

word: interview, pos: (0, 0), dir: across, cnd: 46, slots 20d 33a


21it [00:15,  1.80it/s]

word: thee, pos: (4, 1), dir: down, cnd: 34, slots 17d 33a


28it [00:23,  1.22it/s]

word: behowls, pos: (3, 3), dir: across, cnd: 2, slots 17d 27a


36it [00:23,  1.98it/s]

word: that, pos: (0, 1), dir: down, cnd: 48433, slots 17d 27a


45it [00:58,  1.85s/it]

word: Gestetner, pos: (1, 1), dir: across, cnd: 1, slots 17d 8a


55it [01:00,  1.18s/it]

word: ngram, pos: (1, 0), dir: down, cnd: 1, slots 15d 8a


66it [01:08,  1.02s/it]

word: same, pos: (6, 4), dir: across, cnd: 48428, slots 15d 3a
['new', 'those', 'well', 'more', 'its', 'interview', 'thee', 'behowls', 'that', 'Gestetner', 'ngram', 'same']
i  n  t  e  r  v  i  e  w  -
t  g  e  s  t  e  t  n  e  r
h  r  -  t  h  o  s  e  l  -
a  a  -  b  e  h  o  w  l  s
t  m  o  r  e  -  s  a  m  e


In [233]:
def custom_print(puzzle: WordGrid):
    to_print = []
    for chars, states in zip(puzzle.puzzle, puzzle.state):
        data = []
        for char, state in zip(chars, states):
            if state & Direction.ACROSS.value and state & Direction.DOWN.value:
                color = "magenta"
            elif state & Direction.ACROSS.value:
                color = "blue"
            elif state & Direction.DOWN.value:
                color = "yellow"
            else:
                color = "white"
            data.append(colored(char, color))
        to_print.append(data)
    print(tabulate(to_print))

In [247]:
custom_print(puzzle)

-  -  -  -  -  -  -  -  -  -
[34mi[0m  [35mn[0m  [34mt[0m  [34me[0m  [34mr[0m  [34mv[0m  [35mi[0m  [34me[0m  [35mw[0m  [97m-[0m
[33mt[0m  [35mg[0m  [34me[0m  [34ms[0m  [35mt[0m  [34me[0m  [35mt[0m  [35mn[0m  [35me[0m  [34mr[0m
[33mh[0m  [33mr[0m  [97m-[0m  [34mt[0m  [35mh[0m  [34mo[0m  [35ms[0m  [35me[0m  [33ml[0m  [97m-[0m
[33ma[0m  [33ma[0m  [97m-[0m  [34mb[0m  [35me[0m  [34mh[0m  [34mo[0m  [35mw[0m  [35ml[0m  [34ms[0m
[33mt[0m  [35mm[0m  [34mo[0m  [34mr[0m  [35me[0m  [97m-[0m  [34ms[0m  [34ma[0m  [34mm[0m  [34me[0m
-  -  -  -  -  -  -  -  -  -
