In [2]:
import string
import re
import random
import collections
import copy

from enum import Enum
# The eight compass directions in which a word can be laid out in the grid.
Direction = Enum(
    'Direction',
    ['left', 'right', 'up', 'down',
     'upleft', 'upright', 'downleft', 'downright'])

# (row step, column step) taken for each successive letter in a direction.
delta = {
    Direction.left: (0, -1),
    Direction.right: (0, 1),
    Direction.up: (-1, 0),
    Direction.down: (1, 0),
    Direction.upleft: (-1, -1),
    Direction.upright: (-1, 1),
    Direction.downleft: (1, -1),
    Direction.downright: (1, 1),
}

# Short aliases for joining an iterable of strings.
cat = ''.join     # no separator
wcat = ' '.join   # space-separated
lcat = '\n'.join  # one item per line

In [3]:
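# One-off generation of the 'wordsearch-words' file (kept for reference, not run):
# take the all-lowercase words from the system dictionary and keep only those
# that are not a substring of any other word.
# all_words = [w.strip() for w in open('/usr/share/dict/british-english').readlines()
#             if all(c in string.ascii_lowercase for c in w.strip())]
# words = [w for w in all_words
#          if not any(w in w2 for w2 in all_words if w != w2)]
# open('wordsearch-words', 'w').write(lcat(words))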

In [4]:
# Load the candidate word list, one word per line.
# The file is opened in a `with` block so the handle is closed promptly.
with open('wordsearch-words') as word_file:
    # Strip each line once, then keep only non-empty words made purely of
    # lowercase ASCII letters (a blank line would otherwise pass the all()
    # test and yield an empty "word").
    ws_words = [w for w in (line.strip() for line in word_file)
                if w and all(c in string.ascii_lowercase for c in w)]
ws_words[:10]

['aardvarks',
 'abaci',
 'abacuses',
 'abaft',
 'abalones',
 'abandoned',
 'abandoning',
 'abandonment',
 'abandons',
 'abased']

In [5]:
def empty_grid(w, h):
    """Return a new grid of h rows, each an independent list of w '.' cells.
    """
    return [['.'] * w for _ in range(h)]

In [6]:
def show_grid(grid):
    """Render the grid as text: cells of a row run together, rows on separate lines.
    """
    rendered_rows = [cat(row) for row in grid]
    return lcat(rendered_rows)

In [7]:
# Build a blank 10x10 grid and display it.
grid = empty_grid(10, 10)
print(show_grid(grid))

..........
..........
..........
..........
..........
..........
..........
..........
..........
..........


In [8]:
def indices(grid, r, c, l, d):
    """List the in-bounds (row, column) cells of a run through the grid.

    The run starts at (r, c) and takes l steps in direction d. Positions
    that fall outside the grid are left out, so the result may hold fewer
    than l cells.
    """
    row_step, col_step = delta[d]
    width = len(grid[0])
    height = len(grid)
    cells = []
    for step in range(l):
        row = r + step * row_step
        col = c + step * col_step
        if 0 <= row < height and 0 <= col < width:
            cells.append((row, col))
    return cells

In [9]:
def gslice(grid, r, c, l, d):
    """Return the grid contents along a run of up to l cells from (r, c) in direction d.

    Only in-bounds cells are included, so the list may be shorter than l.
    """
    contents = []
    for row, col in indices(grid, r, c, l, d):
        contents.append(grid[row][col])
    return contents

In [10]:
def set_grid(grid, r, c, d, word):
    """Write word into grid, starting at (r, c) and heading in direction d.

    The grid is modified in place and also returned. Letters are paired, in
    order, with the in-bounds cells of the run; any letters left over once
    the cells run out are not written.
    """
    positions = indices(grid, r, c, len(word), d)
    for (row, col), letter in zip(positions, word):
        grid[row][col] = letter
    return grid

In [11]:
# Write 'testword' diagonally (down and to the right) from row 2, column 3.
# The run leaves the grid before the word ends, so the final letter is dropped.
set_grid(grid, 2, 3, Direction.downright, 'testword')
print(show_grid(grid))

..........
..........
...t......
....e.....
.....s....
......t...
.......w..
........o.
.........r
..........


In [12]:
# Ask for 15 cells rightwards from row 3, column 2: only the in-bounds cells come back.
cat(gslice(grid, 3, 2, 15, Direction.right))

'..e.....'

In [13]:
# Use a slice as a regular expression: each '.' cell matches any letter,
# so 'keen' fits over the existing 'e'.
re.match(cat(gslice(grid, 3, 2, 4, Direction.right)), 'keen')

<_sre.SRE_Match object; span=(0, 4), match='keen'>

In [14]:
# With only a 3-cell slice, re.match still succeeds because it accepts a match
# on just the start of the word.
re.match(cat(gslice(grid, 3, 2, 3, Direction.right)), 'keen')

<_sre.SRE_Match object; span=(0, 3), match='kee'>

In [15]:
# re.fullmatch requires the whole word to be matched, so the 3-cell slice
# rejects the 4-letter word (result is None, hence no output).
re.fullmatch(cat(gslice(grid, 3, 2, 3, Direction.right)), 'keen')

In [16]:
# 'kine' clashes with the 'e' already in the grid, so there is no match.
re.match(cat(gslice(grid, 3, 2, 4, Direction.right)), 'kine')

In [17]:
def could_add(grid, r, c, d, word):
    """Test whether word fits in the grid from (r, c) in direction d.

    The cells along the run are joined into a regular expression and matched
    against the whole word, so '.' cells accept any letter while other cells
    must agree with the word (assuming cells hold only '.' or plain letters).
    Returns the match object on success, or None otherwise.
    """
    pattern = cat(gslice(grid, r, c, len(word), d))
    return re.fullmatch(pattern, word)

In [18]:
# 'keen' is compatible with the letters already on row 3, so a match object is returned.
could_add(grid, 3, 2, Direction.right, 'keen')

<_sre.SRE_Match object; span=(0, 4), match='keen'>

In [19]:
# 'kine' conflicts with an existing letter, so the result is None (no output).
could_add(grid, 3, 2, Direction.right, 'kine')

In [20]:
# Pick one of the Direction members at random.
random.choice(list(Direction))

<Direction.up: 3>

In [21]:
def fill_grid(grid, words, word_count, max_attempts=1000):
    """Place randomly chosen words into the grid at random positions.

    Repeatedly picks a random start cell, direction and word, and writes the
    word into the grid whenever it fits with what is already there. Stops once
    word_count words have been placed, or after max_attempts consecutive
    failed tries.

    grid is modified in place. Returns (grid, added_words), where added_words
    lists the placed words in the order they were added; it may hold fewer
    than word_count words.
    """
    width = len(grid[0])
    height = len(grid)
    directions = list(Direction)  # loop-invariant, so build it once
    added_words = []
    attempts = 0  # consecutive failures since the last successful placement
    while len(added_words) < word_count and attempts < max_attempts:
        attempts += 1
        # Rows are bounded by the height and columns by the width; drawing
        # them the other way round is wrong for non-square grids.
        r = random.randrange(height)
        c = random.randrange(width)
        word = random.choice(words)
        d = random.choice(directions)
        if word in added_words:
            # Never list (or lay down) the same word twice: a word already in
            # the grid would otherwise "fit" over its own letters.
            continue
        if could_add(grid, r, c, d, word):
            set_grid(grid, r, c, d, word)
            added_words.append(word)
            attempts = 0
    return grid, added_words

In [22]:
# Try to place 40 words in a blank 20x20 grid, then report how many were
# actually placed (fill_grid gives up after too many failed attempts in a row).
g = empty_grid(20, 20)
g, ws = fill_grid(g, ws_words, 40)
len(ws)

35

In [23]:
# Show the filled grid followed by the words that were placed.
print(show_grid(g))
print(len(ws), 'words added')
print(wcat(ws))

......swoopingg.l.up
..dunsnapped.n.i..ne
.cee.gninarci.m...er
sotpt......k.pmv..mv
euirca.d..c.n.a...pa
snduo.o.eo.e.lgs..ld
itndny.ctks.i.nos.oe
rroev.lsrsss..ifr.ys
eycno.eb.aeub.ttebas
tmetyr..asgetmuemebe
nerie....tvuu.dsraln
in.adbdmbecls.etocei
w..loeu.lilu..s.fh.d
...rtl.e.ec.l...eimw
..oac.d.v..y.e..rnao
.nrhgniknilsc.n..gyd
.pignippay...l.i..f.
.n..skcenrehtael..l.
g....popinjays.s..y.
gnimmugspuds.relppus
35 words added
ineluctably limpness countrymen slinking beaching restocking vellum convoyed winterises tusked leathernecks sugarcoated mayfly mulching popinjays magnitudes unsnapped prudential yapping spuds softest boron craning unemployable reformers bicycles swooping recondite dowdiness gumming pervades beveled valises suppler prated


In [24]:
def present(grid, word):
    """Search the grid for word, trying every cell and every direction.

    Returns (True, row, column, direction) for the first place found, scanning
    rows top to bottom, columns left to right, and directions in Direction
    order. If the word is not found, returns (False, 0, 0, <first Direction>).
    """
    width = len(grid[0])
    height = len(grid)
    length = len(word)
    for row in range(height):
        for col in range(width):
            for heading in Direction:
                found = cat(gslice(grid, row, col, length, heading))
                if found == word:
                    return True, row, col, heading
    # Not found: the position and direction are placeholders only.
    return False, 0, 0, next(iter(Direction))

In [25]:
# Check that every placed word can be found in the grid, and show where.
for w in ws:
    print(w, present(g, w))

ineluctably (True, 16, 15, <Direction.upleft: 5>)
limpness (True, 0, 16, <Direction.downleft: 7>)
countrymen (True, 2, 1, <Direction.down: 4>)
slinking (True, 15, 11, <Direction.left: 1>)
beaching (True, 8, 17, <Direction.down: 4>)
restocking (True, 9, 5, <Direction.upright: 6>)
vellum (True, 14, 8, <Direction.upright: 6>)
convoyed (True, 4, 4, <Direction.down: 4>)
winterises (True, 12, 0, <Direction.up: 3>)
tusked (True, 9, 12, <Direction.upleft: 5>)
leathernecks (True, 17, 15, <Direction.left: 1>)
sugarcoated (True, 11, 12, <Direction.upleft: 5>)
mayfly (True, 13, 18, <Direction.down: 4>)
mulching (True, 11, 7, <Direction.downleft: 7>)
popinjays (True, 18, 5, <Direction.right: 2>)
magnitudes (True, 3, 14, <Direction.down: 4>)
unsnapped (True, 1, 3, <Direction.right: 2>)
prudential (True, 3, 3, <Direction.down: 4>)
yapping (True, 16, 9, <Direction.left: 1>)
spuds (True, 19, 7, <Direction.right: 2>)
softest (True, 5, 15, <Direction.down: 4>)
boron (True, 11, 5, <Direction.downleft: 7>)

In [26]:
def interesting(grid, words):
    """Decide whether a filled grid is varied enough to keep.

    True when there are more than 35 and fewer than 40 words, and the words
    between them use all, or all but one, of the possible directions.
    """
    directions_used = {present(grid, w)[3] for w in words}
    return 35 < len(words) < 40 and len(directions_used) + 1 >= len(delta)

In [27]:
# Does the grid built above meet the word-count and direction-variety criteria?
interesting(g, ws)

False

In [28]:
def interesting_grid():
    """Keep generating 20x20 grids until one passes interesting().

    Each attempt starts from a blank grid and tries to place 40 words from
    ws_words. Returns (grid, words) for the first acceptable attempt.
    """
    while True:
        grid, words = fill_grid(empty_grid(20, 20), ws_words, 40)
        if interesting(grid, words):
            return grid, words

In [29]:
# Generate an acceptable grid and show it, with the number of words placed,
# the number of distinct directions they run in, and the words themselves.
g, ws = interesting_grid()
print(show_grid(g))
print(len(ws), 'words added; ', len(set(present(g, w)[3] for w in ws)), 'directions')
print(wcat(ws))

..reittonk..ss......
tinctured.wcee.....w
serutats.oyozm....o.
b....s..l.eoia...m.r
e.b.y.lf..lpsd..bgye
a.ist.no..less.ssrgm
m.gtfi.lo.orae.n.ura
edaei..i.cwi.mo..mor
demrn..b..in.m...psk
epya...e..sgm....ile
slsg...l..hi.....nrd
tekisyassesdepeebeum
rtec.gninretni...sfo
oiinsetse..baggy.snd
ynn....p..sebircsaui
egs.noitasiretupmocf
r.....artefacts....y
s.....seilaog.winosi
.....eyelidsegener.n
regicidesesopatxuj.g
38 words added;  7 directions
wombs persimmons computerisation ascribes coopering goalies beamed modifying insets cigarets statures libels remarked baggy juxtaposes mesdames grumpiness artefacts skeins assizes inflow depleting beeped reneges interning yellowish regicides eyelids cools orgy nifty knottier destroyers unfurls tinctured bigamy winos essays


In [30]:
def datafile(name, sep='\t'):
    """Yield [key, count] pairs read from a delimited text file.

    Each line is split on sep; the first field is the key and the second is
    converted to an int.
    """
    with open(name) as handle:
        for record in handle:
            fields = record.split(sep)
            key = fields[0]
            count = int(fields[1])
            yield [key, count]

In [31]:
def normalise(frequencies):
    """Rescale a mapping of frequencies so that its values add up to one.

    Returns a defaultdict (missing keys read as 0) with the same keys.

    >>> sorted(normalise({1: 1, 2: 0}).items())
    [(1, 1.0), (2, 0.0)]
    >>> sorted(normalise({1: 1, 2: 1}).items())
    [(1, 0.5), (2, 0.5)]
    >>> sorted(normalise({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
    [(1, 0.333...), (2, 0.333...), (3, 0.333...)]
    >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items())
    [(1, 0.25), (2, 0.5), (3, 0.25)]
    """
    total = sum(frequencies.values())
    scaled = collections.defaultdict(int)
    for key, count in frequencies.items():
        scaled[key] = count / total
    return scaled


In [32]:
# Load key/count pairs from 'count_1l.txt' (presumably single-letter counts for
# English text - confirm against the data file) and scale them to sum to one.
english_counts = collections.Counter(dict(datafile('count_1l.txt')))
normalised_english_counts = normalise(english_counts)

In [34]:
# Count how often each letter occurs across the whole word list, then scale
# the counts to sum to one.
wordsearch_counts = collections.Counter(cat(ws_words))
normalised_wordsearch_counts = normalise(wordsearch_counts)

In [35]:
def weighted_choice(d):
    """Pick a random key from d, with each key weighted by its value.

    Returns None if no key is selected (for example, when d is empty).
    """
    threshold = random.uniform(0, sum(d.values()))
    running_total = 0.0
    for item, weight in d.items():
        running_total += weight
        # The first item whose cumulative weight passes the threshold wins.
        if running_total > threshold:
            return item
    return None

def random_english_letter():
    """Draw one random letter, weighted by the normalised English letter counts.
    """
    letter = weighted_choice(normalised_english_counts)
    return letter

def random_wordsearch_letter():
    """Draw one random letter, weighted by letter counts in the wordsearch word list.
    """
    letter = weighted_choice(normalised_wordsearch_counts)
    return letter

In [36]:
# Sample 100 letters using the English weights, sorted to make the spread easy to see.
cat(sorted(random_english_letter() for i in range(100)))

'aaaaaaaabcccddddeeeeeeeeeeeeeeefffggghhhhhiiiiiillllmnnnnnnoooooooooprrsssssssssssssttttttttuuuvwwwy'

In [37]:
# The same sample, but using the weights derived from the word list.
cat(sorted(random_wordsearch_letter() for i in range(100)))

'aaaaaabcccddddddeeeeeeggggghhiiiiiiiiiiiiklllmmmmnnnnnnnnnnoooooooooppprrrrrrrrrssssssttttttuuuwwwyy'

In [38]:
# A single weighted random letter.
random_wordsearch_letter()

'a'

In [39]:
def pad_grid(g0):
    """Return a copy of g0 with every '.' cell replaced by a random letter.

    The input grid is left untouched; letters come from
    random_wordsearch_letter().
    """
    padded = copy.deepcopy(g0)
    width = len(padded[0])
    for row in padded:
        for col in range(width):
            if row[col] == '.':
                row[col] = random_wordsearch_letter()
    return padded

In [40]:
# Fill the blank cells of the grid with random letters and show the result.
padded = pad_grid(g)
print(show_grid(padded))

streittonkorsssatnal
tincturedswceedrlnuw
serutatsloyozmeieiot
baanfsollleoiasnlmar
ewblyhlfetlpsdyvbgye
aeistonoeilessassrgm
mlgtfitloioraeenwura
edaeiupiscwiamoygmor
demrnasbhcinsmiiapsk
epyakraedrsgmolsnile
slsgtuoloihireneonrd
tekisyassesdepeebeum
rtecigninretnincesfo
oiinsetseddbaggydsnd
ynnnsfapcfsebircsaui
egsonoitasiretupmocf
raioelartefactseawfy
speonsseilaogrwinosi
wrndfeyelidsegenerln
regicidesesopatxujrg


In [41]:
# The original grid is unchanged, because pad_grid works on a deep copy.
print(show_grid(g))

..reittonk..ss......
tinctured.wcee.....w
serutats.oyozm....o.
b....s..l.eoia...m.r
e.b.y.lf..lpsd..bgye
a.ist.no..less.ssrgm
m.gtfi.lo.orae.n.ura
edaei..i.cwi.mo..mor
demrn..b..in.m...psk
epya...e..sgm....ile
slsg...l..hi.....nrd
tekisyassesdepeebeum
rtec.gninretni...sfo
oiinsetse..baggy.snd
ynn....p..sebircsaui
egs.noitasiretupmocf
r.....artefacts....y
s.....seilaog.winosi
.....eyelidsegener.n
regicidesesopatxuj.g


In [42]:
# Every placed word should still be findable once the grid has been padded.
for w in ws:
    print(w, present(padded, w))

wombs (True, 1, 19, <Direction.downleft: 7>)
persimmons (True, 14, 7, <Direction.upright: 6>)
computerisation (True, 15, 18, <Direction.left: 1>)
ascribes (True, 14, 17, <Direction.left: 1>)
coopering (True, 1, 11, <Direction.down: 4>)
goalies (True, 17, 12, <Direction.left: 1>)
beamed (True, 3, 0, <Direction.down: 4>)
modifying (True, 11, 19, <Direction.down: 4>)
insets (True, 13, 2, <Direction.right: 2>)
cigarets (True, 12, 3, <Direction.up: 3>)
statures (True, 2, 7, <Direction.left: 1>)
libels (True, 6, 7, <Direction.down: 4>)
remarked (True, 3, 19, <Direction.down: 4>)
baggy (True, 13, 11, <Direction.right: 2>)
juxtaposes (True, 19, 17, <Direction.left: 1>)
mesdames (True, 7, 13, <Direction.up: 3>)
grumpiness (True, 4, 17, <Direction.down: 4>)
artefacts (True, 16, 6, <Direction.right: 2>)
skeins (True, 10, 2, <Direction.down: 4>)
assizes (True, 6, 12, <Direction.up: 3>)
inflow (True, 6, 5, <Direction.upright: 6>)
depleting (True, 7, 1, <Direction.down: 4>)
beeped (True, 11, 16, <Di

In [43]:
def decoys(grid, words, all_words, limit=60):
    """Choose decoy words: words from all_words that do not appear in the grid.

    Picks enough distinct decoys to bring len(words) plus the number of decoys
    up to limit. A decoy is never one of words, never repeated, and never a
    word that present() can find in grid.

    Returns the list of decoys. It is empty when words already has limit or
    more entries, and shorter than requested when all_words does not contain
    enough eligible words.
    """
    needed = limit - len(words)
    decoy_words = []
    if needed <= 0:
        return decoy_words
    unavailable = set(words)  # real words plus decoys chosen so far
    # Walk through a shuffled copy of the candidates: each is considered at
    # most once, so the search always terminates and cannot pick duplicates.
    for candidate in random.sample(all_words, len(all_words)):
        if candidate in unavailable:
            continue
        if present(grid, candidate)[0]:
            # Happens to be spelled out in the grid, so it is not a decoy.
            continue
        decoy_words.append(candidate)
        unavailable.add(candidate)
        if len(decoy_words) >= needed:
            break
    return decoy_words

In [45]:
# Choose decoy words for the padded grid: words from the list that are not in the puzzle.
ds = decoys(padded, ws, ws_words)
ds

['blindfolding',
 'televised',
 'climaxed',
 'autumns',
 'aquaria',
 'bilks',
 'psychologies',
 'sparkled',
 'dorkiest',
 'corollas',
 'polygons',
 'accessioning',
 'bubbled',
 'astringency',
 'debunking',
 'cannery',
 'exhilarates',
 'overzealous',
 'primping',
 'geckos',
 'admiration',
 'misconstructions']

In [46]:
# Look up the real words and the decoys alike; the decoys should be reported as absent.
for w in ws + ds:
    print(w, present(padded, w))

wombs (True, 1, 19, <Direction.downleft: 7>)
persimmons (True, 14, 7, <Direction.upright: 6>)
computerisation (True, 15, 18, <Direction.left: 1>)
ascribes (True, 14, 17, <Direction.left: 1>)
coopering (True, 1, 11, <Direction.down: 4>)
goalies (True, 17, 12, <Direction.left: 1>)
beamed (True, 3, 0, <Direction.down: 4>)
modifying (True, 11, 19, <Direction.down: 4>)
insets (True, 13, 2, <Direction.right: 2>)
cigarets (True, 12, 3, <Direction.up: 3>)
statures (True, 2, 7, <Direction.left: 1>)
libels (True, 6, 7, <Direction.down: 4>)
remarked (True, 3, 19, <Direction.down: 4>)
baggy (True, 13, 11, <Direction.right: 2>)
juxtaposes (True, 19, 17, <Direction.left: 1>)
mesdames (True, 7, 13, <Direction.up: 3>)
grumpiness (True, 4, 17, <Direction.down: 4>)
artefacts (True, 16, 6, <Direction.right: 2>)
skeins (True, 10, 2, <Direction.down: 4>)
assizes (True, 6, 12, <Direction.up: 3>)
inflow (True, 6, 5, <Direction.upright: 6>)
depleting (True, 7, 1, <Direction.down: 4>)
beeped (True, 11, 16, <Di

In [47]:
# Build a complete puzzle from scratch: generate a grid, pad it, choose decoys,
# then show the padded grid with the sorted lists of real and decoy words.
g, ws = interesting_grid()
p = pad_grid(g)
ds = decoys(p, ws, ws_words)
print(show_grid(p))
print(len(ws), 'words added; ', len(set(present(g, w)[3] for w in ws)), 'directions')
print('Present:', wcat(sorted(ws)))
print('Decoys:', wcat(sorted(ds)))

tuiababblerssknalbft
gnewerodeswskrowerif
nballooningosfriedum
iuebksidesaddlessldt
mrncdbmnnyllasuaceet
udiesnettirwtsohglli
uepoeyremeenoyreveum
cnuselitnegfatterbdp
asslmulleveiadieneer
visnmewsrelrsbtleddi
crouchinghogetrzooms
dstrihstaewseteaotoo
vkssrdovulatesrldgon
snnvengefullyuiawets
uiofendelleuferrcosi
ahsdenoixelpmocluitt
vcdauthorisesnfsnenp
eyoelmferociouslypng
lngimmpauperisedtnon
ytnthstnemecitnehhpc
38 words added;  7 directions
Present: abrades authorises babblers ballooning blanks causally chinks complexioned crouching deluded emery enticements erodes everyone fatter ferociously fireworks fried gentiles ghostwritten godsons imprisons mews ovulates owlets pauperised refuelled retracing sidesaddles suavely supine sweatshirts unburdens vacuuming vellum vengefully yeti zooms
Decoys: chastened doodling ethnologists executrixes freelancing generously halfheartedness harries kinswoman mangles narrowly optimising oversimplifies polystyrene recounts riffing slicking smackers 

In [49]:
# Generate 20 puzzles and write two files for each.
#   wordsearchNN.txt          - size line, the padded grid, then the real and
#                               decoy words sorted together.
#   wordsearch-solutionNN.txt - size line, the unpadded grid, the real words,
#                               a blank line, then the decoys.
for i in range(20):
    print(i)  # progress indicator
    g, ws = interesting_grid()
    p = pad_grid(g)
    ds = decoys(p, ws, ws_words)
    with open('wordsearch{:02}.txt'.format(i), 'w') as f:
        f.write('20x20\n')
        f.write(show_grid(p))
        f.write('\n')
        f.write(lcat(sorted(ws + ds)))
    with open('wordsearch-solution{:02}.txt'.format(i), 'w') as f:
        f.write('20x20\n')
        f.write(show_grid(g))
        f.write('\n')
        f.write(lcat(sorted(ws)) + '\n\n')
        f.write(lcat(sorted(ds)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
