#  クロスワード自動生成

In [279]:
import re

# Crossword size (number of cells per side)
SIZE = 2

# Character that marks an empty cell
BLANK = '□'

# Character that marks a cell that is not used
UNABLE = '■'

In [280]:
# Load the word dictionary.
# dictionary[word_length][initial] = list of (word_id, word)
dictionary = dict()

dictionary_file = 'data/dict.csv'
word_id = 1

# NOTE(review): no encoding is passed, so the platform default is used to
# decode the file -- confirm the file's encoding and pass it explicitly.
with open(dictionary_file, 'r') as fp:
    for row in fp:
        word = row.strip()
        # Skip blank lines; word[0] below would raise IndexError on them.
        if not word:
            continue
        length = len(word)

        # Group by length first, then by first character.
        dictionary.setdefault(length, dict()).setdefault(word[0], list()).append((word_id, word))
        word_id += 1

print(dictionary)

# 2-D matrix that represents the crossword.
# data_list[i][j] is the cell in row (i+1), column (j+1).
#
# To keep the later processing simple the matrix is one cell larger than the
# crossword on every side, and that whole outer ring is marked UNABLE.
data_list = []
for i in range(SIZE + 2):
    if i in (0, SIZE + 1):
        # Top and bottom border rows
        data_list.append([UNABLE] * (SIZE + 2))
    else:
        # Inner row: border cell, SIZE empty cells, border cell
        data_list.append([UNABLE] + [BLANK] * SIZE + [UNABLE])

{2: {'ヤ': [(1, 'ヤマ')], 'カ': [(2, 'カワ')], 'マ': [(3, 'マリ')]}}


In [281]:
# Test
# Choose which cell is blocked (not used)
data_list[2][1] = UNABLE

In [282]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Full-width digits (U+FF10 .. U+FF19)
row_labels = list(map(chr, range(0xFF10, 0xFF10 + 10)))
# Upper-case letters 'A' .. 'Z'
col_labels = list(map(chr, range(ord('A'), ord('A') + 26)))


def _show_program(data_array):
    """
    クロスワードをいい感じで表示する
    """
    
    size = len(data_array)
    
    h_pipe = ''
    v_pipe = ''
    
    for i in range(0, size):
        print(h_pipe.join(data_array[i]))
    
    
    """
    
    # Make a 9x9 grid...
    nrows, ncols = SIZE, SIZE
    #image = np.zeros(nrows*ncols)

    # Set every other cell to a random number (this would be your data)
    #image[::2] = np.random.random(nrows*ncols //2 + 1)

    # Reshape things into a 9x9 grid.
    #image = image.reshape((nrows, ncols))

    #row_labels = range(nrows)
    #col_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
    plt.matshow(data_list)
    plt.xticks(range(ncols), row_labels)
    plt.yticks(range(nrows), col_labels)
    plt.show()
    """

In [283]:
# Print the grid, including its outer ring of UNABLE cells.
_show_program(data_list)

■■■■
■□□■
■■□■
■■■■


In [284]:
def _get_line_words(data):
    """
    Collect the word slots of the grid, separately for down and across
    (e.g. "A1 down, N letters", "C3 across, M letters").

    A slot starts at a usable cell whose previous neighbour (the cell above
    for down, the cell to the left for across) is UNABLE and whose next
    neighbour is usable, so every slot is at least two cells long. The slot
    runs until the next UNABLE cell.

    The neighbour look-ups assume the grid is surrounded by a ring of UNABLE
    cells, so that every usable cell has a neighbour on each side.

    @param data: square grid given as a list of rows
    @return: (v_words, h_words); every entry is (row, column, placeholder),
             where row/column index into data and placeholder is BLANK
             repeated once per cell of the slot
    """
    v_words = []
    h_words = []

    size = len(data)

    for v in range(size):
        for h in range(size):
            if data[v][h] == UNABLE:
                continue

            # Down: this cell is the first cell of a vertical slot
            if data[v-1][h] == UNABLE and data[v+1][h] != UNABLE:
                for i in range(1, size+1):
                    if data[v+i][h] == UNABLE:
                        # i cells lie between the start and the blocking cell
                        v_words.append((v, h, BLANK * i))
                        break
            # Across: this cell is the first cell of a horizontal slot
            if data[v][h-1] == UNABLE and data[v][h+1] != UNABLE:
                for i in range(1, size+1):
                    if data[v][h+i] == UNABLE:
                        h_words.append((v, h, BLANK * i))
                        break

    return v_words, h_words

# Collect the down / across slots of the grid and print both lists.
v_answer, h_answer = _get_line_words(data_list)

print('v_answer', v_answer, sep='\n')
print('h_answer', h_answer, sep='\n')

v_answer
[(1, 2, '□□')]
h_answer
[(1, 1, '□□')]


In [285]:
def _search_word(pattern, dictionary, searched_words):
    """
    辞書からpatternに合った言葉を抜き出す
    return: 言葉が存在しない場合、None
    """
    
    length = len(pattern)
    regex = re.compile('^{}$'.format(pattern))
    
    try:
        # 頭文字が決まっていない場合
        if pattern[0] == '.':
            candidate_dict = dictionary[length]
        else:
            candidate_dict = {pattern[0]: dictionary[length][pattern[0]]}
            
        for initial, word_list in candidate_dict.items():
            for word_info in word_list:
                if word_info not in searched_words and regex.match(word_info[1]):
                    return word_info
    
        return None, None
                 
    # 文字数、頭文字の候補が辞書にない場合
    except KeyError as e:
        return None, None

def _next_search(search_index):
    """
    Advance a (row, column) search position by one cell.

    The position moves left to right inside a row and then to column 1 of
    the next row, i.e. from the top-left towards the bottom-right.

    @param search_index: (row, column) pair
    @return: the next (row, column) pair, or None when search_index is
             already (SIZE, SIZE)
    """
    row, col = search_index

    # Still inside the row: step one column to the right
    if col != SIZE:
        return (row, col + 1)

    # End of a row: stop after the last row, otherwise wrap to the next one
    return None if row == SIZE else (row + 1, 1)

def _create_corss_word(data_array, v_words, h_words, search_index, dic):
    """
    Fill the crossword by depth-first search with backtracking.

    For slot number search_index the down slot is filled first. The function
    then calls itself with the same index: the down slot is full by then and
    is skipped, so the across slot with the same index is handled. After an
    across slot has been filled the search continues with search_index + 1.
    When there is nothing to fill at search_index, the search also continues
    with search_index + 1 so that later slots are never left out.

    @param data_array: grid (list of rows), modified in place
    @param v_words: down slots as (row, column, placeholder) tuples
    @param h_words: across slots as (row, column, placeholder) tuples
    @param search_index: index into v_words / h_words to handle next
    @param dic: [word_length][initial] -> list of (word_id, word); a word
                placed in the grid is removed from it and appended again
                when that placement turns out to lead nowhere
    @return: data_array when every slot could be filled, otherwise None
    """

    # Termination: every down and across slot has been handled
    if len(v_words) <= search_index and len(h_words) <= search_index:
        return data_array

    # Words already tried for the slot filled by this call
    searched_words = []

    # Down slot
    if search_index < len(v_words):
        # Cell position and word length
        v_index, h_index, word = v_words[search_index]
        candidate = [data_array[v_index+i][h_index] for i in range(len(word))]
        candidate_regex = ''.join(candidate).replace(BLANK, '.')

        # Only fill the slot while it still has an empty cell
        if BLANK in candidate:
            print('tate ({}, {}) = {}'.format(v_index, h_index, candidate_regex.replace('.', BLANK)))

            while True:
                answer_id, answer = _search_word(candidate_regex, dic, searched_words)
                # No candidate left
                if not answer_id:
                    print('ERROR: Word Not Found')

                    # Put the cells back to what they were on entry
                    for i, cell in enumerate(candidate):
                        data_array[v_index+i][h_index] = cell

                    print('dic = {}'.format(dic))
                    print('searched_words = {}'.format(searched_words))

                    return None

                # Take the word out of the dictionary so it is not used twice
                dic[len(answer)][answer[0]].remove((answer_id, answer))
                searched_words.append((answer_id, answer))

                print('tate hit: {}: {} '.format(answer_id, answer))

                # Write the word into the grid
                for i, letter in enumerate(answer):
                    data_array[v_index+i][h_index] = letter

                print('暫定')
                _show_program(data_array)

                # Same index on purpose: the down slot is full now, so the
                # nested call goes on to the across slot of this index.
                result = _create_corss_word(data_array, v_words, h_words, search_index, dic)

                # Dead end: give the word back and try the next candidate
                if result is None:
                    dic[len(answer)][answer[0]].append((answer_id, answer))
                    continue

                return result

    # Across slot
    if search_index < len(h_words):
        # Cell position and word length
        v_index, h_index, word = h_words[search_index]
        candidate = data_array[v_index][h_index: h_index+len(word)]
        candidate_regex = ''.join(candidate).replace(BLANK, '.')

        # The check looks at the slot's placeholder, not at the grid cells, so
        # a row that crossing words have already completed is still looked up
        # in the dictionary.
        if BLANK in word:
            print('yoko ({}, {}) = {}'.format(v_index, h_index, candidate_regex.replace('.', BLANK)))

            while True:
                print(candidate_regex)
                print(dic)
                answer_id, answer = _search_word(candidate_regex, dic, searched_words)
                # No candidate left
                if not answer_id:
                    print('ERROR: Word Not Found')

                    # Put the cells back to what they were on entry
                    for i, cell in enumerate(candidate):
                        data_array[v_index][h_index+i] = cell

                    print('dic = {}'.format(dic))
                    print('searched_words = {}'.format(searched_words))

                    return None

                # Take the word out of the dictionary so it is not used twice
                dic[len(answer)][answer[0]].remove((answer_id, answer))
                searched_words.append((answer_id, answer))

                print('yoko hit: {}: {} '.format(answer_id, answer))

                # Write the word into the grid
                for i, letter in enumerate(answer):
                    data_array[v_index][h_index+i] = letter

                print('暫定')
                _show_program(data_array)

                result = _create_corss_word(data_array, v_words, h_words, search_index+1, dic)

                # Dead end: give the word back and try the next candidate
                if result is None:
                    dic[len(answer)][answer[0]].append((answer_id, answer))
                    continue

                return result

    # Nothing had to be filled at this index (the down slot is already full
    # and there is no open across slot). Continue with the next index instead
    # of stopping, otherwise later slots would stay empty.
    return _create_corss_word(data_array, v_words, h_words, search_index+1, dic)

# Build the crossword.
import copy

# Work on a copy so that the template grid in data_list stays untouched.
# NOTE(review): dictionary is passed as is and words placed in the grid are
# removed from it, so running this again starts from a smaller dictionary.
data_tmp = copy.deepcopy(data_list)
print('init')
_show_program(data_tmp)

result = _create_corss_word(data_tmp, v_answer, h_answer, 0, dictionary)

# The search returns None when the grid cannot be filled; printing None as a
# grid would raise TypeError.
if result is None:
    print('ERROR: Crossword could not be completed')
else:
    _show_program(result)

init
■■■■
■□□■
■■□■
■■■■
tate (1, 2) = □□
tate hit: 1: ヤマ 
暫定
■■■■
■□ヤ■
■■マ■
■■■■
yoko (1, 1) = □ヤ
.ヤ
{2: {'ヤ': [], 'カ': [(2, 'カワ')], 'マ': [(3, 'マリ')]}}
ERROR: Word Not Found
dic = {2: {'ヤ': [], 'カ': [(2, 'カワ')], 'マ': [(3, 'マリ')]}}
searched_words = []
tate hit: 2: カワ 
暫定
■■■■
■□カ■
■■ワ■
■■■■
yoko (1, 1) = □カ
.カ
{2: {'ヤ': [(1, 'ヤマ')], 'カ': [], 'マ': [(3, 'マリ')]}}
ERROR: Word Not Found
dic = {2: {'ヤ': [(1, 'ヤマ')], 'カ': [], 'マ': [(3, 'マリ')]}}
searched_words = []
tate hit: 3: マリ 
暫定
■■■■
■□マ■
■■リ■
■■■■
yoko (1, 1) = □マ
.マ
{2: {'ヤ': [(1, 'ヤマ')], 'カ': [(2, 'カワ')], 'マ': []}}
yoko hit: 1: ヤマ 
暫定
■■■■
■ヤマ■
■■リ■
■■■■
[['■', '■', '■', '■'], ['■', 'ヤ', 'マ', '■'], ['■', '■', 'リ', '■'], ['■', '■', '■', '■']]
■■■■
■ヤマ■
■■リ■
■■■■
