In [1]:
import string
import numpy as np
import pandas as pd
from time import time 
pd.options.mode.chained_assignment = None  # default='warn'

from collections import Counter, defaultdict

from itertools import chain, product
from nltk import FreqDist


# Load the word table used by every later cell.
dictionary_df = pd.read_csv("dictionary_df1.csv")
# Restore two words by row label.
# NOTE(review): presumably read_csv parsed the literal words "null" and "nan"
# as missing values -- confirm against the CSV.
dictionary_df.loc[160413, "word"] = "null"
dictionary_df.loc[154353, "word"] = "nan"
# Drop the first column and keep the rest in their original order.
dictionary_df = dictionary_df.iloc[:, 1:]

In [2]:
# Per-letter tile values, listed in alphabetical order a..z.
keys = list(string.ascii_lowercase)
score = [1,3,3,2,1,4,2,4,3,8,5,5,3,2,1,3,10,1,1,1,2,4,4,8,4,10]
score_dict = dict(zip(keys, score))


def score_func(x):
    """Return the summed letter value of a 1-D collection of letters.

    Parameters
    ----------
    x : array-like of str
        One lowercase letter per element (for example the non-empty cells
        of a board).

    Returns
    -------
    int
        Sum of ``score_dict`` values of the letters; 0 when ``x`` is empty.
    """
    letters = np.asarray(x)
    # np.vectorize refuses size-0 input unless otypes is given, so an empty
    # selection is answered directly instead of raising ValueError.
    if letters.size == 0:
        return 0
    return sum(np.vectorize(score_dict.get)(letters))

In [3]:
def occurrence_letter(board):
    """Count how many times each character appears on the board.

    Cells equal to "None" are ignored. The result always has an entry for
    each of the 26 lowercase letters (0 when the letter is absent); any
    other character found on the board is added after them.

    Parameters
    ----------
    board : numpy array of str

    Returns
    -------
    dict
        Character -> number of occurrences.
    """
    cells = board.flatten()
    # Sorted so that characters outside a-z are appended in a stable order.
    occupied = np.sort(cells[cells != "None"])
    tally = Counter("".join(occupied))

    # Start from an all-zero table, then overlay the observed counts.
    counts = dict.fromkeys(string.ascii_lowercase, np.int64(0))
    counts.update(tally)
    return counts

def streamline(d1, board):
    """Reduce the global dictionary to the words the board could spell.

    Reads the module-level ``dictionary_df``, which must have one count
    column per key of ``d1`` plus the columns "word", "word len" and "score".

    Parameters
    ----------
    d1 : dict
        Letter -> number of times it occurs on the board
        (see ``occurrence_letter``).
    board : 2-D numpy array
        Only its shape is used here.

    Returns
    -------
    pandas.DataFrame
        Remaining words sorted by descending score, with a new
        "bag_letters" column holding each word's consecutive letter pairs.
        Only the last four columns of the frame are returned.
    """
    # I. Drop every word that needs more copies of some letter than the board
    # holds. One vectorized comparison replaces a row-wise apply(sum) per
    # occurrence level; the surviving rows are the same.
    limits = pd.Series(d1)
    needs_too_many = dictionary_df[limits.index].gt(limits, axis="columns").any(axis=1)
    playable = dictionary_df[~needs_too_many]

    # II. Sort by score and remove words longer than (#tiles - 3)
    rows, columns = board.shape
    df = playable.sort_values(by=["score"], ascending=False)
    df = df[df["word len"] <= (rows * columns - 3)].reset_index(drop=True)

    def bag_letters(word):
        # All overlapping two-letter slices of the word, in order.
        return [word[i:i + 2] for i in range(len(word) - 1)]

    # Mapping over the "word" column also works when no word is left.
    df["bag_letters"] = df["word"].map(bag_letters)
    return df[df.columns[-4:]]

In [4]:
# (row, column) offsets to the eight cells surrounding a tile.
directions = np.array([
    (0, 1),  # Right
    (0, -1), # Left
    (1, 0),  # Down
    (-1, 0), # Up
    (-1, -1),# Upper left
    (-1, 1), # Upper right
    (1, -1), # Bottom left
    (1, 1)   # Bottom right
])


def DFS(word, board):
    """Find every way to trace ``word`` on ``board`` through adjacent tiles.

    Consecutive letters must sit on cells that touch horizontally,
    vertically or diagonally, and a cell may be used once per path.

    Parameters
    ----------
    word : str
    board : 2-D numpy array of str

    Returns
    -------
    list or int
        A list of paths, each a list of (row, column) tuples in letter
        order, or 0 when the word cannot be traced (including an empty word).
    """
    rows, columns = board.shape
    letters = list(word)
    if not letters:
        return 0
    last_index = len(letters) - 1

    # Each stack entry is the full path walked so far. Carrying the path with
    # the entry keeps the "already used" check limited to that path's own
    # cells; rebuilding it from a shared visit log let cells from abandoned
    # branches block valid continuations.
    start_rows, start_cols = np.where(board == letters[0])
    stack = [[(int(r), int(c))] for r, c in zip(start_rows, start_cols)]
    stack.reverse()  # pop() takes from the end; keep the first start first

    paths = []
    while stack:
        path = stack.pop()
        depth = len(path) - 1
        if depth == last_index:
            paths.append(path)
            continue

        r, c = path[-1]
        wanted = letters[depth + 1]
        extensions = []
        for dr, dc in directions:
            nr, nc = r + int(dr), c + int(dc)
            if not (0 <= nr < rows and 0 <= nc < columns):
                continue
            if board[nr, nc] == wanted and (nr, nc) not in path:
                extensions.append(path + [(nr, nc)])
        # Reversed so that neighbours are explored in `directions` order.
        stack.extend(reversed(extensions))

    if paths:
        return paths
    return 0

In [5]:
def tiles_remove(tiles, board):
    """Return a copy of ``board`` with the given cells marked as "None".

    Parameters
    ----------
    tiles : sequence of (row, column) pairs
        Cells to clear; may be empty.
    board : numpy array of str
        Left untouched.

    Returns
    -------
    numpy array
        New board. For fixed-width unicode boards narrower than four
        characters the copy is widened to ``<U4`` so the marker is stored
        in full instead of being truncated to "N".
    """
    if board.dtype.kind == "U":
        # astype copies; promotion keeps wider boards at their own width.
        board_1 = board.astype(np.promote_types(board.dtype, np.dtype("<U4")))
    else:
        board_1 = board.copy()
    if len(tiles) == 0:
        # Nothing to clear; an empty index array would not be integer typed.
        return board_1
    row_idx, col_idx = (np.array(axis) for axis in zip(*tiles))
    board_1[row_idx, col_idx] = "None"
    return board_1

def gravity(board_1):
    """Let the remaining tiles fall down, then slide right, in place.

    After the call every column has its "None" cells stacked at the top and
    every row has its "None" cells packed to the left; the relative order
    of the tiles along each column and row is kept.

    Parameters
    ----------
    board_1 : 2-D numpy array of str
        Modified in place.

    Returns
    -------
    numpy array
        The same ``board_1`` object.
    """
    n_rows, n_cols = board_1.shape

    # Gravity down: per column, empty cells first, tiles after them.
    for c in range(n_cols):
        column = board_1[:, c]
        empty = column == "None"
        if empty.any():
            board_1[:, c] = np.concatenate([column[empty], column[~empty]])

    # Gravity right: per row, empty cells first, tiles after them.
    for r in range(n_rows):
        row = board_1[r, :]
        empty = row == "None"
        if empty.any():
            board_1[r, :] = np.concatenate([row[empty], row[~empty]])

    return board_1

In [6]:
def Move(board, df, word_lst=[], score=0):
    '''
    Play every dictionary word that can be traced on the board, one move each.

    Input:
    board    -- 2-D array of cell strings
    df       -- streamlined dictionary; the columns "word", "score" and
                "bag_letters" are read here
    word_lst -- words already played before this move (read, never modified)
    score    -- score accumulated before this move

    Steps:
    1. Collect the pairs of letters that sit on adjacent cells of the board
    2. Keep only the words whose "bag_letters" pairs all occur on the board
    3. Check with DFS that each remaining word can really be traced
    4. Remove the tiles of every traced path
    5. Apply gravity to each resulting board
    6. Return a list of (resulting board, [word] + word_lst, score + word score),
       one entry per traced path; [] when no word survives step 2 or step 3
    '''
    # 1 Find the pairs of letters in the board
    rows, columns = board.shape
    # Check the pair of letters in the grid
    # Each neighbouring pair is recorded in both reading orders. From cell
    # (i, j) the pairs with the cell below, to the right, bottom-right and
    # bottom-left are taken, so every adjacency is visited from one side only.
    bag_letters_2 = []
    for i in range(rows):
        for j in range(columns):
            if i < rows - 1:
                # Vertical pair with the cell below
                bag_letters_2.append([
                    board[i, j] + board[i + 1, j],
                    board[i + 1, j] + board[i, j]
                ])
                if j < columns - 1:
                    # Horizontal pair and bottom-right diagonal
                    bag_letters_2.append([
                        board[i, j] + board[i, j + 1],
                        board[i, j + 1] + board[i, j],
                        board[i, j] + board[i + 1, j + 1],
                        board[i + 1, j + 1] + board[i, j]
                    ])
                    if j != 0:
                        # Bottom-left diagonal (not available in the first column)
                        bag_letters_2.append([
                            board[i, j] + board[i + 1, j - 1],
                            board[i + 1, j - 1] + board[i, j]
                        ])
                else:
                    # Last column: only the bottom-left diagonal is left.
                    # NOTE(review): on a one-column board j - 1 is -1, which
                    # indexes that same column again.
                    bag_letters_2.append([
                        board[i, j] + board[i + 1, j - 1],
                        board[i + 1, j - 1] + board[i, j]
                    ])
            else:
                # Last row: only the horizontal pair is left
                if j < columns - 1:
                    bag_letters_2.append([
                        board[i, j] + board[i, j + 1],
                        board[i, j + 1] + board[i, j]
                    ])
    bag_letters_2 = list(chain.from_iterable(bag_letters_2))
    # Drop every pair containing the character "N"
    # (presumably those built from a "None" cell -- board letters look lowercase).
    bag_letters_2 = {_ for _ in bag_letters_2 if "N" not in list(_)}

    # 2. Further streamline the dictionary
    def bag_letter_avai(x):
        # True only when every letter pair of the word exists on the board
        for bag in x["bag_letters"]:
            if bag not in bag_letters_2:
                return False
        return True

    df = df[df.apply(bag_letter_avai, axis=1)]

    # 3.  Check if those words are really in the dictionary
    if len(df) == 0:
        return []
    # DFS returns a list of paths, or 0 when the word cannot be traced
    df["tiles"] = df["word"].apply(lambda x: DFS(x, board))
    df = df[df["tiles"]!=0]
    if len(df) == 0:
        return []
    # Repeat each word once per path so that every row carries a single path
    df1 = df.loc[df.index.repeat(df["tiles"].str.len())]
    df1["tiles"] = df["tiles"].explode()

    # 4. Remove corresponding tiles
    board1 = df1["tiles"].apply(lambda x: tiles_remove(x, board))

    # 5. Apply gravity to the reduced board
    board1 = board1.apply(lambda x: gravity(x))

    # 6. Return (resulted board, word, score)
    def Word(x):
        # New list per row: this word first, then the words played earlier
        w_lst = [x["word"]]
        w_lst.extend(word_lst)
        return w_lst

    output = list(zip(board1, df1.apply(Word, axis=1), df1["score"]+score))
    return output

In [19]:
def DFS_Algorithm(board, step=4):
    '''
    Depth-first search for the highest-scoring sequence of moves.

    Input:
    board -- starting 2-D board
    step  -- maximum number of words in a sequence

    The streamlined dictionary is read from the module-level ``df``.

    Output: list of (resulting board, word list, score) triples that share
    the best score found, or [] when no word can be played at all. When the
    best score is reached at several depths, the triples of the move that
    first reached it are kept.
    '''
    first_moves = Move(board, df, [], 0)
    if len(first_moves) == 0:
        return []

    # Take the maximum over the scores themselves; sorting the
    # (word list, score) tuples would order them by word.
    max_score = max(total for _, _, total in first_moves)
    best_ = [state for state in first_moves if state[2] == max_score]

    # Stack of (state, number of words played). Children are pushed in
    # reverse so that they are explored in the order Move returned them.
    stack = [(state, 1) for state in reversed(first_moves)]
    while stack:
        (b, w, s), depth = stack.pop()
        if depth >= step:
            continue
        strings = b[np.where(b != "None")]
        if len(strings) == 0:
            continue
        # Prune branches whose bound cannot beat the best score so far.
        if (s + len(strings)*score_func(strings)) <= max_score:
            continue
        board_lst = Move(b, df, word_lst=w, score=s)
        if not board_lst:
            continue
        bs = max(total for _, _, total in board_lst)
        if bs > max_score:
            max_score = bs
            # Keep the returned result in step with the improved score.
            best_ = [state for state in board_lst if state[2] == max_score]
        stack.extend((state, depth + 1) for state in reversed(board_lst))

    return best_

In [20]:
%%time
# 4x4 starting board, one letter per cell. The cells are four characters wide
# so that a cell can also hold the string "None".
board = np.array(
    [list("main"), list("tree"), list("milk"), list("cool")],
    dtype="<U4",
)
# Letter counts of the board, then the dictionary reduced to playable words.
d1 = occurrence_letter(board)
df = streamline(d1, board)

CPU times: user 1.11 s, sys: 32.8 ms, total: 1.14 s
Wall time: 1.14 s


In [10]:
# BFS for the best combination
def BFS(board, step, verbose=True):
    """Breadth-first enumeration of move sequences.

    The streamlined dictionary is read from the module-level ``df``.

    Parameters
    ----------
    board : 2-D numpy array
        Starting board.
    step : int
        States that already contain ``step`` words are not expanded.
    verbose : bool
        When True, print the time of every expansion that produced moves.

    Returns
    -------
    list
        One (word list, score, number of words) tuple per move sequence
        found; the number of words is a plain int at every level.
    """
    # Function-scope import so this cell does not depend on another cell's
    # imports; deque gives O(1) pops from the front of the frontier.
    from collections import deque

    first_moves = Move(board, df, [], 0)
    queue = deque((state, 1) for state in first_moves)
    word_score_lst1 = [(words, total, 1) for _, words, total in first_moves]
    n = 1
    while queue:
        if verbose:
            start = time()
        (boards, words_lst, scores), index = queue.popleft()
        # Depths never decrease in breadth-first order, so the first state
        # at the limit means everything expandable has been expanded.
        if index > step - 1:
            return word_score_lst1

        board_lst = Move(boards, df, word_lst=words_lst, score=scores)
        if not board_lst:
            continue
        depth = index + 1
        word_score_lst1 += [(words, total, depth) for _, words, total in board_lst]
        queue.extend((state, depth) for state in board_lst)
        if verbose:
            end = time()
            print(f"Epoch {n}: {(end-start):.2f}s")
            n += 1

    return word_score_lst1

In [11]:
%%time
# Enumerate move sequences breadth-first from the starting board; step=4
# stops the expansion of states that already hold four words.
combo_BFS = BFS(board, step=4)

Epoch 1: 0.02s
Epoch 2: 0.03s
Epoch 3: 0.03s
Epoch 4: 0.03s
Epoch 5: 0.03s
Epoch 6: 0.02s
Epoch 7: 0.02s
Epoch 8: 0.03s
Epoch 9: 0.03s
Epoch 10: 0.02s
Epoch 11: 0.02s
Epoch 12: 0.03s
Epoch 13: 0.04s
Epoch 14: 0.05s
Epoch 15: 0.03s
Epoch 16: 0.03s
Epoch 17: 0.03s
Epoch 18: 0.03s
Epoch 19: 0.02s
Epoch 20: 0.02s
Epoch 21: 0.02s
Epoch 22: 0.03s
Epoch 23: 0.02s
Epoch 24: 0.02s
Epoch 25: 0.02s
Epoch 26: 0.03s
Epoch 27: 0.03s
Epoch 28: 0.04s
Epoch 29: 0.02s
Epoch 30: 0.02s
Epoch 31: 0.03s
Epoch 32: 0.03s
Epoch 33: 0.03s
Epoch 34: 0.04s
Epoch 35: 0.04s
Epoch 36: 0.03s
Epoch 37: 0.03s
Epoch 38: 0.03s
Epoch 39: 0.03s
Epoch 40: 0.05s
Epoch 41: 0.04s
Epoch 42: 0.04s
Epoch 43: 0.04s
Epoch 44: 0.04s
Epoch 45: 0.04s
Epoch 46: 0.03s
Epoch 47: 0.04s
Epoch 48: 0.03s
Epoch 49: 0.04s
Epoch 50: 0.04s
Epoch 51: 0.02s
Epoch 52: 0.03s
Epoch 53: 0.03s
Epoch 54: 0.04s
Epoch 55: 0.03s
Epoch 56: 0.03s
Epoch 57: 0.03s
Epoch 58: 0.03s
Epoch 59: 0.04s
Epoch 60: 0.03s
Epoch 61: 0.03s
Epoch 62: 0.03s
Epoch 63: 0.04s
E

KeyboardInterrupt: 

In [12]:
# Rank the recorded entries by their second field (the score), highest first.
# NOTE(review): combo_BFS only exists once the BFS cell has run to completion.
sorted(combo_BFS, key=lambda x: x[1], reverse=True)

NameError: name 'combo_BFS' is not defined