In [1]:
import numpy as np
import pandas as pd
import string
from time import time

# The eight king-move offsets (row delta, column delta) around a cell, listed
# row by row from top-left to bottom-right with the cell itself left out.
directions = [
    (d_row, d_col)
    for d_row in (-1, 0, 1)
    for d_col in (-1, 0, 1)
    if (d_row, d_col) != (0, 0)
]
# Same offsets as an (8, 2) array so they can be added to a position in one step.
directions_array = np.array(directions)

class TrieNode:
    """One node of the prefix tree."""

    def __init__(self):
        """Create a node with no children that does not terminate a word."""
        # Child nodes, keyed by the next character.
        self.children = dict()
        # True when the path from the root to this node spells a complete word.
        self.is_word = False

class Trie:
    """Prefix tree of words supporting insertion, exact lookup and prefix lookup."""

    def __init__(self):
        """Start with an empty tree consisting of just the root node."""
        self.root = TrieNode()

    def _descend(self, chars):
        """Follow ``chars`` from the root; return the node reached, or None if the path breaks."""
        current = self.root
        for ch in chars:
            current = current.children.get(ch)
            if current is None:
                return None
        return current

    def insert(self, word):
        """Add ``word`` to the tree, creating any missing nodes along its path."""
        current = self.root
        for ch in word:
            following = current.children.get(ch)
            if following is None:
                following = TrieNode()
                current.children[ch] = following
            current = following
        current.is_word = True

    def search(self, word):
        """Return True only if ``word`` itself was inserted (not merely a prefix of one)."""
        reached = self._descend(word)
        if reached is None:
            return False
        return reached.is_word

    def starts_with(self, prefix):
        """Return True if at least one path in the tree begins with ``prefix``."""
        return self._descend(prefix) is not None

# Build the prefix tree used for word search: every line of the dictionary
# file is inserted as one word.
trie = Trie()
with open('dictionary_actual_game.txt', 'r') as file:
    for word in file.read().splitlines():
        trie.insert(word)
        
# Lowercase alphabet and a parallel 26-entry list of per-letter values
# (presumably tile scores for a..z — TODO confirm; neither `keys` nor `score`
# is read again in the cells shown here).
keys = list(string.ascii_lowercase)
score = [1,3,3,2,1,4,2,4,3,8,5,5,3,2,1,3,10,1,1,1,2,4,4,8,4,10]
# Letters map to 1..26; "N" maps to 0.
letter_to_int = dict(zip(keys, range(1, 27)))
letter_to_int["N"] = 0

# Word -> score lookup table loaded from CSV.
dictionary_df = pd.read_csv("dictionary_df2.csv")
# NOTE(review): presumably these two rows hold the literal words "null" and
# "nan", which read_csv turns into missing values by default, so they are
# patched back by row label. The labels are tied to this exact CSV — confirm
# whenever the file changes (reading with keep_default_na=False would avoid
# the hard-coded labels, but verify the other columns first).
dictionary_df.loc[160413, "word"] = "null"
dictionary_df.loc[154353, "word"] = "nan"
# Drop the first column and keep the rest.
dictionary_df = dictionary_df[dictionary_df.columns[1:]]
score_dict = dict(zip(dictionary_df["word"], dictionary_df["score"]))

def tiles_remove(tiles, board):
    """Blank out the given cells of ``board`` in place and return the board.

    Args:
        tiles: 2-D integer array whose first column holds row indices and whose
            second column holds column indices.
        board: NumPy array of cell contents; it is modified in place.

    Returns:
        The same ``board`` object, with every listed cell set to "N".
    """
    row_idx = tiles[:, 0]
    col_idx = tiles[:, 1]
    board[row_idx, col_idx] = "N"
    return board

def gravity(board_1):
    """Settle the board in place after tiles were removed and return it.

    First every column is compacted downwards (empty "N" cells rise to the
    top), then every row is compacted to the right (empty cells move to the
    left). The relative order of the remaining letters is preserved.

    Args:
        board_1: 2-D NumPy array of cell contents; it is modified in place.

    Returns:
        The same ``board_1`` object after both passes.
    """
    # Pass 1 - fall down. Holes are visited top to bottom, so when a hole is
    # handled it is still the last cell of the slice above it; rotating that
    # slice by one moves the hole to the top and shifts the letters down.
    for hole_row, hole_col in zip(*np.where(board_1 == "N")):
        board_1[:hole_row + 1, hole_col] = np.roll(board_1[:hole_row + 1, hole_col], 1)

    # Pass 2 - slide right. Same rotation along each row, with the hole
    # positions recomputed after the first pass.
    for hole_row, hole_col in zip(*np.where(board_1 == "N")):
        board_1[hole_row, :hole_col + 1] = np.roll(board_1[hole_row, :hole_col + 1], 1)

    return board_1

class WordSoup_state:
    """One position of the Word Soup game: the letter grid plus the running totals.

    Attributes:
        board: grid as a list of rows of one-character strings; "N" marks an
            empty cell.
        score: points accumulated so far.
        word_lst: words played so far, in order.
        depth: number of moves played to reach this state (``move`` adds one).
        board_array: NumPy array built from ``board``, used by ``move``.
    """

    def __init__(self, board, S=0, word_lst=None, depth=0):
        """Create a state.

        Args:
            board: list of rows of one-character strings.
            S: starting score.
            word_lst: words already played. When omitted, a fresh empty list is
                created for this instance (a literal ``[]`` default would be a
                single list shared by every state built without the argument).
            depth: number of moves already played.
        """
        self.board = board
        self.score = S
        self.word_lst = [] if word_lst is None else word_lst
        self.depth = depth
        self.board_array = np.array(board)

    def get_legal_actions(self):
        """Find every word that can be traced on the board.

        A word is traced by stepping between neighbouring cells (the eight
        offsets in ``directions``) without reusing a cell, and must be marked
        as a word in the global ``trie``.

        Returns:
            A list of ``(word, indices)`` tuples, where ``indices`` is the list
            of ``(row, col)`` cells spelling the word in order. The same word
            appears once per distinct path. An empty board yields ``[]``.
        """
        rows = len(self.board)
        # Guard the column count so an empty board gives no actions instead of
        # raising IndexError.
        cols = len(self.board[0]) if rows else 0
        result = []

        def dfs(i, j, path, indices, visited, node):
            if node.is_word:
                result.append((path, indices))  # Store the word and its indices
            for dx, dy in directions:
                x, y = i + dx, j + dy
                if 0 <= x < rows and 0 <= y < cols and (x, y) not in visited:
                    char = self.board[x][y]
                    # Only descend while the letters so far are a prefix in the trie.
                    if char in node.children:
                        visited.add((x, y))
                        dfs(x, y, path + char, indices + [(x, y)], visited, node.children[char])
                        visited.remove((x, y))

        for i in range(rows):
            for j in range(cols):
                char = self.board[i][j]
                if char in trie.root.children:
                    dfs(i, j, char, [(i, j)], {(i, j)}, trie.root.children[char])

        return result

    def is_game_over(self):
        """Return True when no word can be played on the current board."""
        return not self.get_legal_actions()

    def game_result(self, Score):
        """Return 1 if this state's score reaches ``Score``, otherwise -1."""
        if self.score >= Score:
            return 1
        return -1

    def move(self, action):
        """Play ``action`` and return the resulting state; this state is not modified.

        Args:
            action: a ``(word, indices)`` tuple as produced by ``get_legal_actions``.

        Returns:
            A new ``WordSoup_state`` with the word's tiles removed and gravity
            applied, the word's score added (plus 500 if the board is now
            empty), the word appended to the word list and ``depth`` + 1.

        Raises:
            KeyError: if ``word`` has no entry in ``score_dict``.
        """
        word, indices = action
        # Work on a copy so this state's board_array stays untouched.
        cleared = tiles_remove(np.array(indices), self.board_array.copy())
        new_board = gravity(cleared)
        new_score = self.score + score_dict[word]
        if np.sum(new_board != "N") == 0:
            # Bonus for clearing every tile.
            new_score += 500
        new_board = new_board.tolist()
        new_word_lst = list(self.word_lst)
        new_word_lst.append(word)

        return WordSoup_state(new_board, new_score, new_word_lst, self.depth + 1)

In [89]:
def ExhaustiveSearch(state, step=1):
    """Look ahead up to ``step`` moves and return the state(s) at the end of the best line.

    Each action is valued at its ``score_dict`` entry plus the 500-point bonus
    when the word uses every remaining letter (the same bonus ``move`` adds).
    A sequence is valued by the total score it reaches.

    Args:
        state: root state; needs ``board``, ``score``, ``depth``,
            ``get_legal_actions()`` and ``move(action)``.
        step: maximum number of moves to look ahead (1 = greedy single move).

    Returns:
        A list of successor states. With ``step == 1``, or when no longer line
        beats the best single move, these are the states after the top-valued
        first move(s). Otherwise they are the states at the end of the best
        line found (ties are all returned); the moves of the line are recorded
        in each state's word list. Returns ``[]`` when the root has no legal
        action.
    """
    total_cells = sum(len(row) for row in state.board)
    # Depth limits are measured relative to the root so the search also works
    # when it is started from a state in the middle of a game.
    root_depth = state.depth

    def leading_empty_cells(board):
        # Count the run of "N" cells at the start of every row. After gravity
        # the empty cells of a row are its leftmost cells, so this is the
        # number of empty cells on the board.
        count = 0
        for row in board:
            for cell in row:
                if cell != "N":
                    break
                count += 1
        return count

    def rank_actions(node):
        """Return (actions best-first, top value, actions tied for the top value)."""
        actions = node.get_legal_actions()
        if not actions:
            return [], None, []
        letters_left = total_cells - leading_empty_cells(node.board)

        def gain(action):
            points = score_dict[action[0]]
            if len(action[0]) == letters_left:
                points += 500  # clearing the board earns the bonus
            return points

        ordered = sorted(actions, key=gain, reverse=True)
        top = gain(ordered[0])
        leaders = [action for action in ordered if gain(action) == top]
        return ordered, top, leaders

    ordered, top, leaders = rank_actions(state)
    if not ordered:
        # Nothing can be played from the root.
        return []

    result = [state.move(action) for action in leaders]
    if step == 1:
        return result

    # Best total score reachable so far (absolute, i.e. including the score
    # the root already carries).
    best_score_so_far = state.score + top
    best = []

    # Depth-first traversal. The list is used as a stack holding children in
    # reverse order, so states are visited best-first, children before siblings.
    stack = [state.move(action) for action in ordered][::-1]
    while stack:
        current = stack.pop()
        ordered, top, leaders = rank_actions(current)
        if not ordered:
            continue
        if current.score + top > best_score_so_far:
            best_score_so_far = current.score + top
            best = [(current, action) for action in leaders]
        if current.depth - root_depth < step - 1:
            stack.extend(reversed([current.move(action) for action in ordered]))

    if best:
        result = [parent.move(action) for parent, action in best]
    return result

# Q-learning

The code in this section was created with the help of generative AI, which we acknowledge here.

### Feature extractions (standardized)
state (partial board):
1. board length
2. occurrence of letters
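3. board liberty (for every remaining tile, the number of neighbouring cells that still hold a tile, summed over the board)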

action (word):
1. word length
2. number of vowels
3. occurrence of letters
4. score

In [104]:
def count_liberty(board):
    """Sum, over every occupied cell, the number of its in-bounds neighbours that are occupied.

    A cell is occupied when it is not "N"; neighbours are the offsets in
    ``directions_array``.

    Args:
        board: 2-D NumPy array of cell contents (not modified).

    Returns:
        The total count (0 for a board without occupied cells).
    """
    n_rows, n_cols = board.shape
    empty_cells = set(zip(*np.where(board == "N")))

    total_liberty = 0
    for r, c in zip(*np.where(board != "N")):
        candidates = directions_array + np.array([r, c])
        row_ok = (candidates[:, 0] >= 0) & (candidates[:, 0] < n_rows)
        col_ok = (candidates[:, 1] >= 0) & (candidates[:, 1] < n_cols)
        in_bounds = candidates[row_ok & col_ok]
        total_liberty += sum(tuple(cell) not in empty_cells for cell in in_bounds)

    return total_liberty

def Feature_Selection(state, action):
    """Build the 57-value feature vector for playing ``action`` in ``state``.

    Layout: [tiles left / 108] + 26 letter frequencies of the board +
    [count_liberty / 742] + [word length / 25] + [vowel share of the word] +
    26 letter frequencies of the word + [word score / 1475].
    The divisors are fixed scaling constants (presumably the maxima of the
    full-size game — TODO confirm).

    Args:
        state: object with a ``board`` attribute (grid of one-character strings).
        action: ``(word, indices)`` tuple; only the word is used.

    Returns:
        1-D NumPy float array of length 57.
    """
    word, _cells = action
    grid = np.array(state.board)
    word_letters = np.array(list(word))
    alphabet = string.ascii_lowercase

    # --- board part ---
    tiles_left = np.count_nonzero(grid != "N")
    board_part = [tiles_left / 108]
    for letter in alphabet:
        board_part.append(np.count_nonzero(grid == letter) / tiles_left)
    board_part.append(count_liberty(grid) / 742)

    # --- word part ---
    n_chars = len(word)
    vowel_count = 0
    for ch in word:
        if ch in 'aeiou':
            vowel_count += 1
    word_part = [n_chars / 25, vowel_count / n_chars]
    for letter in alphabet:
        word_part.append(np.count_nonzero(word_letters == letter) / n_chars)
    word_part.append(score_dict[word] / 1475)

    return np.array(board_part + word_part)

def q_value(theta, features):
    """Linear action-value estimate: the dot product of the weights and the features."""
    weights = np.asarray(theta)
    return weights.dot(features)

def epsilon_greedy_policy(state, theta, epsilon=0.1):
    """Choose an action: random with probability ``epsilon``, otherwise the highest-valued one.

    Args:
        state: object providing ``get_legal_actions()``.
        theta: weight vector passed to ``q_value``.
        epsilon: exploration probability.

    Returns:
        The chosen action, or None when the state has no legal action. Among
        equally valued actions the first one listed wins.
    """
    candidates = state.get_legal_actions()
    if not candidates:
        return None

    explore = np.random.rand() < epsilon
    if explore:
        pick = np.random.choice(len(candidates))
        return candidates[pick]

    def estimate(candidate):
        return q_value(theta, Feature_Selection(state, candidate))

    return max(candidates, key=estimate)

def q_learning_update(theta, state, action, next_state, reward, alpha=0.01, gamma=1.0):
    """Apply one Q-learning step to the linear weights and return them.

    The target is ``reward`` when ``next_state`` has no legal action, otherwise
    ``reward + gamma * max_a Q(next_state, a)``. The weights move along the
    feature vector of ``(state, action)`` by ``alpha`` times the TD error.

    Args:
        theta: weight vector (float NumPy array); updated in place.
        state: state in which ``action`` was taken.
        action: the action taken.
        next_state: resulting state; needs ``get_legal_actions()``.
        reward: immediate reward of the transition.
        alpha: learning rate.
        gamma: discount factor.

    Returns:
        The same ``theta`` array after the update.
    """
    features = Feature_Selection(state, action)

    # Enumerate the successor's actions once: an empty list is exactly the
    # terminal case, so a separate is_game_over() call (which would repeat the
    # whole board search) is not needed.
    next_actions = next_state.get_legal_actions()
    if next_actions:
        best_next = max(q_value(theta, Feature_Selection(next_state, a)) for a in next_actions)
        target = reward + gamma * best_next
    else:
        target = reward

    prediction = q_value(theta, features)
    td_error = target - prediction
    theta += alpha * td_error * features
    return theta

def train_q_learning(initial_board, episodes=1000, alpha=0.01, gamma=1.0, epsilon=0.1, decay=False):
    """Train linear Q-learning weights by repeatedly playing from ``initial_board``.

    Each episode starts from a fresh state and plays epsilon-greedy moves until
    no legal action is left, updating the weights after every move. The reward
    of a move is the word's ``score_dict`` entry, plus 500 when the move empties
    the board. Progress is printed every 50 episodes.

    Args:
        initial_board: starting grid (list of rows); a shallow copy is handed
            to each episode's initial state.
        episodes: number of episodes to play.
        alpha: learning rate.
        gamma: discount factor.
        epsilon: initial exploration probability.
        decay: when True, multiply epsilon by 0.995 after every episode, with a
            floor of 0.01.

    Returns:
        The learned weight vector (NumPy array of length 57).
    """
    # 57 = length of the vector returned by Feature_Selection
    # (1 + 26 + 1 board features, 1 + 1 + 26 + 1 word features).
    theta = np.random.randn(57)

    for ep in range(episodes):
        state = WordSoup_state(initial_board.copy())
        while True:
            # The policy returns None exactly when no legal action is left, so
            # it doubles as the end-of-game test; checking is_game_over() as
            # well would repeat the whole board search on every move.
            action = epsilon_greedy_policy(state, theta, epsilon)
            if action is None:
                break

            next_state = state.move(action)
            reward = score_dict[action[0]]
            if np.count_nonzero(next_state.board_array != "N") == 0:
                reward += 500  # board cleared

            theta = q_learning_update(theta, state, action, next_state, reward, alpha, gamma)
            state = next_state

        if decay:
            epsilon = max(0.01, epsilon * 0.995)

        if (ep + 1) % 50 == 0:
            print(f"Episode {ep + 1}: epsilon={epsilon:.4f}")

    return theta


In [105]:
# 3x3 demo board, one string per row, expanded into a grid of single letters.
board = [list(row_letters) for row_letters in ("pig", "cat", "dog")]
initial_state = WordSoup_state(board)

In [106]:
%%time
theta = train_q_learning(board)

Episode 50: epsilon=0.1000
Episode 100: epsilon=0.1000
Episode 150: epsilon=0.1000
Episode 200: epsilon=0.1000
Episode 250: epsilon=0.1000
Episode 300: epsilon=0.1000
Episode 350: epsilon=0.1000
Episode 400: epsilon=0.1000
Episode 450: epsilon=0.1000
Episode 500: epsilon=0.1000
Episode 550: epsilon=0.1000
Episode 600: epsilon=0.1000
Episode 650: epsilon=0.1000
Episode 700: epsilon=0.1000
Episode 750: epsilon=0.1000
Episode 800: epsilon=0.1000
Episode 850: epsilon=0.1000
Episode 900: epsilon=0.1000
Episode 950: epsilon=0.1000
Episode 1000: epsilon=0.1000
Episode 1050: epsilon=0.1000
Episode 1100: epsilon=0.1000
Episode 1150: epsilon=0.1000
Episode 1200: epsilon=0.1000
Episode 1250: epsilon=0.1000
Episode 1300: epsilon=0.1000
Episode 1350: epsilon=0.1000
Episode 1400: epsilon=0.1000
Episode 1450: epsilon=0.1000
Episode 1500: epsilon=0.1000
Episode 1550: epsilon=0.1000
Episode 1600: epsilon=0.1000
Episode 1650: epsilon=0.1000
Episode 1700: epsilon=0.1000
Episode 1750: epsilon=0.1000
Episo

In [107]:
def greedy_policy(state, theta):
    """Return the legal action with the highest estimated value under ``theta``.

    Args:
        state: object providing ``get_legal_actions()``.
        theta: weight vector passed to ``q_value``.

    Returns:
        The best action (the first one listed among ties), or None when the
        state has no legal action or no action's value exceeds negative
        infinity.
    """
    candidates = state.get_legal_actions()
    if not candidates:
        return None

    champion, champion_value = None, -float('inf')
    for candidate in candidates:
        estimate = q_value(theta, Feature_Selection(state, candidate))
        if not (estimate > champion_value):
            continue
        champion, champion_value = candidate, estimate

    return champion

In [109]:
# Highest-valued first move on the initial board under the learned weights.
a = greedy_policy(initial_state, theta)
a

('oca', [(2, 1), (1, 0), (1, 1)])

In [111]:
# Play that move, then ask the greedy policy for the follow-up on the resulting board.
second_state = initial_state.move(a)
greedy_policy(second_state, theta)

('dipt', [(2, 0), (2, 1), (1, 1), (1, 2)])

In [112]:
# Show the learned weights (one per entry of the Feature_Selection vector).
theta

array([ 2.01037236e+00,  1.67011406e+01, -7.35071228e-01,  1.92661339e+01,
        4.44040946e+00,  2.50228746e-01,  3.97600300e-01, -3.43370907e+00,
        4.16545215e-01,  5.52904096e+01,  1.36177769e+00,  3.66747331e-01,
        1.36206331e+00, -1.14302789e+00,  3.11185241e+00,  2.56836612e+01,
        7.18909256e+00, -3.28915842e-01, -3.81950960e-02, -8.71895389e-01,
        1.25921782e+01,  5.74665384e-02,  9.62221678e-01,  4.29382528e-01,
        8.30820778e-01, -9.13803767e-01,  1.85254428e-02,  3.78641359e+00,
        1.98289893e+01,  2.50735457e+01,  1.72472980e+01,  7.23377969e-02,
        2.26511773e+01,  2.05323084e+01, -1.55597008e+00,  3.81261414e-01,
        2.09141384e+01, -1.93285732e+00, -2.19837311e+00, -1.59423997e+00,
        6.05048964e-03, -6.08585031e-01,  6.78933710e-01,  1.15973649e+00,
        9.45286174e+00,  2.52554928e+01, -7.69424747e-01, -5.56702308e-01,
       -2.41667774e-01,  2.07523605e+01, -1.25622900e-01,  2.56414921e-01,
       -4.32617838e-01,  

In [4]:
# Spot-check the score lookup for a single word.
score_dict["booked"]

78