In [1]:
import numpy as np
import pandas as pd
import string
from time import time

directions = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1),  (1, 0), (1, 1)]

class TrieNode:
    def __init__(self):
        self.children = {}
        self.is_word = False

class Trie:
    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_word = True

    def search(self, word):
        node = self.root
        for char in word:
            if char not in node.children:
                return False
            node = node.children[char]
        return node.is_word

    def starts_with(self, prefix):
        node = self.root
        for char in prefix:
            if char not in node.children:
                return False
            node = node.children[char]
        return True

trie = Trie()
with open('dictionary_actual_game.txt', 'r') as file:
    for word in file.read().splitlines():
        trie.insert(word)
        
keys = list(string.ascii_lowercase)
score = [1,3,3,2,1,4,2,4,3,8,5,5,3,2,1,3,10,1,1,1,2,4,4,8,4,10]
letter_to_int = dict(zip(keys, range(1, 27)))
letter_to_int["N"] = 0

dictionary_df = pd.read_csv("dictionary_df2.csv")
dictionary_df.loc[160413, "word"] = "null"
dictionary_df.loc[154353, "word"] = "nan"
dictionary_df = dictionary_df[dictionary_df.columns[1:]]
score_dict = dict(zip(dictionary_df["word"], dictionary_df["score"]))

def tiles_remove(tiles, board):
    board[tiles[:, 0], tiles[:, 1]] = "N"
    return board

def gravity(board_1):
    # Gravity down
    row_index, col_index = np.where(board_1 == "N")
    indices = list(zip(row_index, col_index))
    for r, c in indices:
        column = board_1[:r + 1, c].copy()
        if r > 0:
            column[1:] = column[:-1]
            column[0] = "N"
        board_1[:r + 1, c] = column
    # Gravity right
    row_index, col_index = np.where(board_1 == "N")
    indices = list(zip(row_index, col_index))
    for r, c in indices:
        row = board_1[r, :c + 1].copy()
        if c > 0:
            row[1:] = row[:-1]
            row[0] = "N"
        board_1[r, :c + 1] = row
    return board_1

class WordSoup_state:
    def __init__(self, board, S=0, word_lst=[], depth=0):
        self.board = board
        self.score = S
        self.word_lst = word_lst
        self.depth = depth
        self.board_array = np.array(board)

    def get_legal_actions(self):
        rows, cols = len(self.board), len(self.board[0])
        result = []

        def dfs(i, j, path, indices, visited, node):
            if node.is_word:
                result.append((path, indices))  # Store the word and its indices
            for dx, dy in directions:
                x, y = i + dx, j + dy
                if 0 <= x < rows and 0 <= y < cols and (x, y) not in visited:
                    char = self.board[x][y]
                    if char in node.children:
                        visited.add((x, y))
                        dfs(x, y, path + char, indices + [(x, y)], visited, node.children[char])
                        visited.remove((x, y))

        for i in range(rows):
            for j in range(cols):
                char = self.board[i][j]
                if char in trie.root.children:
                    dfs(i, j, char, [(i, j)], {(i, j)}, trie.root.children[char])

        return result

    def is_game_over(self):
        found_words = self.get_legal_actions()
        if len(found_words) > 0:
            return False
        return True

    def game_result(self, Score):
        if self.score >= Score:
            return 1
        return -1

    def move(self, action):
        word, indices = action
        _ = tiles_remove(np.array(indices), self.board_array.copy())
        new_board = gravity(_).tolist()
        new_score = self.score + score_dict[word]
        if np.sum(new_board != "N") == 0:
            new_score += 500
        new_word_lst = self.word_lst.copy()
        new_word_lst.append(word)

        return WordSoup_state(new_board, new_score, new_word_lst, self.depth+1)

In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
import random
from collections import deque

np.random.seed(0)
# Define the two-stage DQN model
class TwoStageDQN(tf.keras.Model):
    def __init__(self, board_input_size, action_input_size, hidden_size):
        super(TwoStageDQN, self).__init__()
        
        # Stage 1: Board Processor
        self.board_conv1 = layers.Conv2D(64, kernel_size=(3, 3), activation='relu')
        self.board_conv2 = layers.Conv2D(64, kernel_size=(3, 3), activation='relu')
        self.board_flatten = layers.Flatten()
        self.board_fc1 = layers.Dense(hidden_size, activation='relu')
        
        # Stage 2: Q-Value Predictor
        self.action_fc1 = layers.Dense(hidden_size, activation='relu')
        self.action_fc1 = layers.Dense(hidden_size, activation='relu')
        self.action_fc2 = layers.Dense(hidden_size, activation='relu')
        self.output_layer = layers.Dense(1)  # Q-value for the action

    def process_board(self, board_input):
        # Stage 1: Process the one-hot encoded board
        x = self.board_conv1(board_input)
        x = self.board_conv2(x)
        x = self.board_flatten(x)
        latent_board = self.board_fc1(x)
        return latent_board

    def predict_q_value(self, latent_board, action_input):
        # Stage 2: Combine latent board and action features
        combined = tf.concat([latent_board, action_input], axis=1)
        combined = self.action_fc1(combined)
        combined = self.action_fc2(combined)
        
        # Output Q-value
        q_value = self.output_layer(combined)
        return q_value

2025-04-17 17:32:46.405880: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Hyperparameters
board_shape = (12, 9, 27)  # One-hot encoded board (12x9 grid, 26 letters)
action_input_size = 5  # Number of action features (e.g., word length, vowels, etc.)
hidden_size = 128
batch_size = 64
epsilon = 1.0
epsilon_min = 0.001
epsilon_decay = 0.9975
replay_buffer = deque(maxlen=10000)

# Initialize DQN and optimizer
dqn = TwoStageDQN(board_shape, action_input_size, hidden_size)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
loss_fn = tf.keras.losses.MeanSquaredError()


def preprocess_board(board):
    # Convert the board to a numerical matrix
    board_matrix = np.array([[letter_to_int[cell] for cell in row] for row in board])
    # One-hot encode the matrix
    num_classes = 27  # 26 letters + 1 for empty space
    one_hot_board = np.eye(num_classes)[board_matrix]
    return one_hot_board


def preprocess_action_features(word):
    return np.array([
        len(word),
        sum(1 for char in word if char in 'aeiou'),
        word.count('q'),
        score_dict[word]
    ])

# DQN

The code here is created by the generative AI. The prompt we use to create is: 
"create DQN which first take board as input and then combine with the features we listed for actions to make decision"

In [4]:
%%time
board = [
    ['z', 'h', 's', 's', 'g', 'a', 's', 'p', 'r'],
    ['r', 'q', 'q', 'i', 'm', 'n', 'o', 'd', 'e'],
    ['n', 'b', 'a', 's', 't', 'd', 't', 'i', 'o'],
    ['e', 'a', 't', 'k', 'k', 'n', 'a', 'a', 'd'],
    ['o', 'o', 'r', 'n', 'a', 'y', 'n', 't', 'o'],
    ['n', 'p', 'u', 'l', 'r', 'e', 'a', 'o', 'a'],
    ['t', 'm', 's', 'e', 'm', 'r', 'p', 'n', 'y'],
    ['o', 'n', 'e', 'l', 'a', 's', 'u', 'e', 'a'],
    ['p', 'h', 't', 'q', 'm', 'l', 'u', 'r', 'o'],
    ['i', 'c', 'u', 'o', 'n', 'x', 'o', 'i', 'b'],
    ['o', 'e', 'n', 'i', 'i', 'r', 'n', 'x', 'k'],
    ['i', 'q', 'u', 'a', 'p', 'f', 't', 'l', 'e']
]

# Training loop
for episode in range(2000):
    t0 = time()
    state = WordSoup_state(board)  # Initialize the game
    total_reward = 0
    episode_transitions = []  # Store transitions for the entire episode

    while not state.is_game_over():
        # 1. Find all legal actions
        legal_actions = state.get_legal_actions()

        # 2. Preprocess the board (one-hot encoding)
        board_input = preprocess_board(state.board_array)
        board_input = tf.convert_to_tensor([board_input], dtype=tf.float32)  # Add batch dimension

        # 3. Process the board into a latent representation
        latent_board = dqn.process_board(board_input)

        # 4. Extract features for each action and compute Q-values
        q_values = []
        for action in legal_actions:
            word, _ = action
            action_features = preprocess_action_features(word)
            action_input = tf.convert_to_tensor([action_features], dtype=tf.float32)  # Add batch dimension
            
            # Predict Q-value
            q_value = dqn.predict_q_value(latent_board, action_input)
            q_values.append(q_value.numpy()[0][0])

        # 5. Select the best action by Q-value (epsilon-greedy)
        if random.random() < epsilon:
            action_idx = random.randint(0, len(legal_actions) - 1)
        else:
            action_idx = np.argmax(q_values)

        # 6. Execute the best action and update state
        action = legal_actions[action_idx]
        next_state = state.move(action)
        reward = next_state.score - state.score

        # Accumulate total reward
        total_reward += reward

        # Store transition (state, action, total reward) for the episode
        episode_transitions.append((state, action, total_reward))

        # Update state
        state = next_state

    # 7. Train the DQN using the total reward for the episode
    if len(replay_buffer) >= batch_size:
        # Sample a batch of episodes from the replay buffer
        batch = random.sample(replay_buffer, batch_size)

        # Preprocess boards and action features for the batch
        board_inputs = np.array([preprocess_board(s.board_array) for s, _, _ in batch])
        action_inputs = np.array([preprocess_action_features(a[0]) for _, a, _ in batch])
        total_rewards = np.array([r for _, _, r in batch])

        # Convert to tensors
        board_inputs = tf.convert_to_tensor(board_inputs, dtype=tf.float32)
        action_inputs = tf.convert_to_tensor(action_inputs, dtype=tf.float32)
        total_rewards = tf.convert_to_tensor(total_rewards, dtype=tf.float32)

        # Compute loss and update the DQN
        with tf.GradientTape() as tape:
            # Process boards into latent representations
            latent_boards = dqn.process_board(board_inputs)

            # Compute current Q-values
            current_q_values = []
            for i in range(batch_size):
                action_input = tf.convert_to_tensor([action_inputs[i]], dtype=tf.float32)
                current_q_value = dqn.predict_q_value(latent_boards[i:i+1], action_input)
                current_q_values.append(current_q_value)
            current_q_values = tf.stack(current_q_values, axis=0)

            # Compute loss (mean squared error between total rewards and predicted Q-values)
            loss = tf.reduce_mean(tf.square(total_rewards - current_q_values))

        # Update DQN weights
        grads = tape.gradient(loss, dqn.trainable_variables)
        optimizer.apply_gradients(zip(grads, dqn.trainable_variables))

    # Store the episode's transitions in the replay buffer
    replay_buffer.extend(episode_transitions)

    # Decay epsilon
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    t1 = time()
    print(f"Episode {episode}, Total Reward: {total_reward}, Total Time Elapsed: {t1 - t0}")

Episode 0, Total Reward: 899, Total Time Elapsed: 23.02125096321106
Episode 1, Total Reward: 956, Total Time Elapsed: 24.545952081680298
Episode 2, Total Reward: 800, Total Time Elapsed: 22.68246340751648
Episode 3, Total Reward: 830, Total Time Elapsed: 26.081939935684204
Episode 4, Total Reward: 981, Total Time Elapsed: 20.745687007904053
Episode 5, Total Reward: 1070, Total Time Elapsed: 16.75188899040222
Episode 6, Total Reward: 836, Total Time Elapsed: 30.925428867340088
Episode 7, Total Reward: 948, Total Time Elapsed: 27.457095861434937
Episode 8, Total Reward: 808, Total Time Elapsed: 21.95111083984375
Episode 9, Total Reward: 877, Total Time Elapsed: 27.33321189880371
Episode 10, Total Reward: 1025, Total Time Elapsed: 24.287875175476074
Episode 11, Total Reward: 810, Total Time Elapsed: 24.795196056365967
Episode 12, Total Reward: 1031, Total Time Elapsed: 22.084805727005005
Episode 13, Total Reward: 1100, Total Time Elapsed: 21.351004123687744
Episode 14, Total Reward: 901, 

Episode 117, Total Reward: 1251, Total Time Elapsed: 19.313645839691162
Episode 118, Total Reward: 1204, Total Time Elapsed: 21.460262775421143
Episode 119, Total Reward: 1065, Total Time Elapsed: 25.69695496559143
Episode 120, Total Reward: 1172, Total Time Elapsed: 24.014363050460815
Episode 121, Total Reward: 1389, Total Time Elapsed: 18.100990056991577
Episode 122, Total Reward: 1176, Total Time Elapsed: 19.566651105880737
Episode 123, Total Reward: 1312, Total Time Elapsed: 23.34418797492981
Episode 124, Total Reward: 1231, Total Time Elapsed: 17.905307054519653
Episode 125, Total Reward: 1287, Total Time Elapsed: 19.208006143569946
Episode 126, Total Reward: 1619, Total Time Elapsed: 17.398204803466797
Episode 127, Total Reward: 1468, Total Time Elapsed: 20.25252389907837
Episode 128, Total Reward: 1547, Total Time Elapsed: 20.75964379310608
Episode 129, Total Reward: 1010, Total Time Elapsed: 28.78926992416382
Episode 130, Total Reward: 1188, Total Time Elapsed: 28.8001008033752

Episode 232, Total Reward: 1343, Total Time Elapsed: 18.872013092041016
Episode 233, Total Reward: 1572, Total Time Elapsed: 20.19125199317932
Episode 234, Total Reward: 1529, Total Time Elapsed: 15.47000765800476
Episode 235, Total Reward: 1319, Total Time Elapsed: 19.415912866592407
Episode 236, Total Reward: 1594, Total Time Elapsed: 13.647797107696533
Episode 237, Total Reward: 1534, Total Time Elapsed: 16.4377498626709
Episode 238, Total Reward: 1447, Total Time Elapsed: 17.269147157669067
Episode 239, Total Reward: 1615, Total Time Elapsed: 14.631322145462036
Episode 240, Total Reward: 1598, Total Time Elapsed: 16.364948987960815
Episode 241, Total Reward: 1590, Total Time Elapsed: 11.976043939590454
Episode 242, Total Reward: 1289, Total Time Elapsed: 14.559087753295898
Episode 243, Total Reward: 1623, Total Time Elapsed: 16.074496030807495
Episode 244, Total Reward: 1652, Total Time Elapsed: 13.072214126586914
Episode 245, Total Reward: 1447, Total Time Elapsed: 15.131213903427

Episode 347, Total Reward: 1525, Total Time Elapsed: 15.572880983352661
Episode 348, Total Reward: 1794, Total Time Elapsed: 12.625622034072876
Episode 349, Total Reward: 1648, Total Time Elapsed: 23.04950499534607
Episode 350, Total Reward: 1397, Total Time Elapsed: 15.185667276382446
Episode 351, Total Reward: 1152, Total Time Elapsed: 21.647997856140137
Episode 352, Total Reward: 1570, Total Time Elapsed: 16.227208852767944
Episode 353, Total Reward: 1720, Total Time Elapsed: 11.882344245910645
Episode 354, Total Reward: 1527, Total Time Elapsed: 16.018134832382202
Episode 355, Total Reward: 1747, Total Time Elapsed: 12.157696962356567
Episode 356, Total Reward: 1588, Total Time Elapsed: 12.717846155166626
Episode 357, Total Reward: 1586, Total Time Elapsed: 19.764933109283447
Episode 358, Total Reward: 1714, Total Time Elapsed: 16.609270095825195
Episode 359, Total Reward: 1387, Total Time Elapsed: 12.539347171783447
Episode 360, Total Reward: 1446, Total Time Elapsed: 20.482550859

Episode 462, Total Reward: 1623, Total Time Elapsed: 11.941762924194336
Episode 463, Total Reward: 1594, Total Time Elapsed: 14.764562845230103
Episode 464, Total Reward: 1539, Total Time Elapsed: 16.96691107749939
Episode 465, Total Reward: 1648, Total Time Elapsed: 15.118746280670166
Episode 466, Total Reward: 1602, Total Time Elapsed: 12.099562883377075
Episode 467, Total Reward: 1524, Total Time Elapsed: 14.12881088256836
Episode 468, Total Reward: 1818, Total Time Elapsed: 12.76479721069336
Episode 469, Total Reward: 1607, Total Time Elapsed: 11.472643852233887
Episode 470, Total Reward: 1609, Total Time Elapsed: 12.4966459274292
Episode 471, Total Reward: 1657, Total Time Elapsed: 14.306520938873291
Episode 472, Total Reward: 1705, Total Time Elapsed: 11.766446828842163
Episode 473, Total Reward: 1716, Total Time Elapsed: 14.672616004943848
Episode 474, Total Reward: 1664, Total Time Elapsed: 16.855479955673218
Episode 475, Total Reward: 1600, Total Time Elapsed: 12.8838798999786

Episode 577, Total Reward: 1634, Total Time Elapsed: 16.562880992889404
Episode 578, Total Reward: 1609, Total Time Elapsed: 16.019295930862427
Episode 579, Total Reward: 1658, Total Time Elapsed: 13.275979042053223
Episode 580, Total Reward: 1756, Total Time Elapsed: 15.393110036849976
Episode 581, Total Reward: 1605, Total Time Elapsed: 12.727452278137207
Episode 582, Total Reward: 1757, Total Time Elapsed: 17.600022077560425
Episode 583, Total Reward: 1509, Total Time Elapsed: 20.242660999298096
Episode 584, Total Reward: 1750, Total Time Elapsed: 19.916314840316772
Episode 585, Total Reward: 1626, Total Time Elapsed: 17.732928037643433
Episode 586, Total Reward: 1787, Total Time Elapsed: 11.24829912185669
Episode 587, Total Reward: 1452, Total Time Elapsed: 12.229342937469482
Episode 588, Total Reward: 1744, Total Time Elapsed: 14.276324272155762
Episode 589, Total Reward: 1842, Total Time Elapsed: 11.443138122558594
Episode 590, Total Reward: 1731, Total Time Elapsed: 13.127830028

Episode 691, Total Reward: 1842, Total Time Elapsed: 11.412472248077393
Episode 692, Total Reward: 1560, Total Time Elapsed: 15.313773155212402
Episode 693, Total Reward: 1809, Total Time Elapsed: 11.46463918685913
Episode 694, Total Reward: 1650, Total Time Elapsed: 13.127011060714722
Episode 695, Total Reward: 1412, Total Time Elapsed: 18.93103837966919
Episode 696, Total Reward: 1680, Total Time Elapsed: 11.866947889328003
Episode 697, Total Reward: 1654, Total Time Elapsed: 15.773819208145142
Episode 698, Total Reward: 1812, Total Time Elapsed: 11.487465143203735
Episode 699, Total Reward: 1673, Total Time Elapsed: 11.508168935775757
Episode 700, Total Reward: 1673, Total Time Elapsed: 11.620367050170898
Episode 701, Total Reward: 1834, Total Time Elapsed: 11.850106954574585
Episode 702, Total Reward: 1786, Total Time Elapsed: 11.970161199569702
Episode 703, Total Reward: 1724, Total Time Elapsed: 12.210094213485718
Episode 704, Total Reward: 1864, Total Time Elapsed: 11.5274660587

Episode 806, Total Reward: 1776, Total Time Elapsed: 14.112152814865112
Episode 807, Total Reward: 1773, Total Time Elapsed: 11.773630857467651
Episode 808, Total Reward: 1675, Total Time Elapsed: 11.58770203590393
Episode 809, Total Reward: 1842, Total Time Elapsed: 11.368300199508667
Episode 810, Total Reward: 1694, Total Time Elapsed: 11.981068134307861
Episode 811, Total Reward: 1689, Total Time Elapsed: 12.218518018722534
Episode 812, Total Reward: 1739, Total Time Elapsed: 11.63018798828125
Episode 813, Total Reward: 1818, Total Time Elapsed: 15.235821962356567
Episode 814, Total Reward: 1722, Total Time Elapsed: 15.256924867630005
Episode 815, Total Reward: 1827, Total Time Elapsed: 11.460286140441895
Episode 816, Total Reward: 1772, Total Time Elapsed: 11.423779010772705
Episode 817, Total Reward: 1605, Total Time Elapsed: 11.706240892410278
Episode 818, Total Reward: 1842, Total Time Elapsed: 11.57602834701538
Episode 819, Total Reward: 1836, Total Time Elapsed: 11.44940805435

Episode 921, Total Reward: 1631, Total Time Elapsed: 12.346239805221558
Episode 922, Total Reward: 1659, Total Time Elapsed: 14.864920616149902
Episode 923, Total Reward: 1851, Total Time Elapsed: 11.431995868682861
Episode 924, Total Reward: 1842, Total Time Elapsed: 11.429617881774902
Episode 925, Total Reward: 1842, Total Time Elapsed: 11.384379148483276
Episode 926, Total Reward: 1709, Total Time Elapsed: 13.151773929595947
Episode 927, Total Reward: 1711, Total Time Elapsed: 13.66708493232727
Episode 928, Total Reward: 1632, Total Time Elapsed: 12.637381076812744
Episode 929, Total Reward: 1602, Total Time Elapsed: 12.43413496017456
Episode 930, Total Reward: 1649, Total Time Elapsed: 12.254723072052002
Episode 931, Total Reward: 1833, Total Time Elapsed: 11.368426084518433
Episode 932, Total Reward: 1842, Total Time Elapsed: 11.348835945129395
Episode 933, Total Reward: 1826, Total Time Elapsed: 11.535277128219604
Episode 934, Total Reward: 1723, Total Time Elapsed: 11.3425018787

Episode 1035, Total Reward: 1713, Total Time Elapsed: 11.818320035934448
Episode 1036, Total Reward: 1842, Total Time Elapsed: 11.591270208358765
Episode 1037, Total Reward: 1755, Total Time Elapsed: 11.984946012496948
Episode 1038, Total Reward: 1781, Total Time Elapsed: 16.144232034683228
Episode 1039, Total Reward: 1842, Total Time Elapsed: 11.480739116668701
Episode 1040, Total Reward: 1764, Total Time Elapsed: 12.0383939743042
Episode 1041, Total Reward: 1665, Total Time Elapsed: 13.91144323348999
Episode 1042, Total Reward: 1908, Total Time Elapsed: 12.939919710159302
Episode 1043, Total Reward: 1677, Total Time Elapsed: 12.741503953933716
Episode 1044, Total Reward: 1635, Total Time Elapsed: 12.436069011688232
Episode 1045, Total Reward: 1787, Total Time Elapsed: 13.3557448387146
Episode 1046, Total Reward: 1823, Total Time Elapsed: 12.996184825897217
Episode 1047, Total Reward: 1842, Total Time Elapsed: 11.36890697479248
Episode 1048, Total Reward: 1842, Total Time Elapsed: 11.

Episode 1148, Total Reward: 1836, Total Time Elapsed: 11.992761850357056
Episode 1149, Total Reward: 1842, Total Time Elapsed: 11.399078845977783
Episode 1150, Total Reward: 1786, Total Time Elapsed: 11.431005954742432
Episode 1151, Total Reward: 1842, Total Time Elapsed: 11.470607280731201
Episode 1152, Total Reward: 1827, Total Time Elapsed: 11.494774103164673
Episode 1153, Total Reward: 1687, Total Time Elapsed: 11.34784984588623
Episode 1154, Total Reward: 1728, Total Time Elapsed: 12.534361839294434
Episode 1155, Total Reward: 1833, Total Time Elapsed: 11.31275224685669
Episode 1156, Total Reward: 1842, Total Time Elapsed: 11.414982318878174
Episode 1157, Total Reward: 1842, Total Time Elapsed: 11.450828075408936
Episode 1158, Total Reward: 1707, Total Time Elapsed: 11.881331205368042
Episode 1159, Total Reward: 1842, Total Time Elapsed: 12.008012771606445
Episode 1160, Total Reward: 1842, Total Time Elapsed: 11.40351390838623
Episode 1161, Total Reward: 1731, Total Time Elapsed: 

Episode 1261, Total Reward: 1718, Total Time Elapsed: 12.135891914367676
Episode 1262, Total Reward: 1715, Total Time Elapsed: 14.64145803451538
Episode 1263, Total Reward: 1842, Total Time Elapsed: 11.652969121932983
Episode 1264, Total Reward: 1842, Total Time Elapsed: 11.400877952575684
Episode 1265, Total Reward: 1853, Total Time Elapsed: 11.317430973052979
Episode 1266, Total Reward: 1842, Total Time Elapsed: 11.358793020248413
Episode 1267, Total Reward: 1851, Total Time Elapsed: 11.773451805114746
Episode 1268, Total Reward: 1834, Total Time Elapsed: 11.462872982025146
Episode 1269, Total Reward: 1851, Total Time Elapsed: 11.376535892486572
Episode 1270, Total Reward: 1851, Total Time Elapsed: 11.43485689163208
Episode 1271, Total Reward: 1793, Total Time Elapsed: 11.429692268371582
Episode 1272, Total Reward: 1851, Total Time Elapsed: 12.0256507396698
Episode 1273, Total Reward: 1791, Total Time Elapsed: 11.496495008468628
Episode 1274, Total Reward: 1851, Total Time Elapsed: 1

Episode 1374, Total Reward: 1836, Total Time Elapsed: 11.409825086593628
Episode 1375, Total Reward: 1851, Total Time Elapsed: 11.44976806640625
Episode 1376, Total Reward: 1851, Total Time Elapsed: 11.470772981643677
Episode 1377, Total Reward: 1851, Total Time Elapsed: 11.576329946517944
Episode 1378, Total Reward: 1851, Total Time Elapsed: 11.491765975952148
Episode 1379, Total Reward: 1851, Total Time Elapsed: 11.440654039382935
Episode 1380, Total Reward: 1851, Total Time Elapsed: 11.420332670211792
Episode 1381, Total Reward: 1851, Total Time Elapsed: 11.442840099334717
Episode 1382, Total Reward: 1851, Total Time Elapsed: 11.459800958633423
Episode 1383, Total Reward: 1851, Total Time Elapsed: 11.447036743164062
Episode 1384, Total Reward: 1851, Total Time Elapsed: 12.04000473022461
Episode 1385, Total Reward: 1851, Total Time Elapsed: 11.40133810043335
Episode 1386, Total Reward: 1821, Total Time Elapsed: 11.511616945266724
Episode 1387, Total Reward: 1823, Total Time Elapsed: 

Episode 1487, Total Reward: 1851, Total Time Elapsed: 11.4046790599823
Episode 1488, Total Reward: 1791, Total Time Elapsed: 11.509000778198242
Episode 1489, Total Reward: 1851, Total Time Elapsed: 11.430219888687134
Episode 1490, Total Reward: 1851, Total Time Elapsed: 11.40218710899353
Episode 1491, Total Reward: 1651, Total Time Elapsed: 12.096655130386353
Episode 1492, Total Reward: 1851, Total Time Elapsed: 11.40752387046814
Episode 1493, Total Reward: 1851, Total Time Elapsed: 11.408222913742065
Episode 1494, Total Reward: 1851, Total Time Elapsed: 11.39954423904419
Episode 1495, Total Reward: 1754, Total Time Elapsed: 12.778739213943481
Episode 1496, Total Reward: 1851, Total Time Elapsed: 12.0210120677948
Episode 1497, Total Reward: 1851, Total Time Elapsed: 11.457138299942017
Episode 1498, Total Reward: 1914, Total Time Elapsed: 16.68999218940735
Episode 1499, Total Reward: 1851, Total Time Elapsed: 11.374703168869019
Episode 1500, Total Reward: 1851, Total Time Elapsed: 11.38

Episode 1600, Total Reward: 1851, Total Time Elapsed: 11.42384672164917
Episode 1601, Total Reward: 1851, Total Time Elapsed: 12.01458477973938
Episode 1602, Total Reward: 1851, Total Time Elapsed: 11.433491945266724
Episode 1603, Total Reward: 1851, Total Time Elapsed: 11.354981899261475
Episode 1604, Total Reward: 1851, Total Time Elapsed: 11.391626834869385
Episode 1605, Total Reward: 1851, Total Time Elapsed: 11.415020942687988
Episode 1606, Total Reward: 1851, Total Time Elapsed: 11.456966161727905
Episode 1607, Total Reward: 1841, Total Time Elapsed: 11.371758222579956
Episode 1608, Total Reward: 1851, Total Time Elapsed: 11.375727891921997
Episode 1609, Total Reward: 1851, Total Time Elapsed: 11.466425895690918
Episode 1610, Total Reward: 1851, Total Time Elapsed: 11.383352756500244
Episode 1611, Total Reward: 1835, Total Time Elapsed: 11.396675825119019
Episode 1612, Total Reward: 1724, Total Time Elapsed: 13.483402967453003
Episode 1613, Total Reward: 1851, Total Time Elapsed:

Episode 1713, Total Reward: 1851, Total Time Elapsed: 11.412692070007324
Episode 1714, Total Reward: 1851, Total Time Elapsed: 12.016615867614746
Episode 1715, Total Reward: 1833, Total Time Elapsed: 11.497790098190308
Episode 1716, Total Reward: 1815, Total Time Elapsed: 11.55967903137207
Episode 1717, Total Reward: 1851, Total Time Elapsed: 11.433305025100708
Episode 1718, Total Reward: 1851, Total Time Elapsed: 11.415961027145386
Episode 1719, Total Reward: 1834, Total Time Elapsed: 11.65222692489624
Episode 1720, Total Reward: 1851, Total Time Elapsed: 11.453473806381226
Episode 1721, Total Reward: 1851, Total Time Elapsed: 11.456846952438354
Episode 1722, Total Reward: 1851, Total Time Elapsed: 11.39926815032959
Episode 1723, Total Reward: 1698, Total Time Elapsed: 11.295138120651245
Episode 1724, Total Reward: 1851, Total Time Elapsed: 11.549458980560303
Episode 1725, Total Reward: 1851, Total Time Elapsed: 12.059834957122803
Episode 1726, Total Reward: 1851, Total Time Elapsed: 

Episode 1826, Total Reward: 1851, Total Time Elapsed: 11.465933084487915
Episode 1827, Total Reward: 1851, Total Time Elapsed: 11.491019010543823
Episode 1828, Total Reward: 1851, Total Time Elapsed: 12.06480598449707
Episode 1829, Total Reward: 1851, Total Time Elapsed: 11.537588834762573
Episode 1830, Total Reward: 1851, Total Time Elapsed: 11.481017827987671
Episode 1831, Total Reward: 1851, Total Time Elapsed: 11.490972995758057
Episode 1832, Total Reward: 1851, Total Time Elapsed: 11.455910205841064
Episode 1833, Total Reward: 1722, Total Time Elapsed: 11.6602623462677
Episode 1834, Total Reward: 1851, Total Time Elapsed: 11.637425899505615
Episode 1835, Total Reward: 1851, Total Time Elapsed: 11.52040410041809
Episode 1836, Total Reward: 1851, Total Time Elapsed: 11.355034828186035
Episode 1837, Total Reward: 1851, Total Time Elapsed: 11.36994981765747
Episode 1838, Total Reward: 1851, Total Time Elapsed: 11.467497110366821
Episode 1839, Total Reward: 1851, Total Time Elapsed: 11

Episode 1939, Total Reward: 1851, Total Time Elapsed: 12.791153907775879
Episode 1940, Total Reward: 1851, Total Time Elapsed: 12.37924313545227
Episode 1941, Total Reward: 1851, Total Time Elapsed: 12.045729160308838
Episode 1942, Total Reward: 1851, Total Time Elapsed: 11.891481161117554
Episode 1943, Total Reward: 1851, Total Time Elapsed: 12.066845893859863
Episode 1944, Total Reward: 1851, Total Time Elapsed: 12.578654289245605
Episode 1945, Total Reward: 1812, Total Time Elapsed: 16.754152059555054
Episode 1946, Total Reward: 1851, Total Time Elapsed: 14.143918991088867
Episode 1947, Total Reward: 1851, Total Time Elapsed: 12.142959117889404
Episode 1948, Total Reward: 1810, Total Time Elapsed: 12.834753036499023
Episode 1949, Total Reward: 1851, Total Time Elapsed: 12.519340991973877
Episode 1950, Total Reward: 1851, Total Time Elapsed: 12.604670286178589
Episode 1951, Total Reward: 1851, Total Time Elapsed: 12.470104932785034
Episode 1952, Total Reward: 1851, Total Time Elapsed