In [8]:
import tkinter as tk
import time

# Registry of the widgets currently drawn on the board, keyed by (row, col).
cells = dict()

# Handles to the GUI widgets. They all start out unset and are assigned by
# setup_gui(), which declares each of them global.
root = grid_frame = control_panel_frame = None  # window and its two frames
value_iteration_button = q_learning_button = policy_iteration_button = None
epsilon_greedy_q_button = reset_button = None
x_value_entry = r_value_entry = epsilon_entry = None  # text input boxes


# Display function for V-Score board
    - This is the function to call when updating the display for the board that contains only v-scores and directions.
    - Takes a list of exactly 9 tuples `(v_score, direction)`, one for each cell that is neither a terminal cell (the fixed 1.00 and -1.00 cells) nor the wall cell.
    - Tuple information is populated into the cells row by row, starting at the top left and ending at the bottom right.

In [9]:
def v_display_grid(grid_frame, tuples_list):
    """Redraw the 3x4 board with one label per cell for the V-score view.

    Args:
        grid_frame: Tk container the cell labels are gridded into.
        tuples_list: Exactly 9 entries, consumed row by row from the top-left
            cell to the bottom-right cell. The two terminal cells and the wall
            cell have fixed text and are skipped, so they must not appear in
            the list. Each entry is rendered with ``str()``.

    Raises:
        ValueError: If ``tuples_list`` does not contain exactly 9 entries.
    """
    if len(tuples_list) != 9:
        raise ValueError("tuples_list must contain exactly 9 tuples.")

    global cells
    # Destroy the widgets from the previous draw before forgetting them.
    # Clearing the dict alone leaves the old widgets alive underneath the new
    # ones, so every refresh would pile up another layer.
    for stale_widget in cells.values():
        try:
            stale_widget.destroy()
        except tk.TclError:
            # The widget belonged to a window that has already been closed
            # (e.g. main() was re-run in the same kernel); nothing to clean up.
            pass
    cells.clear()

    # Cells whose text is fixed and therefore never taken from tuples_list.
    fixed_text = {
        (0, 3): "1.00",   # top-right terminal cell, always 1
        (1, 3): "-1.00",  # terminal cell directly under it, always -1
        (1, 1): "",       # wall cell (middle row, 2nd column from the left), not in the state space
    }

    tuple_index = 0
    for row in range(3):
        for col in range(4):
            cell_key = (row, col)
            if cell_key in fixed_text:
                text = fixed_text[cell_key]
            else:
                text = str(tuples_list[tuple_index])
                tuple_index += 1

            cell = tk.Label(grid_frame, text=text, relief=tk.SOLID, padx=10, pady=5, width=25, height=15)  # parent is grid_frame
            cell.grid(row=row, column=col, sticky="nsew")
            cells[cell_key] = cell

    # Equal row and column weights so the board stretches evenly on resize.
    for i in range(3):
        grid_frame.grid_rowconfigure(i, weight=1)
    for i in range(4):
        grid_frame.grid_columnconfigure(i, weight=1)

# Display function for Q-Score board
    - This is the function that is called to display the board that shows the q-scores of the 4 directions in each cell.
    - It takes a list of exactly 9 quadtuples of q-scores, one for each cell that is neither a terminal cell nor the wall cell.
        - Each quadtuple populates the up, right, down, and left directions respectively when read left-to-right
    - The cells of the board are populated row by row, beginning with the top-left cell and ending with the bottom-right cell. 

In [10]:
def q_display_grid(grid_frame, quadtuple_list):
    """Redraw the 3x4 board with four directional Q-scores per cell.

    Each cell is a frame holding a centre label surrounded by top, right,
    bottom and left labels. The two terminal cells show their fixed value in
    the centre label; the wall cell stays blank.

    Args:
        grid_frame: Tk container the cell frames are gridded into.
        quadtuple_list: Exactly 9 tuples of length 4, ordered
            (up, right, down, left), consumed row by row from the top-left
            cell to the bottom-right cell. The two terminal cells and the
            wall cell are skipped, so they must not appear in the list.

    Raises:
        ValueError: If ``quadtuple_list`` does not contain exactly 9 entries,
            or if any entry is not a tuple of length 4.
    """
    if len(quadtuple_list) != 9:
        raise ValueError("quadtuple_list must contain exactly 9 quadtuples.")

    # Validate every entry before touching the UI so that a malformed entry
    # cannot leave a half-drawn board behind.
    for tuple_index, quadtuple in enumerate(quadtuple_list):
        if not isinstance(quadtuple, tuple) or len(quadtuple) != 4:
            raise ValueError(f"Expected a quadtuple at index {tuple_index}, got: {quadtuple}")

    global cells
    # Destroy the widgets from the previous draw before forgetting them.
    # Clearing the dict alone leaves the old widgets alive underneath the new
    # ones, so every refresh would pile up another layer.
    for stale_widget in cells.values():
        try:
            stale_widget.destroy()
        except tk.TclError:
            # The widget belonged to a window that has already been closed
            # (e.g. main() was re-run in the same kernel); nothing to clean up.
            pass
    cells.clear()

    # Centre text of the cells that never take values from quadtuple_list.
    fixed_center_text = {
        (0, 3): "1.00",   # top-right terminal cell, always 1
        (1, 3): "-1.00",  # terminal cell directly under it, always -1
        (1, 1): "",       # wall cell (middle row, 2nd column from the left), not in the state space
    }

    remaining_quadtuples = iter(quadtuple_list)
    for row in range(3):
        for col in range(4):
            cell_key = (row, col)

            cell_frame = tk.Frame(grid_frame, relief=tk.SOLID, bd=1)  # holds the five labels, parent is grid_frame
            cell_frame.grid(row=row, column=col, sticky="nsew")  # expand to fill the board cell

            cell_label = tk.Label(cell_frame, text="", padx=10, pady=5, width=25, height=15)  # centre label
            cell_label.grid(row=1, column=1)

            top_label = tk.Label(cell_frame, text="", anchor="s")  # up q-score
            top_label.grid(row=0, column=1, sticky="ew")

            right_label = tk.Label(cell_frame, text="", anchor="w")  # right q-score
            right_label.grid(row=1, column=2, sticky="ns")

            bottom_label = tk.Label(cell_frame, text="", anchor="n")  # down q-score
            bottom_label.grid(row=2, column=1, sticky="ew")

            left_label = tk.Label(cell_frame, text="", anchor="e")  # left q-score
            left_label.grid(row=1, column=0, sticky="ns")

            if cell_key in fixed_center_text:
                # Direction labels were created empty and stay that way.
                cell_label.config(text=fixed_center_text[cell_key])
            else:
                top_val, right_val, bottom_val, left_val = next(remaining_quadtuples)
                top_label.config(text=str(top_val))
                right_label.config(text=str(right_val))
                bottom_label.config(text=str(bottom_val))
                left_label.config(text=str(left_val))

            cells[cell_key] = cell_frame

    # Equal row and column weights so the board stretches evenly on resize.
    for i in range(3):
        grid_frame.grid_rowconfigure(i, weight=1)
    for i in range(4):
        grid_frame.grid_columnconfigure(i, weight=1)

In [11]:
def setup_gui():
    """Create the main window with the board frame on top and the controls below.

    Every widget that other code needs is stored in the module-level globals:
    ``root``, ``grid_frame``, ``control_panel_frame``, the five buttons and
    the three entry boxes. The Tk event loop is not started here.
    """
    global root, grid_frame, control_panel_frame
    global value_iteration_button, q_learning_button, policy_iteration_button
    global epsilon_greedy_q_button, reset_button
    global x_value_entry, r_value_entry, epsilon_entry

    def make_button(caption, column, **extra_options):
        # All buttons live in row 0 of the control panel with the same padding.
        button = tk.Button(control_panel_frame, text=caption, **extra_options)
        button.grid(row=0, column=column, padx=5, pady=5)
        return button

    def make_field(caption, row, column, default_text):
        # A right-aligned caption followed by a pre-filled entry box in the next column.
        caption_label = tk.Label(control_panel_frame, text=caption)
        caption_label.grid(row=row, column=column, padx=5, pady=5, sticky="e")
        field = tk.Entry(control_panel_frame, width=5)
        field.grid(row=row, column=column + 1, padx=5, pady=5, sticky="w")
        field.insert(0, default_text)
        return field

    root = tk.Tk()
    root.title("Gridworld Display")

    # Board area (row 0 of the window).
    grid_frame = tk.Frame(root)
    grid_frame.grid(row=0, column=0, sticky="nsew")

    # Control panel (row 1 of the window).
    control_panel_frame = tk.Frame(root)
    control_panel_frame.grid(row=1, column=0, sticky="ew")

    # Algorithm buttons, left to right.
    value_iteration_button = make_button("Run Value Iteration", 0, command=value_iteration)
    q_learning_button = make_button("Run Q-Learning", 1, command=q_learning)
    policy_iteration_button = make_button("Run Policy Iteration", 2, command=policy_iteration)
    epsilon_greedy_q_button = make_button("Run Epsilon Greedy", 3, command=epsilon_greedy)
    # No command is attached to the reset button.
    reset_button = make_button("Reset Grid", 4)

    # Parameter inputs with their default values.
    x_value_entry = make_field("X Value:", 1, 0, "0.1")
    r_value_entry = make_field("R Value:", 1, 2, "-0.04")
    epsilon_entry = make_field("Epsilon:", 2, 0, "0.1")

    # Let the board area absorb any extra space when the window is resized.
    root.grid_rowconfigure(0, weight=1)
    root.grid_columnconfigure(0, weight=1)

In [12]:
def value_iteration():
    """Placeholder handler for the "Run Value Iteration" button; does nothing yet."""
    return None

In [13]:
def q_learning():
    """Placeholder handler for the "Run Q-Learning" button; does nothing yet."""
    return None

In [14]:
def epsilon_greedy():
    """Placeholder handler for the "Run Epsilon Greedy" button; does nothing yet."""
    return None

In [15]:
def policy_iteration():
    """Placeholder handler for the "Run Policy Iteration" button; does nothing yet."""
    return None

In [17]:
def main():
    """Build the GUI, draw the initial Q-score board, and run the Tk event loop.

    All nine cells that take values start with their four Q-scores at 0.00.
    ``root.mainloop()`` does not return until the event loop exits.
    """
    setup_gui()

    # One (up, right, down, left) quadtuple for each of the 9 cells that take values.
    # To start on the V-score board instead, pass nine (v_score, direction)
    # tuples such as (0.00, "Up") to v_display_grid(grid_frame, ...).
    initial_q_quadtuples = [(0.00, 0.00, 0.00, 0.00) for _ in range(9)]
    q_display_grid(grid_frame, initial_q_quadtuples)

    root.mainloop()


if __name__ == "__main__":
    main()