In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm

from utils.environment import TicTacToeEnv
from utils.mcts import PureRandomMCTS

In [None]:
# Play one fully narrated game in the console: the MCTS agent (O) against the
# opponent (X). The transcript of this game is the one visualized in the report.

rng = np.random.default_rng(10)

real_game = TicTacToeEnv(rng=rng)
mcts = PureRandomMCTS(gamma=1.0, rng=rng)

# Search budget per agent move. Named so the comment in the loop cannot drift
# from the value actually used (it previously claimed 1,000 while running 5000).
N_SEARCH_ITERATIONS = 5000

print("--- NEW GAME ---")
print("Agent (O) vs Random Opponent (X)")
print("Initial Board (Opponent starts center):")
print(real_game.board)

step_count = 0
# One entry per agent turn: the history returned by the search for that turn.
turn_best_action_q_value_history = []
# Pre-bind so the game-over report cannot raise NameError if the loop never runs.
reward = None
while not real_game.done:
    print(f"\n--- Turn {step_count + 1} ---")

    # A. THINK
    # Run N_SEARCH_ITERATIONS simulations starting from the current state.
    print("Agent is thinking...")
    best_action, best_action_q_history = mcts.search(
        real_game, n_iterations=N_SEARCH_ITERATIONS, track_q_value_history=True
    )
    turn_best_action_q_value_history.append(best_action_q_history)

    # Inspect the node stored for the current board (looked up before the move
    # is applied) to report the Q-value estimates behind the decision.
    root_node = mcts.get_node(real_game.board)
    q_val = root_node.q_values[best_action]
    print(f"Agent chose {best_action} with estimated Win Probability: {q_val:.2%}")
    print("Root Node Q-values", root_node.q_values)

    # B. ACT
    # Apply the move to the REAL game
    _, done, reward = real_game.step(best_action)

    # C. DISPLAY
    print("Resulting Board:")
    print(real_game.board)
    step_count += 1

# 3. Game Over
print("\n--- GAME OVER ---")
if reward == 1.0:
    print("🏆 The MCTS Agent WON!")
elif reward == 0.5:
    # The win/loss/draw tally in this notebook counts a reward of 0.5 as a draw;
    # without this branch such a game ended without printing any outcome.
    print("🤝 It's a DRAW.")
elif reward == 0.0:
    # NOTE(review): a full board with reward 0.0 is reported as a draw. Confirm
    # against TicTacToeEnv whether a loss on the final move can also end with a
    # full board, in which case this branch would mislabel it.
    if 0 not in real_game.board:
        print("🤝 It's a DRAW.")
    else:
        print("❌ The Random Opponent WON (Agent Lost).")
else:
    # Covers an unexpected reward value, or a game that was already over.
    print(f"Game ended with unexpected reward: {reward!r}")

real_game.reset()
mcts.reset()

--- NEW GAME ---
Agent (O) vs Random Opponent (X)
Initial Board (Opponent starts center):
[[ 0  0  0]
 [ 0 -1  0]
 [ 0  0  0]]

--- Turn 1 ---
Agent is thinking...
Agent chose (np.int64(0), np.int64(0)) with estimated Win Probability: 89.88%
Root Node Q-values {(np.int64(2), np.int64(1)): 0.8357361045208755, (np.int64(1), np.int64(2)): 0.8001315988129335, (np.int64(2), np.int64(0)): 0.8544647500185721, (np.int64(0), np.int64(1)): 0.8024307946638044, (np.int64(2), np.int64(2)): 0.8736584988508439, (np.int64(0), np.int64(2)): 0.8310251979104551, (np.int64(1), np.int64(0)): 0.837908285609818, (np.int64(0), np.int64(0)): 0.8988095778518229}
Resulting Board:
[[ 1  0 -1]
 [ 0 -1  0]
 [ 0  0  0]]

--- Turn 2 ---
Agent is thinking...
Agent chose (np.int64(2), np.int64(0)) with estimated Win Probability: 90.37%
Root Node Q-values {(np.int64(0), np.int64(1)): 0.28912900869499975, (np.int64(1), np.int64(2)): 0.30529615312224007, (np.int64(2), np.int64(1)): 0.4784199339246387, (np.int64(1), np.int

In [16]:
# Plot, for every agent turn, the history recorded by the search for that
# turn's chosen move (one line per turn, x = position within the history).
convergence_traces = [
    go.Scatter(
        x=list(range(len(history))),
        y=history,
        mode="lines+markers",
        marker={"size": 3},
        name=f"Turn {turn_number}",
    )
    for turn_number, history in enumerate(turn_best_action_q_value_history, start=1)
]

fig = go.Figure(data=convergence_traces)

layout_options = {
    "xaxis_title": "Simulation Iteration",
    "yaxis_title": "Q-Value",
    "legend_title": "Turns",
    "width": 800,
}
fig.update_layout(**layout_options)

# Show inline, then export the same figure for the report.
fig.show()
fig.write_image("qvalue_convergence.pdf")

In [3]:
# Play T games and tally wins, losses, and draws; the counters (and `sum`)
# are read by the bar-chart cell that follows.

rng = np.random.default_rng(42)

game = TicTacToeEnv(rng=rng)
mcts = PureRandomMCTS(gamma=1.0, rng=rng)

T = 1000
wins, losses, draws = 0, 0, 0

for i in tqdm(range(T)):
    # Play the current game to completion, searching again before every move.
    while not game.done:
        best_action, _ = mcts.search(game, n_iterations=1000)
        _, done, reward = game.step(best_action)

    # Classify the final reward: 1.0 -> win, 0 -> loss, 0.5 -> draw.
    # Any other value is left uncounted.
    if reward == 1.0:
        wins += 1
    elif reward == 0:
        losses += 1
    elif reward == 0.5:
        draws += 1

    # Reset both the environment and the searcher before the next game.
    game.reset()
    mcts.reset()

# NOTE: this name shadows the builtin `sum`; it is kept because the plotting
# cell divides by it.
sum = wins + losses + draws

100%|██████████| 1000/1000 [04:39<00:00,  3.58it/s]


In [4]:
# Bar chart of the outcome proportions over all tallied games.
outcome_labels = ["Wins", "Losses", "Draws"]
outcome_counts = [wins, losses, draws]

# Derive the denominator from the counters instead of reading the global named
# `sum`: that name shadows the builtin and only holds a number if the tally
# cell's last line has run. Explicit addition is used (not the builtin sum())
# because the builtin may be shadowed in a live kernel.
total_games = wins + losses + draws

fig = px.bar(
    x=outcome_labels,
    y=[count / total_games for count in outcome_counts],
    text_auto=True,
    labels={"x": "Outcome", "y": "Proportion"},
    color=outcome_labels,
    height=400,
    width=500,
)

# Hide the legend (the x-axis categories already label each bar)
fig.update_layout(showlegend=False)
fig.show()

fig.write_image("outcome_proportions.pdf")