# Q-Networks for Hexagon Chess

## Setup

In [1]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
# Load the autoreload extension and reload imported modules before every cell
# execution, so edits to the local packages are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import tensorflow.keras as krs
# Report the GPUs visible to TensorFlow (an empty list means none were found),
# followed by the installed TensorFlow version, one per line.
print(tf.config.list_physical_devices('GPU'), tf.__version__, sep="\n")

2024-03-21 14:25:54.247754: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-21 14:25:54.395679: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-03-21 14:25:54.395698: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-03-21 14:25:55.068826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-

[]
2.11.0


2024-03-21 14:25:56.387595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2024-03-21 14:25:56.387617: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2024-03-21 14:25:56.387632: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (pop-os): /proc/driver/nvidia/version does not exist


In [3]:
from engines.environment import HexChessEnv
from engines.qnetworks import QNetworkAgent, QLearning
from hexchess.players import RandomPlayer, GreedyPlayer

pygame 2.5.2 (SDL 2.28.2, Python 3.10.0)
Hello from the pygame community. https://www.pygame.org/contribute.html


## The Environment

In [4]:
# Training setup: the Q-network learns against a random-move opponent that plays white.
opponent_class = RandomPlayer
opponent_is_white = True
apply_negative_scores = False  # forwarded unchanged to HexChessEnv
env = HexChessEnv(
    opponent_class,
    opponent_is_white,
    apply_negative_scores=apply_negative_scores,
)

In [5]:
# Print the environment's current board position as text.
env.render()

     [34mB[0m [34mK[0m [34mN[0m [34mR[0m [34mP[0m * 
    [34mQ[0m [34mB[0m * * [34mP[0m * * 
   [34mN[0m * [34mB[0m * [34mP[0m * * * 
  [34mR[0m * * * [34mP[0m * * * * 
 [34mP[0m [34mP[0m [34mP[0m [34mP[0m [34mP[0m * * * * * 
* * * * * * * * * * * 
 * * * * * [33mP[0m [33mP[0m [33mP[0m [33mP[0m [33mP[0m 
  * * * * [33mP[0m * * * [33mR[0m 
   * * * [33mP[0m * [33mB[0m * [33mN[0m 
    * * [33mP[0m * * [33mB[0m [33mK[0m 
     * [33mP[0m [33mR[0m [33mN[0m [33mQ[0m [33mB[0m 


## The Agent

In [9]:
# Build the agent. With use_pretrained_weights enabled it is pointed at the model
# file saved from an earlier run against the random player; otherwise model_path
# is None (presumably a freshly initialised network - confirm in QNetworkAgent).
use_pretrained_weights = True
if use_pretrained_weights:
    model_path = "./assets/qnetworks/random_pr_model_black.keras"
else:
    model_path = None
agent = QNetworkAgent(
    model_path=model_path,
    discount_factor=0.1,
    learning_rate=1e-2,
)

In [10]:
# Alternative: load the weights pre-trained against a random player into an
# existing agent by hand. Left disabled because the previous cell can already
# hand this same file to QNetworkAgent through model_path.
#agent.model.load_weights("./assets/qnetworks/random_pr_model_black.keras")

In [None]:
# NOTE(review): fix_model() presumably snapshots the current network into
# agent.model_fixed (the fixed copy used while training) - confirm in engines.qnetworks.
agent.fix_model()
# Display the summary of the fixed model.
agent.model_fixed.summary()

## Learn

In [None]:
# Hyper-parameters for the Q-learning run.
# batch_size is defined first because memory_size is derived from it.
batch_size = 512
memory_size = 4 * batch_size  # 2048; passed to QLearning(memory_size=...)

# Arguments forwarded to q_learning.learn(...)
n_episodes = 750
model_fix_episodes = 10
max_episode_length = 50

In [None]:
# Tie the agent and the environment together in a QLearning trainer;
# memory_size is forwarded as its memory_size keyword argument.
q_learning = QLearning(agent, env, memory_size=memory_size)

In [None]:
# Run training with the learning configuration defined above. learn() returns two
# values, kept as episode_rewards and step_rewards; both are saved and plotted below.
episode_rewards, step_rewards = q_learning.learn(
    n_episodes=n_episodes,
    model_fix_episodes=model_fix_episodes,
    max_episode_length=max_episode_length,
    batch_size=batch_size,
)

### Save Results

In [None]:
# Save the trained model and both reward traces under ./assets/qnetworks/.
# File names encode the reward scheme ("nr" when apply_negative_scores is set,
# otherwise "pr") and the colour the model played (the opposite of the opponent's).
# NOTE: with apply_negative_scores=False and opponent_is_white=True the model file
# written here is the same file the agent cell loads as pre-trained weights, so
# saving overwrites it.
pr_or_nr = "nr" if apply_negative_scores else "pr"
model_color = "black" if opponent_is_white else "white"
base_path = f"./assets/qnetworks/random_{pr_or_nr}_"
# np.savetxt does not create missing parent directories, so make sure the
# output folder exists before anything is written (e.g. on a from-scratch run).
os.makedirs(os.path.dirname(base_path), exist_ok=True)
agent.model.save(f"{base_path}model_{model_color}.keras")
np.savetxt(f"{base_path}episode_rewards_{model_color}.csv", episode_rewards, delimiter=",")
np.savetxt(f"{base_path}step_rewards_{model_color}.csv", step_rewards, delimiter=",")

### Visualize Results

In [None]:
# Select which saved run to visualise (model colour and reward scheme).
# Re-declared here, presumably so the plotting cells can be run without
# re-running the training cells above.
opponent_is_white = True
apply_negative_scores = False

In [None]:
# Load the reward traces written by the "Save results" cell.
pr_or_nr = "nr" if apply_negative_scores else "pr"
model_color = "black" if opponent_is_white else "white"
# The prefix must match the one used when saving ("random_"); otherwise the plots
# below show a different run than the one trained in this notebook. base_name is
# also reused for the figure file names.
base_name = f"random_{pr_or_nr}_"
base_path = f"./assets/qnetworks/{base_name}"
episode_rewards = np.loadtxt(f"{base_path}episode_rewards_{model_color}.csv", delimiter=",")
step_rewards = np.loadtxt(f"{base_path}step_rewards_{model_color}.csv", delimiter=",")

In [None]:
# Plot the raw reward of every training step and save the figure as a PNG.
fig, ax = plt.subplots(figsize=(8, 4.5))
ax.plot(step_rewards)
ax.set_xlabel("Step")
ax.set_ylabel("Reward")
ax.set_title("Step Rewards")
ax.grid(True)
fig.tight_layout()
figure_path = f"./figures/qnetworks/{base_name}step_rewards_{model_color}.png"
# savefig does not create missing directories; make sure the target folder exists.
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path, dpi=300, facecolor="white")
plt.show()

In [None]:
# Smooth step rewards with a centred moving average over kernel_size steps.
kernel_size = 250
kernel = np.ones(kernel_size) / kernel_size
# mode="same" zero-pads beyond both ends of the series, which would drag the first
# and last kernel_size // 2 values towards zero. Dividing by the fraction of the
# kernel that overlaps real data makes every point a true mean of the samples under
# the window; interior points (full overlap, coverage 1) are unaffected.
kernel_coverage = np.convolve(np.ones_like(step_rewards, dtype=float), kernel, mode="same")
step_rewards_smooth = np.convolve(step_rewards, kernel, mode="same") / kernel_coverage

In [None]:
# Plot the moving-average step rewards and save the figure as a PNG.
# kernel_size (the smoothing window) appears in both the title and the file name.
fig, ax = plt.subplots(figsize=(8, 4.5))
ax.plot(step_rewards_smooth)
ax.set_xlabel("Step")
ax.set_ylabel("Reward")
ax.set_title(f"Average Step Rewards ({kernel_size} steps)")
ax.grid(True)
fig.tight_layout()
figure_path = f"./figures/qnetworks/{base_name}step_rewards_smooth{kernel_size}_{model_color}.png"
# savefig does not create missing directories; make sure the target folder exists.
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path, dpi=300, facecolor="white")
plt.show()

In [None]:
# Plot the raw reward of every episode and save the figure as a PNG.
fig, ax = plt.subplots(figsize=(8, 4.5))
ax.plot(episode_rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_title("Episode Rewards")
ax.grid(True)
fig.tight_layout()
figure_path = f"./figures/qnetworks/{base_name}episode_rewards_{model_color}.png"
# savefig does not create missing directories; make sure the target folder exists.
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path, dpi=300, facecolor="white")
plt.show()

In [None]:
# Smooth episode rewards with a centred moving average over kernel_size episodes.
kernel_size = 20
kernel = np.ones(kernel_size) / kernel_size
# mode="same" zero-pads beyond both ends of the series, which would drag the first
# and last kernel_size // 2 values towards zero. Dividing by the fraction of the
# kernel that overlaps real data makes every point a true mean of the samples under
# the window; interior points (full overlap, coverage 1) are unaffected.
kernel_coverage = np.convolve(np.ones_like(episode_rewards, dtype=float), kernel, mode="same")
episode_rewards_smooth = np.convolve(episode_rewards, kernel, mode="same") / kernel_coverage

In [None]:
# Plot the moving-average episode rewards and save the figure as a PNG.
# kernel_size (the smoothing window) appears in both the title and the file name.
fig, ax = plt.subplots(figsize=(8, 4.5))
ax.plot(episode_rewards_smooth)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_title(f"Average Episode Rewards ({kernel_size} episodes)")
ax.grid(True)
fig.tight_layout()
figure_path = f"./figures/qnetworks/{base_name}episode_rewards_smooth{kernel_size}_{model_color}.png"
# savefig does not create missing directories; make sure the target folder exists.
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path, dpi=300, facecolor="white")
plt.show()