# Q-Networks for Hexagon Chess

## Setup

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and use mode 2 (reload all imported modules
# before executing each cell), so edits to the project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os

In [3]:
from engines.environment import HexChessEnv
from engines.qnetworks import QNetworkAgent, QLearning
from hexchess.players import RandomPlayer

pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [4]:
import tensorflow.keras as krs

## The environment

In [5]:
# Training setup: the Q networks learn against a randomly moving opponent
# that is given the white pieces.
opponent_is_white = True
opponent_class = RandomPlayer
env = HexChessEnv(
    opponent_class,
    opponent_is_white,
    apply_negative_scores=False,
)

In [6]:
# Print the starting position as a text board; the two sides are shown in
# different colours and empty cells as "*".
env.render()

     [34mB[0m [34mK[0m [34mN[0m [34mR[0m [34mP[0m * 
    [34mQ[0m [34mB[0m * * [34mP[0m * * 
   [34mN[0m * [34mB[0m * [34mP[0m * * * 
  [34mR[0m * * * [34mP[0m * * * * 
 [34mP[0m [34mP[0m [34mP[0m [34mP[0m [34mP[0m * * * * * 
* * * * * * * * * * * 
 * * * * * [33mP[0m [33mP[0m [33mP[0m [33mP[0m [33mP[0m 
  * * * * [33mP[0m * * * [33mR[0m 
   * * * [33mP[0m * [33mB[0m * [33mN[0m 
    * * [33mP[0m * * [33mB[0m [33mK[0m 
     * [33mP[0m [33mR[0m [33mN[0m [33mQ[0m [33mB[0m 


## The Agent

In [7]:
# Create the Q-network agent with its training hyperparameters.
# NOTE(review): assuming discount_factor is the usual Q-learning gamma, 0.1
# weighs future rewards very lightly, and learning_rate=0.1 is large for
# gradient-based training — confirm both values are intentional.
agent = QNetworkAgent(discount_factor=0.1, learning_rate=1e-1)

In [8]:
# fix_model() is called first so that agent.model_fixed is available
# (presumably a frozen target-network copy of the online model — TODO confirm
# in engines.qnetworks); summary() then prints its layer architecture.
agent.fix_model()
agent.model_fixed.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 11, 11, 6)         330       
                                                                 
 up_sampling2d (UpSampling2  (None, 33, 33, 6)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 33, 33, 3)         165       
                                                                 
 up_sampling2d_1 (UpSamplin  (None, 99, 99, 3)         0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 95, 95, 1)         76        
                                                                 
 conv2d_3 (Conv2D)           (None, 91, 91, 1)         2

## Learn

In [9]:
# Hyperparameters for the Q-learning run (consumed by QLearning and learn())
batch_size = 1024
memory_size = 4 * batch_size  # memory capacity: four batches' worth of entries
n_episodes = 100
max_episode_length = 50  # cap on the number of moves in one episode
model_fix_episodes = 10

In [10]:
# Build the Q-learning trainer for the agent/environment pair.
# batch_size is intentionally not reassigned here: it is defined once in the
# learning-configuration cell, which also derives memory_size from it, so a
# second assignment would silently override edits made there and let the two
# values drift apart.
q_learning = QLearning(agent, env, memory_size=memory_size)

In [None]:
# Run the training loop with the settings from the learning-configuration
# cell. The cell output shows one line per episode with its reward and number
# of moves. The call returns two reward sequences (per episode and per step,
# going by their names) that the following cells save and plot.
episode_rewards, step_rewards = q_learning.learn(
    n_episodes=n_episodes,
    model_fix_episodes=model_fix_episodes,
    max_episode_length=max_episode_length,
    batch_size=batch_size,
)

Episode 0: reward = 10 over 50 moves
Episode 1: reward = 6 over 50 moves
Episode 2: reward = 20 over 50 moves
Episode 3: reward = 2 over 50 moves
Episode 4: reward = 54 over 34 moves
Episode 5: reward = 12 over 50 moves
Episode 6: reward = 18 over 50 moves
Episode 7: reward = 18 over 50 moves
Episode 8: reward = 17 over 50 moves
Episode 9: reward = 0 over 13 moves
Episode 10: reward = 29 over 50 moves
Episode 11: reward = 21 over 50 moves
Episode 12: reward = 6 over 50 moves
Episode 13: reward = 15 over 50 moves
Episode 14: reward = 4 over 50 moves
Episode 15: reward = 7 over 50 moves
Episode 16: reward = 23 over 50 moves
Episode 17: reward = 14 over 50 moves
Episode 18: reward = 50 over 46 moves
Episode 19: reward = 23 over 50 moves
Episode 20: reward = 7 over 50 moves
Episode 21: reward = 19 over 50 moves
Episode 22: reward = 11 over 50 moves
Episode 23: reward = 3 over 50 moves
Episode 24: reward = 12 over 50 moves


### Save results

In [None]:
# Persist the trained network and both reward logs in the working directory
krs.saving.save_model(agent.model, "./model.keras")
for csv_path, reward_log in (
    ("./episode_rewards.csv", episode_rewards),
    ("./step_rewards.csv", step_rewards),
):
    np.savetxt(csv_path, reward_log, delimiter=",")

In [None]:
# Read both reward logs back from their CSV files (episode log first)
episode_rewards, step_rewards = (
    np.loadtxt(csv_path, delimiter=",")
    for csv_path in ("./episode_rewards.csv", "./step_rewards.csv")
)

### Visualize Results

In [None]:
# Plot the raw step rewards; title and axis labels are set so the figure can
# be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(step_rewards)
ax.set(title="Step rewards", xlabel="Step", ylabel="Reward")
plt.show()

In [None]:
# Smooth step rewards with a centred moving average of width kernel_size.
kernel_size = 125
kernel = np.ones(kernel_size) / kernel_size
# mode="same" zero-pads the signal, which on its own drags the first and last
# kernel_size // 2 values towards zero. Dividing by the share of the kernel
# that overlaps real data turns every edge value into the mean of the samples
# actually inside its window; interior values (coverage 1.0) are unchanged up
# to floating-point rounding, and the output length is the same as before.
kernel_coverage = np.convolve(
    np.ones_like(step_rewards, dtype=float), kernel, mode="same"
)
step_rewards_smooth = np.convolve(step_rewards, kernel, mode="same") / kernel_coverage

In [None]:
# Plot the smoothed step rewards; title and axis labels are set so the figure
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(step_rewards_smooth)
ax.set(
    title="Smoothed step rewards (moving average)",
    xlabel="Step",
    ylabel="Reward",
)
plt.show()

In [None]:
# Plot the raw episode rewards; title and axis labels are set so the figure
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(episode_rewards)
ax.set(title="Episode rewards", xlabel="Episode", ylabel="Reward")
plt.show()

In [None]:
# Smooth episode rewards with a centred moving average of width kernel_size.
kernel_size = 10
kernel = np.ones(kernel_size) / kernel_size
# mode="same" zero-pads the signal, which on its own drags the first and last
# kernel_size // 2 values towards zero. Dividing by the share of the kernel
# that overlaps real data turns every edge value into the mean of the samples
# actually inside its window; interior values (coverage 1.0) are unchanged up
# to floating-point rounding, and the output length is the same as before.
kernel_coverage = np.convolve(
    np.ones_like(episode_rewards, dtype=float), kernel, mode="same"
)
episode_rewards_smooth = (
    np.convolve(episode_rewards, kernel, mode="same") / kernel_coverage
)

In [None]:
# Plot the smoothed episode rewards; title and axis labels are set so the
# figure can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(episode_rewards_smooth)
ax.set(
    title="Smoothed episode rewards (moving average)",
    xlabel="Episode",
    ylabel="Reward",
)
plt.show()