# Q-Networks for Hexagon Chess

## Setup

In [None]:
# Setup
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; mode 2 reloads all modules before each
# cell executes, so edits to imported source files take effect without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import tensorflow.keras as krs
# Plot styling shared by every figure in this notebook.
plt.style.use("tableau-colorblind10")
plt.rcParams.update({'font.size': 14})

# Ask TensorFlow to grow GPU memory on demand. The setting has to be the same
# on every visible GPU, so it is applied to all of them rather than only the
# first one. On a CPU-only machine the list is empty and indexing gpus[0]
# would raise an IndexError, so that case is reported instead.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
        print(gpu)
else:
    print("No GPU detected; TensorFlow will run on the CPU.")

In [None]:
# Hex Chess Environment & Players
from hexchess.board import HexChessBoard
from engines.environment import HexChessEnv
from engines.qnetworks import QNetworkAgent, QLearning
from hexchess.players import RandomPlayer, GreedyPlayer
from utils import plot_step_rewards, plot_episode_rewards

In [None]:
# Constants
# base_path holds the reward logs, figs_path the generated plots; both use the
# same "qnetworks" sub-directory name under their respective roots.
base_path, figs_path = (
    os.path.join(root, "qnetworks") for root in ("assets", "figures")
)

## The Environment

In [None]:
# Teach the Q-network to play against a greedy opponent. The opponent is not
# white here, so the model name built later in this notebook is tagged "white".
opponent_class = GreedyPlayer
opponent_is_white = False
apply_negative_scores = False

# save_dir is a randomly numbered sub-directory of "states"
# (presumably where the board persists its states - confirm in HexChessBoard).
run_id = str(np.random.randint(1000, 10000)).zfill(5)
board_kwargs = dict(
    initialize_random=True,
    save_dir=os.path.join("states", run_id),
)

env = HexChessEnv(
    opponent_class,
    opponent_is_white,
    apply_negative_scores=apply_negative_scores,
    board_kwargs=board_kwargs,
)

In [None]:
# Render the environment as it stands right after construction.
env.render()

## The Agent

In [None]:
# Configure agent
is_large = True                # forwarded to QNetworkAgent(is_large=...)
use_pretrained_weights = True  # when False the agent gets no model path

if use_pretrained_weights:
    pretrained_model_path = "./assets/qnetworks/randomgreedy_pr_white_large"
else:
    pretrained_model_path = None

In [None]:
# Construct model name
# Layout: <pretrained prefix><opponent>_<pr|nr>_<color>[_large]
if pretrained_model_path is None:
    pretrained_pre_ = ""
else:
    # First "_"-separated token of the checkpoint's file name, extension dropped.
    checkpoint_stem = os.path.splitext(os.path.basename(pretrained_model_path))[0]
    pretrained_pre_ = checkpoint_stem.split("_")[0]

# First word of the opponent's display name, lower-cased.
opponent_name = opponent_class.name.split(" ")[0].lower()
_reward_app = "_nr" if apply_negative_scores else "_pr"
model_color = "black" if opponent_is_white else "white"
_large_app = "_large" if is_large else ""

model_name = "".join(
    (pretrained_pre_, opponent_name, _reward_app, "_", model_color, _large_app)
)
print(model_name)

In [None]:
# Initialize the agent from the configuration chosen in the cells above
agent_kwargs = dict(
    model_path=pretrained_model_path,
    is_large=is_large,
    discount_factor=0.1,
    learning_rate=1e-3,
)
agent = QNetworkAgent(**agent_kwargs)

In [None]:
# Call fix_model() and show the summary of agent.model_fixed
# (presumably a frozen copy of the trainable network - confirm in QNetworkAgent).
agent.fix_model()
agent.model_fixed.summary()

## Learn

In [None]:
# Learning configuration
batch_size = 512
memory_size = 4 * batch_size  # memory is four times the batch size
n_episodes = 5000
max_episode_length = 50
model_fix_episodes = 10

In [None]:
# Tie the agent and the environment together in a learner with the configured memory size.
q_learning = QLearning(agent, env, memory_size=memory_size)

In [None]:
# Run the learning loop with the configuration above and unpack the two
# reward logs it returns.
learn_kwargs = dict(
    n_episodes=n_episodes,
    model_fix_episodes=model_fix_episodes,
    max_episode_length=max_episode_length,
    batch_size=batch_size,
)
episode_rewards, step_rewards = q_learning.learn(**learn_kwargs)

### Save Results

In [None]:
# Save results
# Write the trained model and both reward logs side by side in base_path.
# The directory is created on demand so a fresh checkout does not fail here.
os.makedirs(base_path, exist_ok=True)
# Save in the native ".keras" format (agent.model is presumably a Keras model):
# recent Keras versions reject extension-less paths, and the
# "Generate all figures" cell discovers models by that extension.
agent.model.save(os.path.join(base_path, f"{model_name}.keras"))
np.savetxt(os.path.join(base_path, f"{model_name}_episode_rewards.csv"), episode_rewards, delimiter=",")
np.savetxt(os.path.join(base_path, f"{model_name}_step_rewards.csv"), step_rewards, delimiter=",")

### Load Results

In [None]:
# Model configuration
# Mirrors the training configuration so results can be loaded without
# re-running the cells above.
opponent_class = GreedyPlayer
opponent_is_white = False
apply_negative_scores = False
is_large = True
use_pretrained_weights = True
# NOTE(review): this checkpoint path differs from the one in the
# "Configure agent" cell; only its first "_"-separated token is used below.
if use_pretrained_weights:
    pretrained_model_path = "./assets/qnetworks/randomgreedy_pr_model_white_large.keras"
else:
    pretrained_model_path = None

# Model name
# Layout: <pretrained prefix><opponent>_<pr|nr>_<color>[_large]
if pretrained_model_path is None:
    pretrained_pre_ = ""
else:
    checkpoint_stem = os.path.splitext(os.path.basename(pretrained_model_path))[0]
    pretrained_pre_ = checkpoint_stem.split("_")[0]
opponent_name = opponent_class.name.split(" ")[0].lower()
_reward_app = "_nr" if apply_negative_scores else "_pr"
model_color = "black" if opponent_is_white else "white"
_large_app = "_large" if is_large else ""
model_name = "".join(
    (pretrained_pre_, opponent_name, _reward_app, "_", model_color, _large_app)
)
print(model_name)

In [None]:
# Load results
# Both logs are written with delimiter="," by the "Save results" cell, so both
# are read back with the same delimiter. The step log used to rely on
# np.loadtxt's whitespace default, which only works for single-column data.
episode_rewards = np.loadtxt(os.path.join(base_path, f"{model_name}_episode_rewards.csv"), delimiter=",")
step_rewards = np.loadtxt(os.path.join(base_path, f"{model_name}_step_rewards.csv"), delimiter=",")

### Visualize Results

In [None]:
# Make plots
# Both figures are written into figs_path, named after the current model.
step_fig_path = os.path.join(figs_path, f"{model_name}_step_rewards.png")
episode_fig_path = os.path.join(figs_path, f"{model_name}_episode_rewards.png")

fig, ax = plot_step_rewards(step_rewards, save_path=step_fig_path)
fig, ax = plot_episode_rewards(episode_rewards, save_path=episode_fig_path, kernel_size=100)
plt.show()

In [None]:
# Generate all figures
# Every "*.keras" file in base_path names a saved model; rebuild its reward
# plots from the CSV logs stored next to it. Sorting makes the processing
# order deterministic (os.listdir returns entries in arbitrary order).
model_names = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir(base_path)
    if os.path.splitext(file)[-1] == ".keras"
)

# Loop-local names are used on purpose: reusing `model_name`,
# `episode_rewards` and `step_rewards` would overwrite the notebook-level
# variables and silently change what a re-run of the cells above operates on.
for saved_name in model_names:
    episode_csv = os.path.join(base_path, f"{saved_name}_episode_rewards.csv")
    step_csv = os.path.join(base_path, f"{saved_name}_step_rewards.csv")
    if not (os.path.isfile(episode_csv) and os.path.isfile(step_csv)):
        # A model file without reward logs next to it should not abort the
        # whole batch.
        print(f"{saved_name}: reward logs not found, skipping")
        continue

    print(saved_name)
    # The logs are comma-delimited, matching how the "Save results" cell writes them.
    rewards_per_episode = np.loadtxt(episode_csv, delimiter=",")
    rewards_per_step = np.loadtxt(step_csv, delimiter=",")

    # Close each figure right after it is produced so figures do not pile up
    # in memory while iterating over many models.
    fig, ax = plot_step_rewards(rewards_per_step, save_path=os.path.join(figs_path, f"{saved_name}_step_rewards.png"))
    plt.close(fig)
    fig, ax = plot_episode_rewards(rewards_per_episode, save_path=os.path.join(figs_path, f"{saved_name}_episode_rewards.png"))
    plt.close(fig)