## Validation Script after a full Training Run

Runs an evaluation of the trained model on the test set,
plus some additional evaluations that are mostly for demonstration purposes.

* author: QueensGambit

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%reload_ext autoreload

In [None]:
import sys
sys.path.insert(0, '../../../')
import numpy as np
import chess.svg
import torch

from DeepCrazyhouse.src.training.lr_schedules.lr_schedules import *
from DeepCrazyhouse.src.training.validate_train_results_util import predict_single, eval_pos, show_first_x_examples, show_mating_fail_examples
from DeepCrazyhouse.src.runtime.color_logger import enable_color_logging
from DeepCrazyhouse.configs.train_config import TrainConfig, TrainObjects
from DeepCrazyhouse.src.training.train_cli_util import create_pytorch_model, create_validation_data, fill_train_objects
from DeepCrazyhouse.configs.main_config import main_config
from DeepCrazyhouse.src.training.trainer_agent_pytorch import load_torch_state, get_context, get_data_loader, evaluate_metrics
from DeepCrazyhouse.src.domain.variants.input_representation import board_to_planes, planes_to_board
from DeepCrazyhouse.src.domain.variants.output_representation import policy_to_moves, policy_to_best_move
from DeepCrazyhouse.src.preprocessing.dataset_loader import load_pgn_dataset
from DeepCrazyhouse.src.domain.variants.constants import MODE_CHESS, MODE_CRAZYHOUSE

## Settings

In [2]:
# Path of the checkpoint whose weights are loaded by load_torch_state further down.
model_tar_path = "<insert-tar-path-here>"
model_type = "<insert-model-type-here>"  # same as was used for training (e.g. resnet)
# NOTE(review): replace the placeholder with the bool True or False, not with a string --
# any non-empty string (including "False") is truthy in Python.
use_custom_architecture = "<True-or-False>"  # decide if the model_config.py file should be used (e.g. False)

In [None]:
# Set up logging, read the configuration and build the objects every later cell relies on.
enable_color_logging()
mode = main_config["mode"]  # compared against MODE_CHESS / MODE_CRAZYHOUSE below
tc = TrainConfig()
ctx = get_context(tc.context, tc.device_id)  # passed to evaluate_metrics later on
to = TrainObjects()
fill_train_objects(tc, to)

# The validation data is needed here only to derive the model's input shape
# from its first sample; x_val / yp_val are reused by later cells.
val_data, x_val, yp_val = create_validation_data(tc)
input_shape = x_val[0].shape

model = create_pytorch_model(model_type, input_shape, tc, use_custom_architecture)

print('load current model weights:', model_tar_path)

# load_torch_state takes an optimizer argument; a throwaway SGD instance is created
# just for this call and is not kept afterwards.
# NOTE(review): presumably the checkpoint also stores optimizer state -- confirm.
load_torch_state(model, torch.optim.SGD(model.parameters(), lr=tc.max_lr), model_tar_path, tc.device_id)

## Show move predictions

In [None]:
# `chess.variant` is a separate submodule; the notebook only imports `chess.svg`,
# so import it explicitly instead of relying on another module having loaded it.
import chess.variant

# Index of the validation sample used when the board is rebuilt from input planes
# (and for the policy label looked up in a later cell).
idx = 0

# Pick the start position that matches the configured mode; for any other mode
# fall back to decoding the validation sample at `idx`.
if mode == MODE_CHESS:
    start_board = chess.Board()
elif mode == MODE_CRAZYHOUSE:
    start_board = chess.variant.CrazyhouseBoard()
else:
    start_board = planes_to_board(x_val[idx], normalized_input=tc.normalize, mode=mode)
board = start_board
# Report the side to move and, for crazyhouse, the pockets.
print(chess.COLOR_NAMES[board.turn])
if board.uci_variant == "crazyhouse":
    print(board.pockets)
# Last expression of the cell: lets the notebook render the board.
board

In [None]:
# `net` is the name the remaining cells use for the loaded model.
net = model
# Switch the network to evaluation mode before running any predictions.
net.eval()

In [None]:
# Encode the current board as input planes and run a single prediction on it.
x_start_pos = board_to_planes(board, normalize=tc.normalize, mode=mode)
pred = predict_single(net, x_start_pos, tc.select_policy_from_plane)
# Display the raw prediction; a later cell reads the policy from pred[1][0].
pred

In [None]:
# Best move according to the validation policy label of sample `idx`.
# NOTE(review): `board` was built from x_val[idx] only in the fallback branch of the
# start-board cell; for MODE_CHESS / MODE_CRAZYHOUSE it is the default start position,
# so the label may not belong to this board -- confirm this is intended.
policy_to_best_move(board, yp_val[idx])

In [None]:
# Number of candidate moves to list and plot.
opts = 5
# Turn the predicted policy (pred[1][0]) into moves and their probabilities.
# NOTE(review): presumably sorted by descending probability -- verify in policy_to_moves.
selected_moves, probs = policy_to_moves(board, pred[1][0])
selected_moves[:opts]

In [None]:
# Horizontal bar chart of the first `opts` moves; the y positions are reversed so
# that the first entry is drawn at the top.
# NOTE(review): `plt` is never imported explicitly in this notebook; presumably it
# arrives through the wildcard import from lr_schedules -- confirm.
y_positions = range(opts)[::-1]
ax = plt.gca()
ax.barh(y_positions, probs[:opts])
ax.set_yticks(y_positions)
ax.set_yticklabels(selected_moves[:opts])

In [None]:
# Work on a copy: pushing onto the aliased `start_board` would mutate it, and
# re-running this cell would then replay the moves on top of the previous run and
# fail on the first move that is no longer legal.
board = start_board.copy()
# Opening sequence that ends with the white queen on h5 and the bishop on c4.
# NOTE(review): these moves assume the standard start position.
for uci_move in ('e2e4', 'e7e5', 'f1c4', 'b8c6', 'd1h5'):
    board.push_uci(uci_move)
# Encode the resulting position for the prediction in the next cell.
x_scholar_atck = board_to_planes(board, normalize=tc.normalize, mode=mode)
board

In [None]:
# Predict on the position prepared in the previous cell and chart the first
# `opts` moves, first entry at the top.
# NOTE(review): `plt` is never imported explicitly in this notebook; presumably it
# arrives through the wildcard import from lr_schedules -- confirm.
pred = predict_single(net, x_scholar_atck, tc.select_policy_from_plane)
selected_moves, probs = policy_to_moves(board, pred[1][0])

y_positions = range(opts)[::-1]
ax = plt.gca()
ax.barh(y_positions, probs[:opts])
ax.set_yticks(y_positions)
ax.set_yticklabels(selected_moves[:opts])

In [None]:
# Play the first move returned by policy_to_moves and show the resulting position.
# Note: this mutates `board`, so every re-run of the cell pushes the same move object
# again on top of the previous run (python-chess `push` does not check legality).
board.push(selected_moves[0])
board

### Performance on test dataset

In [None]:
# Load part 0 of the test dataset.
# The normalization flag comes from the training configuration rather than a
# hard-coded True, matching every other input preparation in this notebook.
(s_idcs_test, x_test, yv_test, yp_test,
 yplys_test, pgn_datasets_test) = load_pgn_dataset(dataset_type='test',
                                                   part_id=0,
                                                   verbose=True,
                                                   normalize=tc.normalize)
# Wrap the arrays in a data loader; shuffle=False keeps the dataset order.
test_data = get_data_loader(x_test, yv_test, yp_test, yplys_test, tc, shuffle=False)

In [None]:
# Evaluate the configured metrics on the test data; the keyword arguments mirror
# the training configuration.
# NOTE(review): nb_batches=None presumably means "use all batches" -- confirm.
test_eval_kwargs = {
    "nb_batches": None,
    "sparse_policy_label": tc.sparse_policy_label,
    "ctx": ctx,
    "apply_select_policy_from_plane": tc.select_policy_from_plane,
    "use_wdl": tc.use_wdl,
    "use_plys_to_end": tc.use_plys_to_end,
}
evaluate_metrics(to.metrics, test_data, net, **test_eval_kwargs)

### Show result on mate-in-one problems

In [None]:
# Load part 0 of the mate-in-one dataset with the configured normalization.
s_idcs_mate, x_mate, yv_mate, yp_mate, yplys_mate, pgn_dataset_mate = load_pgn_dataset(dataset_type='mate_in_one',
                                                                                       part_id=0,
                                                                                       verbose=True,
                                                                                       normalize=tc.normalize)
# Replace the loaded plys-to-end labels with ones, one per value label.
# NOTE(review): presumably because every position here is one ply away from mate -- confirm.
yplys_mate = np.ones(len(yv_mate))
# shuffle=False keeps the dataset order.
mate_data = get_data_loader(x_mate, yv_mate, yp_mate, yplys_mate, tc, shuffle=False)

### Mate In One Performance

In [None]:
# Evaluate the configured metrics on the mate-in-one data; the keyword arguments
# mirror the training configuration.
# NOTE(review): nb_batches=None presumably means "use all batches" -- confirm.
mate_eval_kwargs = {
    "nb_batches": None,
    "sparse_policy_label": tc.sparse_policy_label,
    "ctx": ctx,
    "apply_select_policy_from_plane": tc.select_policy_from_plane,
    "use_wdl": tc.use_wdl,
    "use_plys_to_end": tc.use_plys_to_end,
}
evaluate_metrics(to.metrics, mate_data, net, **mate_eval_kwargs)

### Show some example mate problems

In [None]:
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
### Evaluate Performance

In [None]:
# Run eval_pos on every mate-in-one position and collect the per-position results.
nb_pos = len(x_mate)
mates_found, mates_5_top_found = [], []
legal_mv_cnts, mate_mv_cnts = [], []

# The unpacked names are kept as they were, so `pred`, `board`, ... still hold the
# values of the last evaluated position once the loop has finished.
# Assumes x_mate and yp_mate iterate along their first axis in lockstep.
for planes, policy_label in zip(x_mate, yp_mate):
    (pred, pred_moves, true_move, board,
     is_mate, is_mate_5_top, legal_mv_cnt, mate_mv_cnt) = eval_pos(
        net, planes, policy_label,
        select_policy_from_plane=tc.select_policy_from_plane)
    mates_found.append(is_mate)
    mates_5_top_found.append(is_mate_5_top)
    legal_mv_cnts.append(legal_mv_cnt)
    mate_mv_cnts.append(mate_mv_cnt)

In [None]:
# Average of the per-position mate_mv_cnt values collected in the evaluation loop.
np.mean(mate_mv_cnts)

In [None]:
# Average of the per-position legal_mv_cnt values collected in the evaluation loop.
np.mean(legal_mv_cnts)

### Random Guessing Baseline

In [None]:
# Baseline: ratio of the average mate_mv_cnt to the average legal_mv_cnt.
np.mean(mate_mv_cnts) / np.mean(legal_mv_cnts)

### Prediction Performance

In [None]:
# Share of positions whose is_mate flag was set by eval_pos.
# Raises ZeroDivisionError if the dataset is empty (nb_pos == 0).
print('mate_in_one_acc:', sum(mates_found) / nb_pos)

In [None]:
# Share of positions whose is_mate_5_top flag was set by eval_pos.
# NOTE(review): presumably "a mating move is among the top five predictions" -- confirm.
sum(mates_5_top_found) / nb_pos

In [None]:
# Show the structure of the loaded mate-in-one dataset.
# NOTE(review): presumably a zarr group -- confirm the type returned by load_pgn_dataset.
pgn_dataset_mate.tree()

In [None]:
# Materialize the 'metadata' entry as a NumPy array and look at its first two rows.
# NOTE(review): row 0 looks like a header row (the next cell slices from row 1) -- confirm.
metadata = np.array(pgn_dataset_mate['metadata'])
metadata[0, :]
metadata[1, :]

In [None]:
# Column 1 of every metadata row except the first.
# NOTE(review): presumably the "Site" field of each game, with row 0 being the header -- confirm.
site_mate = metadata[1:, 1]

## Show the result of the first 17 examples

In [None]:
# Visualize the first 17 mate-in-one positions using the project helper.
show_first_x_examples(17, net, site_mate, tc, x_mate, yp_mate)

## Show examples where it failed

In [None]:
# Visualize failure cases using the project helper.
# NOTE(review): presumably 15 is the maximum number of examples shown -- confirm.
show_mating_fail_examples(15, net, site_mate, tc, x_mate, yp_mate)