### Demonstration of an evaluation based on the MCTSAgent

This notebook uses the configuration file at `CrazyAra/DeepCrazyhouse/configs/main_config.py`
to load the neural network weights.

In [None]:
import chess
import chess.variant
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.insert(0,'../../../')
from DeepCrazyhouse.src.runtime.color_logger import enable_color_logging
from DeepCrazyhouse.src.domain.agent.neural_net_api import NeuralNetAPI
from DeepCrazyhouse.src.domain.agent.player.mcts_agent import MCTSAgent
from DeepCrazyhouse.src.domain.agent.player.raw_net_agent import RawNetAgent
from DeepCrazyhouse.src.domain.variants.game_state import GameState
from DeepCrazyhouse.src.runtime.color_logger import enable_color_logging
from time import time
%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# Matplotlib 3.8 removed the old names, so use whichever whitegrid style this
# installation provides (new name first to avoid the deprecation warning).
# If neither exists, the default style is kept; styling is only cosmetic here.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
enable_color_logging()

In [None]:
# Shared settings for the cells below:
#   batch_size - passed to every NeuralNetAPI instance and to the MCTSAgent
#   threads    - passed to the MCTSAgent
batch_size, threads = 8, 8

### Load the neural network
Change `ctx='cpu'` to `ctx='gpu'` if you have an NVIDIA GPU.

In [None]:
# Two separately constructed network handles: nets[0] is given to the
# RawNetAgent, the complete list is handed to the MCTSAgent.
nets = [NeuralNetAPI(ctx='cpu', batch_size=batch_size) for _ in range(2)]

In [None]:
# Agent built on the first network instance only; it is used in the
# raw-network evaluation section further down.
raw_agent = RawNetAgent(nets[0])

In [None]:
# Lower bound on the time spent per move, forwarded to the agent below.
# NOTE(review): presumably milliseconds (i.e. 10 s) - confirm against MCTSAgent.
min_movetime = 10000

# Search agent that receives the full list of networks; every setting is
# spelled out on its own line so single values are easy to tweak.
mcts_agent = MCTSAgent(
    nets,
    threads=threads,
    playouts_empty_pockets=4096 * 5,
    playouts_filled_pockets=4096 * 5,
    cpuct=2.5,
    u_init_divisor=0.25,
    dirichlet_epsilon=0.25,
    dirichlet_alpha=0.2,
    batch_size=batch_size,
    q_value_weight=0.7,
    max_search_depth=40,
    temperature=0.07,
    virtual_loss=3,
    verbose=True,
    temperature_moves=0,
    enhance_checks=True,
    min_movetime=min_movetime,
    use_pruning=False,
    opening_guard_moves=0,
)

### Setup of board position

In [None]:
# Build the position to analyse, starting from a fresh crazyhouse board.
board = chess.variant.CrazyhouseBoard()

# Option 1: reach the position by pushing a sequence of UCI moves
# (uncomment and edit the lines below)
#board.push_uci('e2e4')
#board.push_uci('e7e6')

# Option 2: set a FEN position directly.
# Test positions - exactly one `fen = ...` line should be active at a time.
#fen = '3R1b2/1bP1kp2/3Npn1p/3p4/5p2/5N1b/PPP1QP1P/3R1RK1/QPpprnpbp b - - 0 29'
fen = 'rn2N2k/pp5p/3pp1pN/3p4/3q1P2/3P1p2/PP3PPP/RN3RK1[Qrbbpbb] b - - 3 30' # d4f2 is winning
#fen ='2kr1b2/1bp2p1p/p3pP1p/1p5Q/5B2/3B1p2/PPP2PrP/R4R1K/QNpnnnp w - - 0 18'
#fen = 'r1bq1b1r/ppp1kPpp/4Pn2/n2Pp3/2B4n/3P4/PPP2PPP/RNBQK2R/ w KQ - 0 10'
#fen = '3rkb1r/2pb1pp1/q2Pp3/3pP2p/3P4/2PPnP2/P1P1N1PP/R2R2K1/NNQbb b k - 0 21' # mate threat from JannLee game

# Overwrites whatever is currently on the board with the selected FEN.
board.set_fen(fen)

# Wrap the board in the project's GameState; this object is what both agents
# receive in evaluate_board_state() in the cells below.
state = GameState(board)
# Last expression of the cell: displays the board as the cell output.
board

In [None]:
# Number of legal moves in the position that was set up above
len(state.get_legal_moves())

In [None]:
def plot_moves_with_prob(moves, probs, only_top_x=None):
    """
    Draw a horizontal bar chart of per-move values on the current pyplot axes.

    The entries are ranked from the highest to the lowest value in `probs`;
    the highest-valued move is placed at y position 0, the next at 1, etc.

    :param moves: Sequence of move labels, index-aligned with `probs`
    :param probs: Sequence of numeric values (one per move) used for ranking
                  and as bar lengths
    :param only_top_x: If given and smaller than the number of entries, only
                       this many of the highest-ranked entries are plotted
    :return: None
    """
    # argsort is ascending, reversing it yields a highest-first ranking
    ranking = np.argsort(probs)[::-1]

    truncate = only_top_x is not None and only_top_x < len(ranking)
    if truncate:
        ranking = ranking[:only_top_x]

    top_values = [probs[i] for i in ranking]
    top_labels = [moves[i] for i in ranking]

    # one bar per selected move, labelled with the move itself
    positions = range(len(top_values))
    plt.barh(positions, top_values)
    plt.yticks(positions, top_labels)


### Evaluation using the raw network

In [None]:
# Evaluate the position once with the raw network agent and report the
# wall-clock duration of the call.
t_s = time()
(pred_value, legal_moves, p_vec_small, cp, depth,
 nodes, time_elapsed_s, nps, pv) = raw_agent.evaluate_board_state(state)
print('Elapsed time: %.4fs' % (time()-t_s))

#### Value evaluation
Value is always returned with respect to the current player to move.

Therefore, if it is Black's turn to move and the evaluation is positive, the network assumes an advantage for Black.
* `cp` is the value converted into the centipawn metric. +100 cp corresponds to an advantage of one pawn.
* `pred_value` is the original predicted value in the range [-1, +1].

In [None]:
# Centipawn score returned by the most recent evaluate_board_state() call
cp

In [None]:
# Predicted value returned by the most recent evaluate_board_state() call
pred_value

#### Raw network policy
The returned policy is deterministic for a given set of neural network weights.

In [None]:
# Ten highest-rated moves of the policy returned by the preceding evaluation.
# Title and axis label are set so the figure can be told apart from the other
# bar charts in this notebook.
plot_moves_with_prob(legal_moves, p_vec_small, only_top_x=10)
plt.title('Raw network policy (top 10 moves)')
plt.xlabel('Probability');  # trailing ';' hides the Text repr in the cell output

### Evaluation using the MCTS-Agent

In [None]:
# Evaluate the same position with the MCTS agent and report the wall-clock
# duration of the call. This rebinds pred_value, legal_moves, p_vec_small, cp,
# etc., replacing the values from the raw-network evaluation.
t_s = time()
(pred_value, legal_moves, p_vec_small, cp, depth,
 nodes, time_elapsed_s, nps, pv) = mcts_agent.evaluate_board_state(state)
print('Elapsed time: %.4fs' % (time()-t_s))

### Plots for the policy, visits and Q-values

In [None]:
# Ten highest-rated moves of the policy returned by the preceding evaluation.
# Title and axis label are set so the figure can be told apart from the other
# bar charts in this notebook.
plot_moves_with_prob(legal_moves, p_vec_small, only_top_x=10)
plt.title('MCTS policy (top 10 moves)')
plt.xlabel('Probability');  # trailing ';' hides the Text repr in the cell output

In [None]:
# Ten most-visited moves, taken from the root node of the MCTS agent.
# NOTE(review): assumes child_number_visits is index-aligned with legal_moves - confirm.
plot_moves_with_prob(legal_moves, mcts_agent.root_node.child_number_visits, only_top_x=10)
plt.title('MCTS visits per move (top 10 moves)')
plt.xlabel('Number of visits');  # trailing ';' hides the Text repr in the cell output

In [None]:
# Ten moves with the highest Q-value, taken from the root node of the MCTS agent.
# NOTE(review): assumes q_value is index-aligned with legal_moves - confirm.
plot_moves_with_prob(legal_moves, mcts_agent.root_node.q_value, only_top_x=10)
plt.title('MCTS Q-values per move (top 10 moves)')
plt.xlabel('Q-value');  # trailing ';' hides the Text repr in the cell output