## Tutorial
# Ray-based Policy Worker Pool
#### Equipped with the kick-start heuristic policy

In [1]:
import pickle

import numpy as np
import ray

import aegomoku.tools as gt
from aegomoku.ray.trainer import create_pool, PolicyRef
from aegomoku.policies.ray_impl import HeuristicRayPolicy

In [2]:
# Checkpoint file holding the pickled training examples (path is relative to
# the notebook's working directory).
examples_file = 'temperature/checkpoint_0.pth.tar.examples'

# SECURITY: unpickling can execute arbitrary code embedded in the file, so only
# load checkpoints that you produced yourself or otherwise trust.
with open(examples_file, "rb") as f:
    checkpoint_payload = pickle.load(f)

# The payload is indexable; its first element is the collection of examples
# used in the rest of this notebook.
# NOTE(review): presumably one entry per training iteration -- confirm against the trainer.
examples = checkpoint_payload[0]

# Take the first component of the last entry as the sample input for the
# tutorial and print its channels.
example = examples[-1][0]
gt.print_channels(example)

shape: (17, 17, 3)
[[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 2 0 0 1 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 1 2 1 1 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 2 1 2 2 1 0 0 0 0 0 0 3]
 [3 0 0 0 0 2 0 1 0 2 1 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 1 1 2 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]
 [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]]


In [3]:
# Number of entries in the example collection loaded above.
len(examples)

136

In [4]:
# Side length of the playable board, passed to the worker pool below.
# NOTE(review): the printed example is 17x17, apparently because it carries a
# one-cell border (value 3) around the 15x15 playing field -- confirm.
BOARD_SIZE = 15

In [5]:
# Initialise Ray for this session. ignore_reinit_error=True suppresses the
# error Ray would otherwise raise if this cell is executed a second time.
rctx = ray.init(ignore_reinit_error=True)

2022-08-04 14:55:33,640	INFO services.py:1470 -- View the Ray dashboard at http://127.0.0.1:8265


In [6]:
# Create a pool of two policy workers serving the heuristic policy and keep the
# returned handle, through which the pool is addressed in the cells below.
# cut_off is forwarded unchanged; its meaning is defined by create_pool.
dispatcher = create_pool(
    num_workers=2,
    policy=HeuristicRayPolicy(),
    board_size=BOARD_SIZE,
    cut_off=0.5,
)

[2m[36m(PolicyWorker pid=61319)[0m 2022-08-04 14:55:41.129969: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
[2m[36m(PolicyWorker pid=61319)[0m To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
[2m[36m(PolicyWorker pid=61319)[0m   result = asarray(a).shape
[2m[36m(PolicyWorker pid=61333)[0m 2022-08-04 14:55:44.650629: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
[2m[36m(PolicyWorker pid=61333)[0m To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
[2m[36m(PolicyWorker pid=61333)[0m   result = asarray(a).shape


### Verify the policy pool's function

A `PolicyRef` is a blocking endpoint to the dispatcher: a call to `predict` returns only once the result is available.

In [7]:
# Wrap the dispatcher in a PolicyRef, which exposes predict() for the cells below.
policy = PolicyRef(dispatcher)

In [8]:
# Query the pool for the sample input. predict() returns a pair:
# p is reshaped to the board grid in the next cell, v is printed here.
# NOTE(review): presumably p is the move distribution and v the value estimate -- confirm.
p, v = policy.predict(example)
print(v)

tf.Tensor(0.9290840531726265, shape=(), dtype=float64)


2022-08-04 14:55:45.381638: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Show p as a BOARD_SIZE x BOARD_SIZE grid of small integers: scale by 255 and
# cast to uint8 so each cell prints compactly. The grid shape is taken from
# BOARD_SIZE rather than repeated as a literal so it stays in sync with the pool.
print((p * 255).astype(np.uint8).reshape(BOARD_SIZE, BOARD_SIZE))

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 245   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]]


In [10]:
# Disconnect from Ray and release what ray.init() started in this session.
ray.shutdown()