In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import print_function, absolute_import, division

In [3]:
import numpy as np
from GomokuBoard import GomokuBoard
from GomokuTools import GomokuTools as gt
from NH9x9 import NH9x9
from Heuristics import Heuristics

In [4]:
# Letter names bound to the integers 1..21
# (presumably board coordinate labels - TODO confirm against GomokuBoard).
(A, B, C, D, E, F, G,
 H, I, J, K, L, M, N,
 O, P, Q, R, S, T, U) = range(1, 22)

# Small integer codes 0, 1, 2
# (presumably stone colours plus a border marker - TODO confirm).
BLACK, WHITE, EDGES = range(3)

## Single line scores

`Heuristics` pre-computes all counts and scores at initialization time.

In [5]:
# Build the heuristics object once; the cells below query it for line scores
# and total scores.
h = Heuristics(kappa=3.0)

The cells below display the relevance count, which is a function of the number of offensive stones, their density, and their open-endedness. They also display the score, which adds some finer-grained heuristic relevance aspects.

In [6]:
# Encode an xo-string as a pair of integers; that pair is what the line-score
# calls in the next cells take as arguments.
gt.line_for_xo('...xxo..')

[24, 4]

In [7]:
# Score of the line encoded as (24, 4), i.e. the xo-string '...xxo..'.
h.line_score_for(24,4)

3.5

In [8]:
# Same encoded line through the lookup variant; it returns the same score as
# line_score_for does for (24, 4).
h.lookup_line_score(24,4)

3.5

In [9]:
# Input shape (1, 2, 4): two rows of four per-line values each. The result
# holds one total per row (shape (1, 2)).
h.lookup_total_scores(np.array([[[4, 5, 0, 0],[4, 7.5, 0, 0]]]))

array([[6.9, 8. ]])

In [10]:
# Sample xo-strings; each one is printed next to its looked-up line score.
xo_examples = [
    '...ox...', '...x..xo', '.....x..', '...x.xo.',
    '...x..x.', '...xxo..', 'xx......', '...x.x..',
    '..oxx.x.', '...xx...', '.xx.....', '.....xxx',
    '...xxxo.', '...xx.x.', '...xxx..', '.oxxxx..',
    '..xxxx..',
]
for i in xo_examples:
    encoded = gt.line_for_xo(i)
    print(i, h.lookup_line_score(*encoded))

...ox... 0.5
...x..xo 1.5
.....x.. 2.0
...x.xo. 2.5
...x..x. 3.0
...xxo.. 3.5
xx...... 3.5
...x.x.. 4.0
..oxx.x. 4.5
...xx... 5.0
.xx..... 5.0
.....xxx 5.5
...xxxo. 5.5
...xx.x. 6.0
...xxx.. 7.0
.oxxxx.. 7.5
..xxxx.. 9.0


## Multi-line (neighbourhood) scores

In [11]:
# Draw from a dedicated, seeded generator so that Restart & Run All reproduces
# the same sample (the original used the unseeded global RNG).
# NOTE: outputs saved before the seed was added will differ until re-run.
rng = np.random.RandomState(42)

# Shape (5, 5, 4), values in steps of 0.5 from 0.5 to 9.0. Taking the square
# root of a uniform integer draw skews the result toward the low end.
line_counts = 9 - np.sqrt(rng.randint(0, 18**2, [5, 5, 4])).astype(int) / 2

In [12]:
line_counts

array([[[1.5, 2.5, 3.5, 1.5],
        [1.5, 8.5, 2. , 3.5],
        [5.5, 1. , 3. , 0.5],
        [5.5, 2.5, 2.5, 1.5],
        [2.5, 1. , 7.5, 1. ]],

       [[1.5, 3. , 3.5, 0.5],
        [2.5, 6.5, 8.5, 3.5],
        [6.5, 3.5, 7.5, 7.5],
        [8.5, 3. , 1. , 1.5],
        [1.5, 1. , 5. , 2. ]],

       [[2.5, 4.5, 5.5, 4.5],
        [6.5, 1.5, 4. , 1.5],
        [8. , 2. , 0.5, 1. ],
        [6. , 8. , 7. , 6. ],
        [1. , 1. , 4.5, 1.5]],

       [[2. , 2.5, 2.5, 3.5],
        [3. , 5. , 2.5, 1. ],
        [1.5, 1.5, 0.5, 1.5],
        [5. , 2. , 6. , 5. ],
        [0.5, 5.5, 3.5, 5. ]],

       [[0.5, 8.5, 8.5, 3. ],
        [3. , 5.5, 4. , 0.5],
        [1. , 5. , 2. , 4.5],
        [3.5, 0.5, 2.5, 2.5],
        [2.5, 1. , 4.5, 4. ]]])

In [13]:
# Totals for the random (5, 5, 4) array: one value per row of four, giving a
# (5, 5) result.
h.lookup_total_scores(line_counts)

array([[3.88196804, 8.        , 5.78273702, 5.66705111, 8.        ],
       [4.11883069, 8.        , 8.        , 8.        , 5.10446872],
       [7.        , 7.        , 8.        , 8.        , 4.55488346],
       [3.88196804, 5.3368033 , 1.88988157, 7.        , 6.9       ],
       [8.        , 6.9       , 6.00115718, 3.88196804, 7.        ]])

---
Some tools for visualization

In [14]:
def nhforxo(e,ne='........', n='........', nw='........'):
    """
    Build an NH9x9 neighbourhood from four xo-strings.

    Each string is encoded with gt.line_for_xo and the encodings are stacked
    one row per string, in the order e, ne, n, nw. The first two axes are then
    swapped and the resulting rows are passed to NH9x9 as positional arguments.
    """
    encoded = [gt.line_for_xo(xo) for xo in (e, ne, n, nw)]
    per_component = np.swapaxes(np.array(encoded), 0, 1)
    return NH9x9(*per_component)

In [15]:
def dsc(h, e,ne='........',n='........', nw='........', kappa=1.2):
    """
    Print a neighbourhood's line scores, its total score, and its board view.

    h provides lookup_line_score and lookup_total_scores; e, ne, n and nw are
    the xo-strings of the four lines. kappa is accepted but not used by this
    function.
    """
    line_scores = []
    for xo in (e, ne, n, nw):
        line_scores.append(h.lookup_line_score(*gt.line_for_xo(xo)))
    print(line_scores)

    # lookup_total_scores is given a (1, 1, 4) array: a single row of four scores.
    total = h.lookup_total_scores(np.array([[line_scores]]))
    print(total)

    print(nhforxo(e,ne,n,nw))

---

#### Not strictly dangerous, yet.

In [16]:
dsc(h, '.....xxx')

[5.5, 0.0, 0.0, 0.0]
[[5.5]]
|                 |
|                 |
|                 |
|                 |
|        *   x x x|
|                 |
|                 |
|                 |
|                 |


In [17]:
dsc(h, '.....xxx', '....xx..')

[5.5, 5.0, 0.0, 0.0]
[[6.9]]
|                 |
|                 |
|            x    |
|          x      |
|        *   x x x|
|                 |
|                 |
|                 |
|                 |


In [18]:
dsc(h, '...xx..o', '...x.xo.')

[5.0, 2.5, 0.0, 0.0]
[[5.20020956]]
|                 |
|              o  |
|            x    |
|                 |
|      x * x     o|
|      x          |
|                 |
|                 |
|                 |


In [19]:
dsc(h, '...xxxo.','..x.....')

[5.5, 2.0, 0.0, 0.0]
[[5.5867779]]
|                 |
|                 |
|                 |
|                 |
|      x * x x o  |
|                 |
|    x            |
|                 |
|                 |


#### Soft attack

In [20]:
dsc(h, '...x.x.o', '...xx...')

[4.0, 5.0, 0.0, 0.0]
[[6.9]]
|                 |
|                 |
|                 |
|          x      |
|      x *   x   o|
|      x          |
|                 |
|                 |
|                 |


In [21]:
dsc(h, '...xx..o', '...xx...')

[5.0, 5.0, 0.0, 0.0]
[[6.9]]
|                 |
|                 |
|                 |
|          x      |
|      x * x     o|
|      x          |
|                 |
|                 |
|                 |


#### Hard attack

In [22]:
dsc(h, '...xxxo.','.xx.....')

[5.5, 5.0, 0.0, 0.0]
[[6.9]]
|                 |
|                 |
|                 |
|                 |
|      x * x x o  |
|                 |
|    x            |
|  x              |
|                 |


#### Fatal

In [23]:
dsc(h, '...xxx.o')

[7.0, 0.0, 0.0, 0.0]
[[7.]]
|                 |
|                 |
|                 |
|                 |
|      x * x x   o|
|                 |
|                 |
|                 |
|                 |


#### Dead

In [24]:
dsc(h, '...xxxxo')

[7.5, 0.0, 0.0, 0.0]
[[8.]]
|                 |
|                 |
|                 |
|                 |
|      x * x x x o|
|                 |
|                 |
|                 |
|                 |


### Considering all options for critical defense states
The defense of critical positions may not only come through occupation of that position but also through any other position within the 9x9 neighbourhood of that position. Thus, when there is a critical defense situation, all candidate positions must be checked by test-occupying them and checking whether the originally alerting position's criticality is alleviated by that stone. The policy should meaningfully choose between all those potentially alleviating options. Not too smartly though, for that's what we'll have the Deep Q-Network for. 

### Determining an estimate for the Q-Values
#### An estimate for the state's value
The difference between the sums of offensive scores and defensive scores may be a good first estimate for the value of a board state. 


#### Q-Value Estimation for non-critical states
We could use the top 10 of each style (30 positions in total), occupy each, and calculate the state value afterwards to obtain (s, a) → v triples. All other (s, a) combinations are considered bad and may get a penalty: the "value of an irrelevant move followed by opponent's best response", subtracted from the board's current value.