In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import print_function, absolute_import, division

In [3]:
import numpy as np
from GomokuBoard import GomokuBoard
from GomokuTools import GomokuTools as gt
from NH9x9 import NH9x9
from Heuristics import Heuristics

In [134]:
def num_offensive(o, d):
    s, l, offset = gt.mask2(o, d)
    m2o_bits = gt.as_bit_array(s)[:l]
    max_count = 0
    for w in [2,1,0]:
        i = 0
        while i <= len(m2o_bits) - 2 - w:
            count = sum(m2o_bits[i:i+w+2])
            count = 3*count - (w+2)
            if count > max_count:
                max_count = count
            i+=1
    if m2o_bits[0] == 0:
        max_count += 1.5
    if m2o_bits[-1] == 0:
        max_count += 1.5

    return max_count        


In [135]:
def line_score(xo):
    o,d = gt.line_for_xo(xo)
    m = gt.mask(o,d)
    m2 = gt.mask2(o,d)
    if m2[1] >= 4 and sum(gt.as_bit_array(m2[0])) >= 1:
        return num_offensive(o,d) - 2
    else:
        return 0

In [163]:
for i in [
    '...ox...',
    '...x..xo',
    '.....x..',
    '...x.xo.',
    '...x..x.',
    '...xxo..',
    'xx......',
    '...x.x..',
    '..oxx.x.',
    '...xx...',
    '.xx.....',
    '...xxxo.',
    '...xx.x.',
    '...xxx..',
]: 
    print(i, line_score(i))

...ox... 0.5
...x..xo 1.5
.....x.. 2.0
...x.xo. 2.5
...x..x. 3.0
...xxo.. 3.5
xx...... 3.5
...x.x.. 4.0
..oxx.x. 4.5
...xx... 5.0
.xx..... 5.0
...xxxo. 5.5
...xx.x. 6.0
...xxx.. 7.0


In [260]:
kappa=3.0
def nhcombine(l):
    l_ = sorted(l)
    if l_[-1]>=7 or (l_[-1] in [4.5,5.5,6,6.5,7.0] and l_[-2] >= 4):
        return 7 # truly strong
    
    if l_[-1] in [4,5] and l_[-2] in [4,5]:
        return 6.9 # can only be countered by strong counter-attack
    
    return (l_[-1]**kappa + l_[-2]**kappa)**(1/kappa)

In [271]:
values = np.arange(15)/2
values

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,
       6.5, 7. ])

In [272]:
precomputed = np.zeros([15*15*15*15])
for e in values:
    for ne in values:
        for n in values:
            for nw in values:
                v = nhcombine([e, ne, n, nw])
                precomputed[int(2*(3375*e+225*ne+15*n+nw))]=v

In [276]:
line_counts=np.random.randint(0,14, [5, 5, 4])/2

In [277]:
e,ne,n,nw = np.rollaxis(line_counts, 2, 0)
indices = (2*(3375*e+225*ne+15*n+nw)).astype(int)
scores = precomputed[indices]

In [278]:
for i in range(5):
    for j in range(5):
        print(line_counts[i][j], scores[i][j])

[0.  3.  6.  6.5] 7.0
[5.  5.5 6.  2.5] 7.0
[4.5 6.  3.5 3.5] 7.0
[1.  1.  6.  6.5] 7.0
[3.5 4.  6.  6.5] 7.0
[2.5 1.  4.  1. ] 4.302126224475824
[2.  6.  1.5 1. ] 6.0731779437513245
[3.  4.  1.5 1.5] 4.497941445275415
[0.  6.5 1.5 5. ] 7.0
[2.  4.  4.  3.5] 6.9
[5.5 0.5 2.  4.5] 7.0
[0.5 6.5 0.  1.5] 6.52651887934375
[1.5 3.5 5.  6. ] 7.0
[4.  2.  0.5 3.5] 4.745609979014665
[5.  6.5 0.5 1.5] 7.0
[1.5 1.  6.  0. ] 6.031088637883586
[4.  2.5 1.  6.5] 7.0
[1.  2.5 4.  4.5] 7.0
[5.5 2.  1.5 2.5] 5.667051108097064
[1.5 2.5 2.5 0.5] 3.1498026247371826
[4.5 5.5 5.5 2.5] 7.0
[5.  5.5 4.5 2.5] 7.0
[3.5 0.5 5.5 3. ] 5.936837414520608
[5.5 2.5 6.5 1. ] 7.0
[1.5 0.5 5.  5.5] 7.0


In [279]:
nhcombine([6.5, 0.,  2.5, 3.5 ])

6.822044948812934

Now, a classical strategy could be: (A:attack, D:defend)
- ALWAYS attack at level A7 position
- if not possible, ALWAYS defend level D7 position
- ALWAYS tree search level A6.9 for a strong defense
    - if not a strong defense, attack at that position
    - if strong defense possible, consider deeper tree search or alternatives
- ALWAYS tree search level D6.9 for a strong counter-attack
    - if no strong counter-attack possible, defend that position
    - if strong counter-attack possible, consider deeper tree search or alternatives
- Choose greedy (when fighting) or e-greedy when learning
    - allow policies with more agressive or more defensive style
