In [148]:
import numpy as np
import time

# Size of the square sub-block that the rotation helpers below operate on.
# C bounds the column offsets (and, in reverseColumns, also the row range).
C=3
# R bounds the row offsets walked by transpose; it must equal C for the
# transpose-based rotations to stay inside a square block.
R=3

def reverseColumns(arr, x, y):
    """Mirror a sub-block of ``arr`` left-to-right, in place.

    The block spans rows ``y .. y+C-1`` and columns ``x .. x+C-1``; inside
    every one of those rows the order of the ``C`` cells is reversed.
    Cells outside the block are not touched.
    """
    last = x + C - 1
    for row_index in range(y, y + C):
        row = arr[row_index]
        # Swap the outermost pair first and walk inwards; the middle cell
        # of an odd-width block stays where it is.
        for step in range(C // 2):
            near = x + step
            far = last - step
            row[near], row[far] = row[far], row[near]
    
# Transpose a square sub-block of the matrix in place.
def transpose(arr, x, y):
    """Transpose the sub-block whose top-left corner is row ``y``, column ``x``.

    The block is ``R`` rows by ``C`` columns (module constants); each cell
    above the block's diagonal is exchanged with its mirror image below it.
    """
    for i in range(R):
        # Diagonal cells map onto themselves, so start one past the diagonal.
        for j in range(i + 1, C):
            upper_row, upper_col = i + y, j + x
            lower_row, lower_col = j + y, i + x
            arr[upper_row][upper_col], arr[lower_row][lower_col] = (
                arr[lower_row][lower_col],
                arr[upper_row][upper_col],
            )

def rotate90(arr, x, y):
    """Rotate the sub-block at column ``x``, row ``y`` a quarter turn clockwise.

    Done in place as a transpose followed by reversing every row of the block.
    """
    for step in (transpose, reverseColumns):
        step(arr, x, y)

def rotate270(arr, x, y):
    """Rotate the sub-block at column ``x``, row ``y`` a quarter turn counter-clockwise.

    Done in place by reversing every row of the block and then transposing it
    (the same two steps as ``rotate90``, applied in the opposite order).
    """
    for step in (reverseColumns, transpose):
        step(arr, x, y)
    
def nMinMaxIndx(arr, n):
    """Return the indices of the ``n`` smallest and ``n`` largest values.

    Parameters
    ----------
    arr : one-dimensional sequence of comparable numbers (e.g. a score list).
    n : how many indices to return on each side. Values larger than
        ``len(arr)`` are clamped; ``n <= 0`` yields two empty index arrays.

    Returns
    -------
    (mins, maxes) : two integer index arrays of equal length. The order of
        indices inside each array is not sorted (``np.argpartition`` only
        guarantees which side of the pivot an element lands on).
    """
    a = np.array(arr)
    size = a.shape[-1]
    n = min(n, size)

    if n <= 0:
        # ``x[-0:]`` is the whole array, so the un-guarded slicing would
        # report every index as a "max" when nothing was asked for.
        none = np.array([], dtype=np.intp)
        return none, none.copy()

    if n == size:
        # ``argpartition(a, size)`` is out of bounds; every index is both
        # among the n smallest and the n largest in this case.
        every = np.arange(size)
        return every, every.copy()

    maxes = np.argpartition(a, -n)[-n:]
    mins = np.argpartition(a, n)[:n]

    return mins, maxes
    

In [149]:
def pentago(board, side, move, turn, direction, checkWin = True):
    """Play one move on ``board`` in place: place a stone, then rotate a quadrant.

    Parameters
    ----------
    board : mutable 6x6 grid indexed ``board[row][col]``; 0 marks an empty cell.
    side : value written into the chosen cell (callers in this file pass 1 or -1).
    move : ``(x, y)`` pair, column index first and row index second.
    turn : quadrant selector. 1 is the top-right 3x3 block, 2 the bottom-left,
        3 the bottom-right; any other value selects the top-left block.
    direction : ``> 0`` rotates the quadrant with ``rotate90``, anything else
        with ``rotate270``.
    checkWin : when False, win detection is skipped and both win flags are
        returned as False.

    Returns
    -------
    (board, win1, win2, cantMove) : the same board object, whether side 1 has
        five in a row, whether side -1 has five in a row, and True when the
        target cell was already occupied (in which case nothing is changed).
    """
    col, row = move[0], move[1]

    if board[row][col] != 0:
        return board, False, False, True

    board[row][col] = side

    # (column offset, row offset) of the quadrant's top-left corner.
    quadrant_offsets = {1: (3, 0), 2: (0, 3), 3: (3, 3)}
    x, y = quadrant_offsets.get(turn, (0, 0))

    if direction > 0:
        rotate90(board, x, y)
    else:
        rotate270(board, x, y)

    win1 = False
    win2 = False

    # Test the *parameter* here. The name ``checkwin`` (lower-case w) is the
    # module-level function, which is always truthy.
    if checkWin:
        win1 = checkwin(board, 1)
        win2 = checkwin(board, -1)

    return board, win1, win2, False

def checkwin(board, side):
    """Report whether ``side`` has five consecutive stones on the 6x6 board.

    Every straight line long enough to hold five stones is examined: the six
    rows, the six columns, the two full-length diagonals and the four
    diagonals of length five next to them. Returns True as soon as one line
    contains a run of five cells equal to ``side``, otherwise False.
    """

    def has_five(cells):
        # Length of the current unbroken run of ``side`` along this line.
        streak = 0
        for r, c in cells:
            streak = streak + 1 if board[r][c] == side else 0
            if streak == 5:
                return True
        return False

    lines = []

    # Rows and columns.
    for k in range(6):
        lines.append([(k, j) for j in range(6)])
        lines.append([(j, k) for j in range(6)])

    # The two six-cell diagonals.
    lines.append([(i, i) for i in range(6)])
    lines.append([(i, 5 - i) for i in range(6)])

    # The four five-cell diagonals adjacent to them.
    lines.append([(i, i + 1) for i in range(5)])
    lines.append([(i + 1, i) for i in range(5)])
    lines.append([(i + 1, 5 - i) for i in range(5)])
    lines.append([(i, 4 - i) for i in range(5)])

    return any(has_five(line) for line in lines)

In [150]:
import keras
import random
from pprint import pprint


In [151]:
def getMove(result):
    """Turn a 36-entry prediction vector into board coordinates.

    Picks the first index whose value is the largest strictly positive entry
    (index 0 when no entry is greater than 0) and returns it as ``(x, y)``,
    i.e. column and row on the row-major 6x6 board.
    """
    best_index = 0
    best_value = 0

    for index in range(36):
        candidate = result[index]
        if candidate > best_value:
            best_index, best_value = index, candidate

    row, col = divmod(best_index, 6)
    return col, row

def getScore(board):
    """Count the stones of both sides on the 6x6 board.

    Returns ``(left, right)``: the number of cells holding -1 and the number
    of cells holding 1.
    """
    cells = [board[row][col] for row in range(6) for col in range(6)]
    right = sum(1 for cell in cells if cell == 1)
    left = sum(1 for cell in cells if cell == -1)
    return left, right

def goGame(left, right, verbose = False):
    """Let two models play a fixed-length game against each other.

    ``right`` plays as side 1 and moves first; ``left`` plays as side -1.
    Exactly 36 moves are attempted. For each one the model to move receives
    ``[[board], [turn]]`` through ``predict`` and its first output row is
    converted into a cell with ``getMove``.

    A move aimed at an occupied cell is rejected by ``pentago``; the turn is
    still consumed and the mover's penalty counter is decremented.

    NOTE(review): ``pentago`` is always called with ``turn=0`` and
    ``direction=0``, so every accepted move rotates the top-left quadrant
    with ``rotate270``, and no win check is made (``checkWin=False``).

    Returns ``(scores, penalty)`` where ``scores`` is ``getScore(board)``,
    i.e. ``(left, right)`` stone counts, and ``penalty`` is
    ``[left_penalty, right_penalty]`` (values are zero or negative).
    """
    board = [[0] * 6 for _ in range(6)]
    penalty = [0, 0]
    turn = 1

    for _ in range(36):
        if verbose:
            pprint(board)

        player = left if turn != 1 else right
        prediction = player.predict([[board], [turn]])[0]

        if verbose:
            pprint(prediction)

        chosen_cell = getMove(prediction)
        _board, _win1, _win2, blocked = pentago(
            board, turn, chosen_cell, 0, 0, checkWin = False
        )

        if blocked:
            # Index 1 belongs to the right player (side 1), index 0 to the left.
            penalty[1 if turn == 1 else 0] -= 1

        turn = -turn

    return getScore(board), penalty

def goRandomRound(models, count=1):
    """Score every model against randomly chosen opponents.

    For each model, ``count`` opponents are drawn uniformly at random from
    the *other* models. Against each opponent two games are played, one with
    the model as the left player and one as the right player, and the stones
    the model owns at the end of both games are added to its score.
    Penalties reported by ``goGame`` are ignored.

    Parameters
    ----------
    models : list of players accepted by ``goGame``.
    count : number of opponents drawn per model.

    Returns
    -------
    list of int, one accumulated score per model (same order as ``models``).

    Raises
    ------
    ValueError
        If games are requested (``count > 0``) but there is only one model,
        so no opponent exists. The earlier retry loop never terminated in
        that situation.
    """
    size = len(models)
    scores = [0] * size

    if size == 1 and count > 0:
        raise ValueError("goRandomRound needs at least two models to pair opponents")

    for i in range(size):
        # Every index except the model's own is a valid opponent.
        opponents = [k for k in range(size) if k != i]

        for _ in range(count):
            j = random.choice(opponents)

            left = models[i]
            right = models[j]

            s1, _ = goGame(left, right)
            s2, _ = goGame(right, left)

            # Game 1: model i is the left player -> first score entry.
            # Game 2: model i is the right player -> second score entry.
            scores[i] += s1[0] + s2[1]

    return scores

def goRound(models):
    """Play a full round-robin: every ordered pair of distinct models meets once.

    For the pair ``(i, j)`` model ``i`` is the left player and model ``j``
    the right player; each one's stone count from that game is added to its
    running total. Penalties reported by ``goGame`` are ignored.

    Returns a list with one accumulated score per model.
    """
    size = len(models)
    scores = [0] * size

    for i in range(size):
        for j in range(size):
            if i == j:
                continue

            left = models[i]
            right = models[j]

            # goGame returns ((left_score, right_score), penalty). Unpack the
            # inner pair; indexing the outer tuple would add a tuple and the
            # penalty list to the integer totals.
            (left_score, right_score), _ = goGame(left, right)

            scores[i] += left_score
            scores[j] += right_score

    return scores

def replaceLoserWinner(models, loserIdx, winnerIdx):
    """Overwrite one model in ``models`` with a trained copy of another.

    The model at ``winnerIdx`` is duplicated with ``cellDivision``, the copy
    is trained via ``goTrain`` on 1000 generated samples for 1 epoch, and it
    is then stored at ``loserIdx``. The list is modified in place.
    """
    offspring = cellDivision(models[winnerIdx])
    goTrain(offspring, 1000, 1)
    models[loserIdx] = offspring

def replaceLosersWinners(models, scores, count):
    """Replace the ``count`` lowest-scoring models with copies of the top ``count``.

    ``nMinMaxIndx`` supplies the indices of the lowest and highest scores;
    the k-th loser index is paired with the k-th winner index and handed to
    ``replaceLoserWinner``, which updates ``models`` in place.
    """
    loser_indices, winner_indices = nMinMaxIndx(scores, count)

    for rank in range(count):
        replaceLoserWinner(models, loser_indices[rank], winner_indices[rank])
    

In [164]:
def newModel():
    """Build and compile a fresh two-input Keras model for the 6x6 board.

    Inputs are the board (shape ``(6, 6)``, named ``board_input``) and a
    single scalar per sample (shape ``(1,)``, named ``game_input``; callers
    in this notebook feed the side to move). The board is flattened, the
    scalar goes through a one-unit tanh layer, and the two are combined with
    an ``Add`` layer. Three 36-unit ReLU layers, batch normalization and a
    36-way softmax follow. The model is compiled with RMSprop
    (learning rate 0.01), mean-squared-error loss and an accuracy metric.
    """
    layers = keras.layers

    board_input = layers.Input(shape=(6,6), name='board_input')
    game_input = layers.Input(shape=(1,), name='game_input')

    flat_board = layers.Flatten()(board_input)
    side_signal = layers.Dense(1, activation='tanh')(game_input)

    merged = layers.Add()([flat_board, side_signal])

    # Three identical fully connected layers stacked on the merged signal.
    hidden = merged
    for _ in range(3):
        hidden = layers.Dense(36, activation='relu')(hidden)

    normalized = layers.BatchNormalization()(hidden)
    move_scores = layers.Dense(36, activation='softmax')(normalized)

    model = keras.models.Model(inputs=[board_input, game_input], outputs=move_scores)
    model.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=0.01),
        loss=keras.losses.mean_squared_error,
        metrics=['accuracy'],
    )

    return model

def generateTraining(game_depth = 36):
    """Generate one random training sample for the move-legality task.

    Up to ``game_depth`` random placement attempts are made on an empty 6x6
    board. An attempt is skipped when the drawn stone value is 0 or the drawn
    cell is already occupied, so the number of stones actually placed can be
    smaller than the number of attempts.

    Parameters
    ----------
    game_depth : upper bound (inclusive) for the random number of attempts.

    Returns
    -------
    (board, turn, output)
        board : 6x6 list of lists holding -1, 0 or 1.
        turn : -1 or 1, chosen at random.
        output : flat list of 36 ints in row-major order; 1 where the
            matching board cell is empty and 0 where it is occupied.
    """
    attempts = random.randint(0, game_depth)

    board = [[0] * 6 for _ in range(6)]
    output = [1] * 36

    for _ in range(attempts):
        # All three draws happen before the checks so the sequence of random
        # numbers consumed per attempt is always the same.
        value = random.randint(-1, 1)
        x = random.randint(0, 5)
        y = random.randint(0, 5)

        if value == 0 or board[y][x] != 0:
            continue

        board[y][x] = value
        output[y * 6 + x] = 0

    turn = 1 if random.randint(0, 1) else -1

    return board, turn, output

def generateTrainings(size, game_depth = 36):
    """Generate ``size`` random samples and regroup them for ``model.fit``.

    Each sample comes from ``generateTraining(game_depth)``. Returns a pair
    ``([boards, turns], [outputs])`` in which the three inner lists are
    parallel: entry ``k`` of each belongs to the k-th generated sample.
    """
    samples = [generateTraining(game_depth) for _ in range(size)]

    boards = [board for board, _turn, _output in samples]
    turns = [turn for _board, turn, _output in samples]
    outputs = [output for _board, _turn, output in samples]

    return [boards, turns], [outputs]

def goTrain(model, size = 5, epochs = 1, game_depth = 36):
    """Fit ``model`` on a freshly generated batch of random samples.

    ``size`` samples are produced by ``generateTrainings`` (with the given
    ``game_depth``) and passed to ``model.fit`` for ``epochs`` epochs, with
    30% of the batch held out for validation and progress output disabled.
    Nothing is returned; the model is updated in place.
    """
    features, targets = generateTrainings(size, game_depth)

    model.fit(
        features,
        targets,
        epochs=epochs,
        validation_split=0.3,
        verbose=0,
    )


def cellDivision(cell):
    """Return an independent, compiled copy of the Keras model ``cell``.

    The architecture is duplicated with ``keras.models.clone_model``, the
    weights of ``cell`` are then copied into the duplicate, and the duplicate
    is compiled with the same settings ``newModel`` uses (RMSprop with
    learning rate 0.01, mean-squared-error loss, accuracy metric).
    """
    clone = keras.models.clone_model(cell)
    clone.set_weights(cell.get_weights())

    clone.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=0.01),
        loss=keras.losses.mean_squared_error,
        metrics=['accuracy'],
    )

    return clone

In [167]:
# Initial population: 30 independently initialised models.
models = [newModel() for _ in range(30)]

In [None]:
# Evolution loop: score the population, report statistics, then replace the
# 5 lowest-scoring models with trained copies of the 5 highest-scoring ones.
games = 3  # opponents drawn per model in each round

for i in range(20):
    print("Round #", i)

    start = time.time()

    scores = goRandomRound(models, games)
    best = max(scores)
    # round index, best score, best score per opponent, worst score, mean score
    print(i, best, best / games, min(scores), sum(scores) / len(scores))
    end1 = time.time()

    replaceLosersWinners(models, scores, 5)
    end2 = time.time()

    # seconds spent on scoring, and on scoring plus replacement
    print(end1 - start, end2 - start)
    

    #keras.backend.clear_session()

    

Round # 0
0 18 6.0 6 11.133333333333333
22.136138200759888 36.53942012786865
Round # 1
1 22 7.333333333333333 6 12.966666666666667
12.089807748794556 28.024471282958984
Round # 2
2 25 8.333333333333334 6 13.033333333333333
11.486812353134155 26.317639589309692
Round # 3
3 28 9.333333333333334 6 14.533333333333333
13.476057052612305 29.158580780029297
Round # 4
4 25 8.333333333333334 7 15.0
13.804596900939941 28.952629804611206
Round # 5
5 26 8.666666666666666 6 14.5
11.98508906364441 28.357539176940918
Round # 6
6 21 7.0 5 13.0
11.996484279632568 28.489814519882202
Round # 7
7 21 7.0 9 14.833333333333334
13.47383713722229 30.056121826171875
Round # 8
8 21 7.0 9 15.2
13.140975952148438 29.19355344772339
Round # 9
9 24 8.0 6 14.8
13.600513935089111 29.703643560409546
Round # 10
10 24 8.0 6 15.266666666666667
17.62833833694458 34.16317272186279
Round # 11


In [166]:
# Score every model against 5 randomly drawn opponents (two games each) and
# display the per-model totals.
# NOTE(review): the saved output below lists 100 scores although the cell
# that builds `models` creates 30 -- it appears to come from an earlier
# kernel state; re-run after Restart & Run All to refresh it.
goRandomRound(models, 5)

[21,
 38,
 21,
 22,
 18,
 33,
 14,
 21,
 15,
 13,
 10,
 14,
 11,
 23,
 11,
 23,
 28,
 11,
 17,
 19,
 19,
 19,
 11,
 22,
 20,
 23,
 25,
 16,
 15,
 22,
 34,
 29,
 22,
 20,
 25,
 21,
 11,
 19,
 32,
 21,
 13,
 22,
 18,
 14,
 15,
 17,
 30,
 21,
 13,
 27,
 36,
 23,
 19,
 33,
 23,
 18,
 21,
 23,
 10,
 19,
 11,
 19,
 22,
 22,
 13,
 14,
 15,
 24,
 30,
 25,
 17,
 14,
 32,
 21,
 15,
 29,
 12,
 9,
 13,
 18,
 27,
 32,
 38,
 13,
 28,
 23,
 20,
 10,
 16,
 29,
 19,
 14,
 22,
 39,
 22,
 14,
 38,
 21,
 24,
 23]

In [128]:
# Train the first model on 100000 generated samples for a single epoch
# (goTrain holds out 30% of them for validation).
goTrain(models[0], size = 100000, epochs = 1)

Train on 70000 samples, validate on 30000 samples
Epoch 1/1


In [65]:
# Sanity check: a board that is full except for the top-left cell, with
# side 1 to move. Display the model's 36 move scores for it.
board = [[1] * 6 for _ in range(6)]
board[0][0] = 0

models[0].predict([[board], [1]])[0]

array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       1.7139086e-15, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       6.9144002e-13, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 1.7587921e-25, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
      dtype=float32)