In [5]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import animation
from IPython.display import HTML
from pathlib import Path


def solder_info(solder):
    """Return the coordinates a soldier needs in order to look for a move.

    Parameters
    ----------
    solder : sequence of three values
        ``[type, line, position]`` of the soldier.

    Returns
    -------
    list
        ``[front, right, sposition]`` where ``front`` is the line ahead of
        the soldier, ``right`` is the diagonal position on that line and
        ``sposition`` is the soldier's current position.
    """
    kind, line, position = solder

    # Type 1 advances towards increasing indices; every other type advances
    # towards decreasing indices.
    if kind == 1:
        step = 1
    else:
        step = -1

    return [line + step, position + step, position]

def possible_moves(board, solder):
    """Return the board rows a soldier is allowed to move to.

    A row of ``board`` is ``[occupant, line, position]``. A row is a legal
    destination when it is empty (occupant ``0``), lies on the line ahead of
    the soldier, and its position is either the soldier's own position or the
    diagonal position reported by ``solder_info``.

    Parameters
    ----------
    board : numpy.ndarray
        2-D array with one ``[occupant, line, position]`` row per square.
    solder : sequence of three values
        ``[type, line, position]`` of the soldier to move.

    Returns
    -------
    numpy.ndarray
        The matching rows of ``board`` in board order (possibly zero rows).
    """
    vacant = 0

    front, right, sposition = solder_info(solder)

    occupants = board[:, 0]
    lines = board[:, 1]
    positions = board[:, 2]

    # Empty squares on the next line ...
    is_next_empty = (occupants == vacant) & (lines == front)
    # ... that are straight ahead or on the diagonal.
    is_in_reach = (positions == sposition) | (positions == right)

    return board[is_next_empty & is_in_reach]

def move(board, solder, a_move):
    """Return a new board on which ``solder`` has moved to ``a_move``.

    Neither ``board`` nor ``solder`` nor ``a_move`` is modified: all writes go
    to a copy of the board. (Assigning through aliases of ``solder`` and
    ``a_move`` would overwrite element 0 of the caller's arrays.)

    Parameters
    ----------
    board : numpy.ndarray
        2-D array with one ``[occupant, line, position]`` row per square.
    solder : array-like of three values
        ``[type, line, position]`` row of the soldier that moves.
    a_move : array-like of three values
        ``[occupant, line, position]`` row of the destination square, exactly
        as it currently appears in ``board``.

    Returns
    -------
    numpy.ndarray
        Copy of ``board`` with the destination occupied by the soldier's type
        and the soldier's former square set to ``0`` (empty).
    """
    nboard = board.copy()
    solder = np.asarray(solder)
    a_move = np.asarray(a_move)

    # Occupy the destination: only the occupant column changes.
    destination = np.all(nboard == a_move, axis=1)
    nboard[destination, 0] = solder[0]

    # Empty the square the soldier came from.
    origin = np.all(nboard == solder, axis=1)
    nboard[origin, 0] = 0

    return nboard

def play_randomly(board, side):
    """Pick a random legal move for ``side``, or report that it has none.

    The soldiers of ``side`` are visited in random order; the first one that
    has at least one legal move is selected and one of its moves is drawn
    uniformly at random.

    Parameters
    ----------
    board : numpy.ndarray
        2-D array with one ``[occupant, line, position]`` row per square.
    side : int
        Occupant value identifying the side to play.

    Returns
    -------
    list
        ``[False, a_move, solder]`` when a move was found, where ``a_move`` is
        the destination row and ``solder`` the row of the moving soldier;
        ``[True, side]`` when the side cannot move (game over). A side with
        no soldier on the board also yields ``[True, side]``.
    """
    # Boolean indexing returns a copy, so shuffling does not touch the board.
    solders = board[board[:, 0] == side]

    # Without any soldier there is nothing to move. Returning the game-over
    # result here keeps the return value indexable for callers.
    if len(solders) == 0:
        return [True, side]

    np.random.shuffle(solders)

    for solder in solders:
        moves = possible_moves(board, solder)
        if len(moves) > 0:
            a_move = moves[np.random.choice(len(moves))]
            return [False, a_move, solder]

    # Every soldier was tried and none can move.
    return [True, side]

def unique_boards(data):
    """Collapse repeated boards into one row and sum their weights.

    After each game round the winner's boards get a positive weight and the
    loser's boards a negative one. These rows are appended to the rows of the
    previous rounds, so the same board can appear several times. This function
    keeps one row per distinct board and adds up the weights of its
    repetitions.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose last column is the weight and whose other columns
        describe the board.

    Returns
    -------
    numpy.ndarray
        2-D array with the same column layout, one row per distinct board
        (in the sorted order produced by ``numpy.unique``) and the summed
        weight in the last column. Empty input is returned as an (empty)
        array copy, so the return type is the same on every path.
    """
    x = data[:, :-1]
    y = data[:, -1]

    # Nothing to merge: hand back an array, like every other path does.
    if x.size == 0:
        return data.copy()

    # inverse_x[i] is the index in unique_x of the board stored in row i.
    unique_x, inverse_x = np.unique(x, return_inverse=True, axis=0)

    # Sum the weights of all rows that map to the same unique board. The
    # inverse index is flattened so that it is 1-D whatever shape np.unique
    # returned it in.
    summed_y = np.bincount(inverse_x.ravel(), weights=y, minlength=len(unique_x))

    return np.c_[unique_x, summed_y]

def update_boards(boards, winner):
    """Reward the moves of one finished game and merge them into ``boardsdata.npz``.

    ``boards`` is the chronological list of board states of one game. Every
    pair of consecutive boards ``(boards[k], boards[k + 1])`` is flattened
    into one row describing the move that led from the first to the second.
    The side that played the last move is treated as the winner: its moves
    are the last one and every second one before it, the remaining moves
    belong to the loser. Winner rows get the weight ``1 / len(boards)``,
    loser rows ``-1 / len(boards)``.

    The rows are stored under the key ``data`` of ``boardsdata.npz`` in the
    current working directory. If the file already exists, its rows are
    merged with the new ones through ``unique_boards`` before saving.

    Parameters
    ----------
    boards : numpy.ndarray
        Array of board states, first axis = time.
    winner :
        Not read by this function; kept so existing callers keep working.

    Returns
    -------
    None
        The function only writes the file. When ``boards`` holds fewer than
        two states no move was played and nothing is written.
    """
    filename = "boardsdata.npz"

    boards = np.asarray(boards)
    l = len(boards)

    # Without at least two states there is no move to reward.
    if l < 2:
        return

    # Row k of `transitions` is boards[k] followed by boards[k + 1].
    flat = boards.reshape(l, -1)
    transitions = np.c_[flat[:-1], flat[1:]]

    # The last transition (index l - 2) belongs to the winner, and the two
    # sides alternate, so the winner owns the indices with the parity of l
    # and the loser the others. An empty selection (one-move game) is fine.
    winner_boards = transitions[l % 2::2]
    loser_boards = transitions[(l + 1) % 2::2]

    stucked_boards = np.r_[winner_boards, loser_boards]

    # rewarding
    reward = 1 / l
    reward_vector = np.r_[np.full(len(winner_boards), reward),
                          np.full(len(loser_boards), -reward)]

    data = np.c_[stucked_boards, reward_vector]

    if Path(filename).exists():
        # Close the archive before it is overwritten below.
        with np.load(filename) as saveddata:
            saved = saveddata['data']

        # Append the new rows to the stored ones and make the list unique.
        data = unique_boards(np.r_[saved, data])

    np.savez(filename, data=data)

def play(boards):
    """Play one game with random moves until a side cannot move.

    Starting from the last board in ``boards``, the two sides (``1`` and
    ``2``) alternate; the side that opens is drawn at random. Each turn asks
    ``play_randomly`` once for a move and appends the resulting board.

    Parameters
    ----------
    boards : numpy.ndarray
        Array of board states; the last one is the starting position. The
        array passed in is not modified.

    Returns
    -------
    list
        ``[boards, side]`` where ``boards`` is the input extended with one
        board per move played and ``side`` is the side that was to move when
        no legal move was left.

    NOTE(review): callers in this notebook name the second item ``winner``,
    but it is the side that could not move — confirm which one is intended.
    """
    # The side is flipped at the top of the loop, so the opener is the
    # opposite of the value drawn here.
    side = np.random.choice([1, 2])

    while True:
        side = 1 if side == 2 else 2
        board = boards[-1]

        # Ask once per turn and use that answer both for the game-over test
        # and for the move itself.
        outcome = play_randomly(board, side)
        if outcome[0]:
            break

        _, a_move, solder = outcome
        nboard = move(board, solder, a_move)
        boards = np.append(boards, [nboard], axis=0)

    return [boards, side]

def anim(boards):
    """Build a matplotlib animation that shows one board per frame.

    Every square is drawn as a scatter point at ``x = position`` and
    ``y = line``; empty squares are cyan, side 1 is red and side 2 is blue.

    Parameters
    ----------
    boards : sequence of numpy.ndarray
        Board states to display, each a 2-D integer array with
        ``[occupant, line, position]`` rows.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        Animation with one frame per board, one second per frame, played
        once.
    """
    fig = plt.figure()

    ax = plt.axes()
    ax.set_xlim(-1, 3)
    ax.set_ylim(-1, 4)
    ax.grid()

    scat = ax.scatter([], [], s=400)

    def init():
        # set_offsets expects an (N, 2) array; an empty (0, 2) array clears
        # the points.
        scat.set_offsets(np.empty((0, 2)))
        return scat,

    def animate(i):
        board = boards[i]
        data = np.c_[board[:,2], board[:,1]]
        # Indexed by occupant: 0 -> cyan, 1 -> red, 2 -> blue.
        color = np.array(["cyan", "red", "blue"])

        color = color[board[:,0]]
        scat.set_facecolors(color)
        scat.set_offsets(data)

        return scat,

    return animation.FuncAnimation(fig, animate, init_func=init, frames = np.arange(len(boards)) ,
                               interval=1000, blit=False, repeat=False)


In [15]:
# Starting position: one [occupant, line, position] row per square of the
# 4-line x 3-position board. Line 0 is held by side 1, line 3 by side 2 and
# the two lines in between are empty (occupant 0).
line_occupants = [1, 0, 0, 2]
boardsi = np.array([[
    [occupant, line, position]
    for line, occupant in enumerate(line_occupants)
    for position in range(3)
]])

# Play the random games one after the other; each finished game is merged
# into the stored board data.
n_rounds = 1000
for i in range(n_rounds):
    boards, winner = play(boardsi)
    update_boards(boards, winner)

print("finish")

finish
