# Classes

In [1]:
class Pawn:

    """
    Class defining what a pawn is. It is characterised by :
    - its coordinates
    - its player
    - where it can go
    """

    def __init__(self, id_player):
        self.x = 0
        self.y = 0
        self.id = id_player
        self.accessibles = []
        self.active = True
        self.remain = 1

    def move(self, board, player, direction, k):
        """
        0 -> upper right (+1, -1)
        1 -> right (+2, 0)
        2 -> lower right (+1, +1)
        3 -> lower left (-1, +1)
        4 -> left (-2, 0)
        5 -> upper left (-1, -1)
        """

        x0 = self.x
        y0 = self.y

        if direction == 0:
            self.x += k
            self.y -= k
        elif direction == 1:
            self.x += 2*k
        elif direction == 2:
            self.x += k
            self.y += k
        elif direction == 3:
            self.x -= k
            self.y += k
        elif direction == 4:
            self.x -= 2*k
        elif direction == 5:
            self.x -= k
            self.y -= k

        player.score += board.cases_tab[y0][x0].score

        board.cases_tab[y0][x0].change_state(0)

        board.cases_tab[self.y][self.x].change_state(2)
        board.cases_tab[self.y][self.x].owner = self.id

    def place(self, board, pos_x, pos_y):
        self.x = pos_x
        self.y = pos_y

        board.cases_tab[pos_y][pos_x].change_state(2)
        board.cases_tab[pos_y][pos_x].owner = self.id

    def compute_accessible(self, board):

        """
        we try each direction one after another and collect the data under the format
        [a, b, c, d, e, f] where each letter accounts for the number of reachable (inclusive) cases in the given direction
        """
        if self.active:
            x = self.x
            y = self.y

            dirs = [[1, -1], [2, 0], [1, 1], [-1, 1], [-2, 0], [-1, -1]]

            def advance(x, y, dx, dy):
                k = 0
                while 0 <= x+dx < 15 and 0 <= y+dy < 8 and \
                    board.cases_tab[y+dy][x+dx].state == 1:
                    k += 1
                    x += dx
                    y += dy
                return k

            max_per_dir = []

            for (dx, dy) in dirs:
                max_per_dir.append(advance(x, y, dx, dy))

            self.accessibles = max_per_dir

            if self.accessibles == [0, 0, 0, 0, 0, 0]:
                self.active = False
                self.remain = 0
        else:
            self.remain = 1

In [2]:
class Player:

    """
    Class defining a player. It can have multiple modes:
    0 -> human
    1 -> random
    2 -> minimax
    """

    number_player = 0

    def __init__(self, mode, total_number):
        self.score = 0
        self.mode = mode
        self.number = Player.number_player

        p = []
        for k in range(total_number):
            p.append(Pawn(self.number))

        self.pawns = p

        Player.number_player += 1

In [3]:
class Case:

    """
    Class defining a case, characterised by :
    - its coordinates
    - its score
    - its state (1 for accessible, 0 for removed, 2 for occupied)
    - its owner (-1 for none, number of player otherwise)
    """

    def __init__(self, pos_x, pos_y, n, state = True):
        self.x = pos_x
        self.y = pos_y
        self.score = n
        self.state = 1
        self.owner = -1

    def change_state(self, new_state):
        self.state = new_state
    
    def change_owner(self, new_owner):
        self.owner = new_owner

In [4]:
class Board:

    """
    Class defining a board, set of cases
    """

    def __init__(self):
        n_1 = 30
        n_2 = 20
        n_3 = 10
        n_fish_tab = n_1*[1] + n_2*[2] + n_3*[3]
        for k in range(10):
            random.shuffle(n_fish_tab)

        cases_tab = []

        # number of cases already created
        i = 0

        for k in range(0, 8):

            if k % 2 == 0:
                n_case = 8
            else:
                n_case = 7

            line = []

            # to have a constant number of rows per line
            if k % 2 == 1:
                line.append(0)

            for l in range(0, n_case):

                n_fish = n_fish_tab[i]

                # creation of the case
                c = Case(2*l + k % 2, k, n_fish)
                line.append(c)
                line.append(0)

                i += 1

            # to have a constant number of rows per line
            if k % 2 == 0:
                line = line[:-1]

            cases_tab.append(line)

        self.cases_tab = cases_tab

    def display(self):
        
        for k in range(0, 8):

            if k % 2 == 0:
                n_case = 8
            else:
                n_case = 7

            s = ''
            if k % 2 == 1:
                s += ' '

            for l in range(0, 15):
                if self.cases_tab[k][l] != 0:
                    c = self.cases_tab[k][l]

                    if c.state == 0:
                        s += '* '
                    elif c.owner != -1:
                        letter = chr(ord('a')+c.owner)
                        s += letter + ' '
                    else:
                        s += str(c.score) + ' '

            print(s)

# Apprentissage par renforcement pour le choix du pion

In [5]:
import tensorflow as tf
import numpy as np

import pickle

import os
from datetime import datetime

## Fonctions utiles

In [6]:
def make_input(board, players, player_number, pawn_number):
    
    P = len(players)
    N = 6-P
    
    input_table = np.zeros((8, 15, P*N+1))
    
    # ajout du score des cases si accessibles, 0 sinon
    for k in range(len(board.cases_tab)): # [0; 7]
        for l in range(len(board.cases_tab[0])): # [0; 14]
            if board.cases_tab[k][l] != 0:
                case = board.cases_tab[k][l]
                if case.state == 1:
                    input_table[k, l, 0] = case.score
    
    def make_input_pawn(players, player_number, pawn_number):
        accessibles = players[player_number].pawns[pawn_number].accessibles
        x, y = players[player_number].pawns[pawn_number].x, players[player_number].pawns[pawn_number].y
        
        input_cur_pawn = np.zeros((8, 15))
        
        input_cur_pawn[x+1:x+accessibles[0]+1, y-1:y-accessibles[0]-1] = 1
        input_cur_pawn[x+2:x+2*accessibles[1]+1:2, y] = 1
        input_cur_pawn[x+1:x+accessibles[2]+1, y+1:y+accessibles[2]+1] = 1
        input_cur_pawn[x-1:x-accessibles[3]-1, y+1:y+accessibles[3]+1] = 1
        input_cur_pawn[x-2:x-2*accessibles[4]-1:2, y] = 1
        input_cur_pawn[x-1:x-accessibles[5]-1, y-1:y-accessibles[5]-1] = 1
    
        return input_cur_pawn
    
    input_table[:, :, 1] = make_input_pawn(players, player_number, pawn_number)
    
    i = 2
    
    for pawn in range(N):
        if pawn != pawn_number:
            input_table[:, :, i] = make_input_pawn(players, player_number, pawn_number)
            i += 1
    
    for player in range(P):
        if player != player_number:
            for pawn in range(N):
                input_table[:, :, i] = make_input_pawn(players, player, pawn)
                i += 1
    
    return input_table

In [7]:
def make_output(players, player_number, pawn_number, logits):
    
    accessibles = players[player_number].pawns[pawn_number].accessibles
    x, y = players[player_number].pawns[pawn_number].x, players[player_number].pawns[pawn_number].y
    
    proba = 0
    direction, distance = 0, 0
    
    for d in range(1, accessibles[0]+1):
        nx, ny = x + d, y - d
        nc = 15*y+x
        if logits[nc] > proba:
            proba = logits[nc]
            direction = 0
            distance = d
    
    for d in range(1, accessibles[1]+1):
        nx, ny = x + 2 * d, y
        nc = 15*y+x
        if logits[nc] > proba:
            proba = logits[nc]
            direction = 1
            distance = d
        
    for d in range(1, accessibles[2]+1):
        nx, ny = x + d, y + d
        nc = 15*y+x
        if logits[nc] > proba:
            proba = logits[nc]
            direction = 2
            distance = d
        
    for d in range(1, accessibles[3]+1):
        nx, ny = x - d, y + d
        nc = 15*y+x
        if logits[nc] > proba:
            proba = logits[nc]
            direction = 3
            distance = d
        
    for d in range(1, accessibles[4]+1):
        nx, ny = x - 2 * d, y
        nc = 15*y+x
        if logits[nc] > proba:
            proba = logits[nc]
            direction = 4
            distance = d
        
    for d in range(1, accessibles[5]+1):
        nx, ny = x - d, y - d
        nc = 15*y+x
        if logits[nc] > proba:
            proba = logits[nc]
            direction = 5
            distance = d
    
    
    return proba, direction, distance

In [8]:
def divide_data(X_data, y_data):
    n = len(X_data)
    training_ratio = 0.8
    training_number = int(training_ratio * n)
    
    indices = np.random.permutation(n)
    X_train = X_data[indices[:training_number]]
    y_train = y_data[indices[:training_number]]
    X_test = X_data[indices[training_number:]]
    y_test = y_data[indices[training_number:]]
    
    return X_train, y_train, X_test, y_test

In [9]:
def next_batch(X_data, y_data, batch_size):
    n = len(X_data)
    
    indices = np.random.permutation(n)
    return X_data[indices[:batch_size]], y_data[indices[:batch_size]]

In [10]:
def log_dir():
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "logs"
    name = "run-" + now
    return "{}/{}/".format(root_logdir, name)

## 1. Architecture

In [11]:
height = 15
width = 8
channels = 5

conv_filters_1 = 5
conv_kernel_1 = 5
conv_stride_1 = [1, 1]
conv_padding_1 = "SAME"

conv_filters_2 = 15
conv_kernel_2 = 5
conv_stride_2 = [1, 1]
conv_padding_2 = "SAME"

conv_filters_3 = 50
conv_kernel_3 = 5
conv_stride_3 = [1, 1]
conv_padding_3 = "SAME"

n_hidden_1 = 200
n_hidden_2 = 150

n_output = 1

tf.reset_default_graph()

In [12]:
X = tf.placeholder(tf.float32, shape=(None, height, width, channels), name="X")
y = tf.placeholder(tf.int64, shape=(None,n_output), name="y")

In [13]:
with tf.name_scope("dnn"):
    conv1 = tf.layers.conv2d(X, filters=conv_filters_1, kernel_size=conv_kernel_1, strides=conv_stride_1, \
                             padding=conv_padding_1, activation=tf.nn.relu, name="conv1")
    max_pool1 = tf.nn.max_pool(conv1, ksize=max_pool_ksize_1, strides=max_pool_strides_1, \
                               padding=max_pool_padding_1, name="maxpool1")
    conv2 = tf.layers.conv2d(max_pool1, filters=conv_filters_2, kernel_size=conv_kernel_2, \
                             strides=conv_stride_2, padding=conv_padding_2, activation=tf.nn.relu, name="conv2")
    max_pool2 = tf.nn.max_pool(conv2, ksize=max_pool_ksize_2, strides=max_pool_strides_2, \
                               padding=max_pool_padding_2, name="maxpool1")
    conv3 = tf.layers.conv2d(max_pool2, filters=conv_filters_3, kernel_size=conv_kernel_3, \
                             strides=conv_stride_3, padding=conv_padding_3, activation=tf.nn.relu, name="conv3")

    flat = tf.reshape(conv3, shape=[-1, conv_filters_3*8])    
    hidden1 = tf.layers.dense(flat, n_hidden_1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden_2, name="hidden2", activation=tf.nn.relu)
    y_predict = tf.layers.dense(hidden2, n_output, name="outputs")

In [14]:
with tf.name_scope("loss"):
    loss = tf.reduce_mean((y_predict-y)**2, name="loss")
loss_summary = tf.summary.scalar("loss_sum", loss)

In [15]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss, name="training_op")

In [16]:
with tf.name_scope("eval"):
    accuracy = abs(y[0]-y_predict[0])
accuracy_summary = tf.summary.scalar("accuracy_sum", accuracy)

In [17]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [18]:
logdir = log_dir()
file_saver = tf.summary.FileWriter(logdir, tf.get_default_graph())

## 2. Sauvegarde

In [19]:
checkpoint_path = "saves/network_two.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "saves/network_two"

In [20]:
if False:
    with tf.Session() as sess:

        if os.path.isfile(final_model_path + ".meta"):
                print("Training was interrupted. Session was restored.")

                saver = tf.train.import_meta_graph(final_model_path + ".meta")
                saver.restore(sess, final_model_path)
                #saver.restore(sess, final_model_path)
        else:
            sess.run(init)

        saver.save(sess, final_model_path)

## Entrainement

In [21]:
number_players = 2
number_player_names = ["two", "three", "four"]

In [22]:
path = "saves/network_" + number_player_names[number_players - 2]

checkpoint_path = path + ".ckpt"
epoch_path = path + ".epoch"
loss_path = path + ".loss"
meta_path = path + ".meta"

In [23]:
def train(X_data, y_data, n_epoch, batch_size):
    
    X_train, y_train, X_valid, y_valid = divide_data(X_data, y_data)
    batch_number = int(np.ceil(len(X_train)/batch_size))

    with tf.Session() as sess:
        if os.path.isfile(meta_path):            
            saver = tf.train.import_meta_graph(meta_path)
            saver.restore(sess, path)
        
            with open(epoch_path, 'rb') as f:
                start_epoch = int(f.read())
            with open(loss_path, 'rb') as f:
                best_loss = float(f.read())
            
            # start_epoch = 0
            # best_loss = np.infty

            print("Training was interrupted. Session was restored at epoch " + str(start_epoch) + " and loss " + str(best_loss))
        else:
            start_epoch = 0
            best_loss = np.infty
            sess.run(init)
        
        graph = tf.get_default_graph()

        for epoch in range(start_epoch, start_epoch + n_epoch + 1):
            for batch_index in range(batch_number):
                X_batch, y_batch = next_batch(X_train, y_train, batch_size)
                sess.run(training_op, feed_dict={X:X_batch, y:y_batch})

            
            accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X:X_valid, y:y_valid})
            file_saver.add_summary(loss_summary_str, epoch)
            file_saver.add_summary(accuracy_summary_str, epoch)

            if epoch % 5 == 0:
                print("Epoch : ", epoch,
                     "\tValidation accuracy : {:.3f}".format(accuracy_val * 100),
                     "\tLoss : {:.5f}".format(loss_val))
                saver.save(sess, checkpoint_path)
                with open(epoch_path, "wb") as f:
                    f.write(b"%d" % (epoch + 1))
                if loss_val < best_loss:
                    best_loss = loss_val
                    with open(loss_path, "wb") as f:
                        f.write(b"%f" % (best_loss))
                    saver.save(sess, path)

## Données

In [26]:
number_data = "minimax"
number_batch = 0

In [27]:
with open('data/board_save_' + str(number_data) + '_' + str(number_batch) + '.obj', 'rb') as board_save:
    board = pickle.load(board_save)

with open('data/players_save_' + str(number_data) + '_' + str(number_batch) + '.obj', 'rb') as players_save :
    players = pickle.load(players_save)

with open('data/pawn_save_' + str(number_data) + '_' + str(number_batch) + '.obj', 'rb') as pawn_save :
    pawn = pickle.load(pawn_save)

with open('data/move_save_' + str(number_data) + '_' + str(number_batch) + '.obj', 'rb') as move_save :
    move = pickle.load(move_save)

with open('data/origin_save_' + str(number_data) + '_' + str(number_batch) + '.obj', 'rb') as origin_save :
    origin = pickle.load(origin_save)

In [28]:
X_obs = []
for k in range(len(board)):
    X_obs.append(make_input(board[k], players[k], origin[k], pawn[k]))

In [29]:
len(X_obs)

402

In [30]:
rejected = []
y_obs = []
for k in range(len(board)):
    x_pos, y_pos = players[k][origin[k]].pawns[pawn[k]].x, players[k][origin[k]].pawns[pawn[k]].y
    direction, distance = move[k]
    if direction == 0:
        nx, ny = x_pos + distance, y_pos - distance
    if direction == 1:
        nx, ny = x_pos + 2*distance, y_pos
    if direction == 2:
        nx, ny = x_pos + distance, y_pos + distance
    if direction == 3:
        nx, ny = x_pos - distance, y_pos + distance
    if direction == 4:
        nx, ny = x_pos - 2 * distance, y_pos
    if direction == 5:
        nx, ny = x_pos - distance, y_pos - distance
    nc = ny * 15 + nx
    if nc >= 0 and nc <= 120:
        y_obs.append(nc)
    else:
        rejected.append(k)
y_obs = np.array(y_obs)

In [31]:
for k in rejected[::-1]:
    del(X_obs[k])

In [32]:
print("Données rejetées : " + str(len(rejected)))

Données rejetées : 0


In [33]:
X_obs = np.array(X_obs)

In [37]:
train(X_obs, y_obs, n_epoch=50, batch_size=50)

INFO:tensorflow:Restoring parameters from saves/network_two
Training was interrupted. Session was restored at epoch 186 and loss 4.121263
Epoch :  190 	Validation accuracy : 1.235 	Loss : 4.26915
Epoch :  195 	Validation accuracy : 2.469 	Loss : 4.28760
Epoch :  200 	Validation accuracy : 0.000 	Loss : 4.22639
Epoch :  205 	Validation accuracy : 1.235 	Loss : 4.30674
Epoch :  210 	Validation accuracy : 0.000 	Loss : 4.31410
Epoch :  215 	Validation accuracy : 1.235 	Loss : 4.29714
Epoch :  220 	Validation accuracy : 2.469 	Loss : 4.32289
Epoch :  225 	Validation accuracy : 0.000 	Loss : 4.33219
Epoch :  230 	Validation accuracy : 1.235 	Loss : 4.26370
Epoch :  235 	Validation accuracy : 0.000 	Loss : 4.35203
