In [None]:
from abc import abstractmethod
from pathlib import Path
import os
import numpy as np
import keras.models as Km
import keras.layers as kl
import random

from build.Board import Board

In [None]:
class Model:
    """
    Model class for all 2 player based games with neural network training.

    Wraps a keras model that is loaded from (or saved to) an ``.h5`` file in the
    current working directory and offers two training modes:

    * offline: (state, move) pairs are scored one at a time by the network and
      transitions are collected in ``memory`` and learned in batches.
    * online: the network maps an encoded state to one value per action and is
      fitted after every single transition.

    Subclasses provide the network and the input encodings through
    ``create_model``, ``state_to_tensor`` and ``one_hot_encode_state``.
    NOTE: these hooks are decorated with ``abstractmethod`` but this class does
    not derive from ``abc.ABC``, so they are not enforced at instantiation time;
    the base implementations simply return ``None``.
    """

    def __init__(self, tag):
        """
        :param tag: used tag for neural network model (e.g. 1 for first player and -1 for second)
        """
        self.tag = tag
        # Not read anywhere in this class; presumably an exploration rate - TODO confirm.
        self.epsilon = 0.1
        self.alpha = 0.5  # step size used when blending old q value and new estimate
        self.gamma = 1  # discount applied to the best value of the following state
        self.model = self.__load_model()
        self.history = []  # one keras ``History.history`` dict per ``fit`` call
        self.memory = []  # pending [prev_state, prev_move, state, reward] transitions
        self.count_memory = 0
        self.batch_size = 10  # number of stored transitions that triggers batch learning

    def __model_filename(self) -> str:
        """
        Build the file name used to persist the network of this player.
        :return: 'model_values_first.h5' for tag 1, 'model_values_second.h5' otherwise
        """
        suffix = '_first' if self.tag == 1 else '_second'
        return 'model_values' + suffix + '.h5'

    def __load_model(self):
        """
        Loads previously saved model, or creates a new one if no file exists
        :return: loaded (or newly created) model
        """
        filename = self.__model_filename()

        if Path(filename).is_file():
            print('load model')
            model = Km.load_model(filename)
            print('load model: ' + filename)
            return model

        return self.create_model()

    @abstractmethod
    def create_model(self):
        """
        Create new model with appropriate number of layers and network structure
        :return: created model
        """

    @abstractmethod
    def state_to_tensor(self, state, move):
        """
        Creates a tensor based on a state and a move as input vector for nn
        :param state: current state
        :param move: current move
        :return: created tensor
        """

    @abstractmethod
    def one_hot_encode_state(self, state):
        """
        Encode the state as numeric input for the network.
        :param state: state to encode
        :return: encoded state
        """

    def __choose_optimal_move(self, state) -> int:
        """
        Choose optimal move based on the predicted values of the current state.
        :param state: current state
        :return: best move with highest value (randomly select for equal values)
        """
        best_value = -float('Inf')
        best_moves = []  # every move that reaches ``best_value``
        for move in Board.POSSIBLE_ACTIONS:
            # The value comes from this class' own helper; the keras model
            # itself has no ``calc_value`` method.
            value = self.__calc_value(state, move)

            if value > best_value:
                best_value = value
                best_moves = [move]
            elif value == best_value:
                best_moves.append(move)

        return random.choice(best_moves)

    def __calc_value(self, state, move) -> float:
        """
        Calculate a tensor and predict the reward
        :param state: current state
        :param move: current move
        :return: predicted value (predicted reward) as a plain float
        """
        tensor = self.state_to_tensor(state, move)
        prediction = self.model.predict(tensor)
        # ``predict`` returns an array; take its first entry so that callers can
        # compare and combine values as ordinary floats.
        return float(np.asarray(prediction).reshape(-1)[0])

    def __calc_target(self, prev_state, prev_move, state, reward):
        """
        Calculate the target value (q value or reward)
        :param prev_state: previous state
        :param prev_move: previous move
        :param state: current state
        :param reward: previous reward
        :return: ``reward`` itself if it is non-zero, otherwise the q value of
                 (prev_state, prev_move) moved by ``alpha`` towards the
                 discounted best value of ``state``
        """
        if reward != 0:
            # A non-zero reward is used directly; no prediction is needed.
            return reward

        q_value = self.__calc_value(prev_state, prev_move)

        # The number of candidate moves is read from the width of the input tensor.
        num_moves = self.state_to_tensor(prev_state, prev_move).shape[2]
        best_next = max(self.__calc_value(state, move) for move in range(num_moves))

        return q_value + self.alpha * (reward + self.gamma * best_next - q_value)

    def __save_model(self):
        """
        save model as h5 file
        """
        filename = self.__model_filename()

        try:
            os.remove(filename)
        except OSError:
            # Best effort: the file may not exist yet or may not be removable.
            pass

        self.model.save(filename)

    def predict_model(self, state, online: bool):
        """
        Query the network for a state.
        :param state: current state
        :param online: if True return the raw prediction for the encoded state,
                       otherwise return the move with the highest predicted value
        :return: prediction of the network (online) or chosen move (offline)
        """
        if online:
            return self.model.predict(self.one_hot_encode_state(state))

        return self.__choose_optimal_move(state)

    def __learn_batch(self, memory):
        """
        Learn model with a batch of states and actions from memory, then save it
        :param memory: saved states, actions and rewards
        """
        print('start learning player', self.tag)
        print('data length:', len(memory))

        # build x_train
        x_train = np.zeros((len(memory), 2, 9))
        for row, (prev_state, prev_move, _, _) in enumerate(memory):
            x_train[row, :, :] = self.state_to_tensor(prev_state, prev_move)

        # train with planning: targets are recomputed from the current network
        # before every round, for at most 10 rounds or until the loss is small
        loss = 20
        count = 0
        while loss > 0.02 and count < 10:
            y_train = self.__create_targets(memory)
            history = self.model.fit(x_train, y_train, epochs=5, batch_size=256, verbose=0)
            self.history.append(history.history)
            # ``evaluate`` is indexed, so the model must be compiled with metrics
            loss = self.model.evaluate(x_train, y_train, batch_size=256, verbose=0)[0]
            count += 1
            print('planning number:', count, 'loss', loss)

        loss = self.model.evaluate(x_train, y_train, batch_size=256, verbose=0)
        print('player:', self.tag, loss, 'loops', count)

        self.__save_model()

    def train_model_offline(self, prev_state, prev_move, state, reward):
        """
        Store a transition and learn from the collected batch once it is full
        :param prev_state: previous known state
        :param prev_move: previous made move
        :param state: new state
        :param reward: previous reward
        """
        self.__load_to_memory(prev_state, prev_move, state, reward)
        self.count_memory += 1

        # ``>=`` so that learning still triggers if ``batch_size`` was lowered
        # after transitions had already been collected.
        if self.count_memory >= self.batch_size:
            self.count_memory = 0
            self.__learn_batch(self.memory)
            self.memory = []

    def __load_to_memory(self, prev_state, prev_move, state, reward):
        """
        Load all q related things into memory to learn in batch
        :param prev_state: previous known state
        :param prev_move: previous made move
        :param state: new state
        :param reward: previous reward
        """
        self.memory.append([prev_state, prev_move, state, reward])

    def __create_targets(self, memory):
        """
        Create target vector for each state-action-pair in memory
        :param memory: saved states, actions and rewards
        :return: target vector of shape (len(memory), 1)
        """
        targets = np.zeros((len(memory), 1))
        for row, (prev_state, prev_move, state, reward) in enumerate(memory):
            targets[row, 0] = self.__calc_target(prev_state, prev_move, state, reward)

        return targets

    def train_model_online(self, prev_state, new_state, prev_move, reward, discount_factor=0.8):
        """
        Train the model based on the current state, action and received reward
        :param prev_state: previous state
        :param new_state: new state
        :param prev_move: previous move
        :param discount_factor: discount factor of this q learner
        :param reward: received reward after prev action
        """
        prev_state = self.one_hot_encode_state(prev_state)
        new_state = self.one_hot_encode_state(new_state)

        target = reward + discount_factor * np.max(self.model.predict(new_state))
        # Only the entry of the move that was played is replaced; all other
        # entries keep the current prediction.
        target_vector = self.model.predict(prev_state)[0]
        target_vector[prev_move] = target
        # One training row whose width follows the model's own output.
        history = self.model.fit(prev_state, target_vector.reshape((1, -1)), epochs=1, verbose=0)
        # ``list.append`` returns None, so its result must not be assigned back.
        self.history.append(history.history)

In [None]:
class TicTacToeModel(Model):
    """
    Special model for tic tac toe games.

    Consists of 2x9 input vector, dense network of 9 layers and a 1 sized target vector.
    Input vector consists an array with length 9 for the chosen move and an array for the state.
    Target vector consists of one value for the q value.
    """

    def __init__(self, tag):
        """
        :param tag: used tag for neural network model (e.g. 1 for first player and -1 for second)
        """
        super().__init__(tag)

    def create_model(self):
        """
        Creates keras model
        :return: keras model
        """
        print('new model')

        model = Km.Sequential()
        model.add(kl.Flatten(input_shape=(2, 9)))

        # seven identical hidden blocks: Dense(18) followed by LeakyReLU
        for _ in range(7):
            model.add(kl.Dense(18))
            model.add(kl.LeakyReLU(alpha=0.3))

        model.add(kl.Dense(9))
        model.add(kl.LeakyReLU(alpha=0.3))
        model.add(kl.Dense(1, activation='linear'))

        # NOTE(review): 'accuracy' carries little meaning for a single linear
        # regression output. A metric is kept because Model.__learn_batch
        # indexes the result of ``evaluate``.
        model.compile(optimizer='Adam', loss='mean_absolute_error', metrics=['accuracy'])

        model.summary()

        return model

    def state_to_tensor(self, state, move):
        """
        Generates a tensor of state and move index
        :param state: current state
        :param move: current move (index of the chosen field)
        :return: float32 tensor of shape (1, 2, 9): row 0 is the one hot
                 encoded move, row 1 the encoded state
        """
        # ``one_hot_encode_state`` returns a (1, 9) array. It has to be
        # flattened before it can be stacked with the 1-dim action vector,
        # otherwise the two rows have different shapes.
        board = np.asarray(self.one_hot_encode_state(state), dtype='float32').reshape(-1)

        action = np.zeros(board.size, dtype='float32')
        action[move] = 1  # one hot encoding for chosen action (1 for the chosen action and 0 for none)

        return np.stack((action, board)).reshape((1, 2, board.size))

    def one_hot_encode_state(self, state):
        """
        Numeric encoding for the state.
        Each field input of 3x3 matrix will be displayed with 0 (blank), 1 (player 1), -1 (player 2)
        :param state: state to encode; ``None`` marks a blank field, 'x' and 'o' the two players
        :return: encoded state as float32 array of shape (1, 9)
        """
        symbol_values = {None: 0, 'x': 1, 'o': -1}

        # Work on a flattened object copy so the caller's board is not modified.
        cells = np.asarray(state, dtype=object).flatten()

        # Fields that are already numeric pass through unchanged.
        encoded = [symbol_values.get(cell, cell) for cell in cells]

        # float32 so that the result can be fed to the network directly
        return np.asarray(encoded, dtype='float32').reshape((1, 9))

In [None]:
class TicTacToeModelSmall(TicTacToeModel):
    """
    Compact tic tac toe model that scores all actions at once.

    The network takes the encoded state (one row of ``observation_space``
    values), passes it through one hidden dense layer and returns
    ``action_space`` values, one predicted reward per action.
    """

    def __init__(self, tag, observation_space=9, action_space=9):
        """
        :param tag: used tag for neural network model (e.g. 1 for first player and -1 for second)
        :param observation_space: length of the encoded state fed to the network
        :param action_space: number of outputs of the network (one per action)
        """
        # Both sizes are set before the parent constructor runs, because the
        # parent constructor may call ``create_model``, which reads them.
        self.observation_space = observation_space
        self.action_space = action_space
        super().__init__(tag)

    def create_model(self):
        """
        Builds and compiles the keras network
        :return: compiled keras model
        """
        print('new model')

        layers = [
            kl.InputLayer(batch_input_shape=(1, self.observation_space)),
            kl.Dense(20, activation='relu'),
            kl.Dense(self.action_space, activation='linear'),
        ]
        network = Km.Sequential(layers)
        network.compile(loss='mse', optimizer='adam', metrics=['mae'])

        network.summary()

        return network