In [24]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


In [25]:
import chess

In [26]:
import random
board = chess.Board() 
move = random.choice(list(board.legal_moves))
move

Move.from_uci('b2b4')

In [27]:
import random
def random_player(board):
    move = random.choice(list(board.legal_moves))
    return move.uci()

In [28]:
#This function allows us to play if we give the right uci input
def human_player(board):
    st = input()
    move = chess.Move.from_uci(st)
    return move.uci()

In [29]:
import time
from IPython.display import display, HTML, clear_output

In [30]:
def who(player):
    return "White" if player == chess.WHITE else "Black"
def display_board(board, use_svg):
    if use_svg:
        return board._repr_svg_()
    else:
        return "<pre>" + str(board) + "</pre>"
    
def play_game(player1, player2, visual="svg", pause=0.1):
    """
    playerN1, player2: functions that takes board, return uci move
    visual: "simple" | "svg" | None
    """
    use_svg = (visual == "svg")
    board = chess.Board()
    board_stop = display_board(board, use_svg)
    html = "%s" % (board_stop)
    display(HTML(html))
    try:
        while not board.is_game_over(claim_draw=True):
            if board.turn == chess.WHITE:
                uci = player1(board)
            else:
                uci = player2(board)
            name = who(board.turn)
            board.push_uci(uci)
            board_stop = display_board(board, use_svg)
            html = "<h1>Move %s %s, Play '%s':</h1><br/>%s" % (
                       len(board.move_stack), name, uci, board_stop)
            if visual is not None:
                if visual == "svg":
                    clear_output(wait=True)
                display(HTML(html))
                if visual == "svg":
                    time.sleep(pause)
    except KeyboardInterrupt:
        msg = "Game interrupted!"
        return (None, msg, board)
    result = None
    if board.is_checkmate():
        msg = "checkmate: " + who(not board.turn) + " wins!"
        result = not board.turn
    elif board.is_stalemate():
        msg = "draw: stalemate"
    elif board.is_fivefold_repetition():
        msg = "draw: 5-fold repetition"
    elif board.is_insufficient_material():
        msg = "draw: insufficient material"
    elif board.can_claim_draw():
        msg = "draw: claim"
    if visual is not None:
        print(msg)
    return (result, msg, board)

In [31]:
#play_game(human_player, random_player)

In [32]:
x = chess.polyglot.zobrist_hash(board)

AttributeError: module 'chess' has no attribute 'polyglot'

In [33]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os

from RLC.real_chess import agent, environment, learn, tree
import chess
from chess.pgn import Game

opponent = agent.GreedyAgent()
env = environment.Board(opponent, FEN=None)
player = agent.Agent(lr=0.001, network='big')
player.fix_model()
learner = learn.TD_search(env, player, gamma=0.8, search_time=1.5)
node = tree.Node(learner.env.board, gamma=learner.gamma)

w_before = learner.agent.model.get_weights()
n_iters = 105

In [34]:
#learner.learn(iters=1, timelimit_seconds=100)

In [35]:
#learner.play_game(1,maxiter=100)

print(opponent.predict(np.expand_dims(env.layer_board, axis=0)))
learner.search_time = 60
learner.play_game(n_iters)
pgn = Game.from_board(learner.env.board)
with open("rlc_pgn","w") as log:
    log.write(str(pgn))

## Current Idea :

1. We have an interface to run our chess game
2. The only thing we need now is an intelligent agent which we pass to the *player2* argument of function **play_game()** 
3. For this , I believe the policy DNN model stored in model.h5 file is crucial to building player2

In [36]:
import tensorflow as tf
from keras.layers import Input, Dense, Flatten, Concatenate, Conv2D, Dropout
from keras.losses import mean_squared_error
from keras.models import Model, clone_model, load_model
from keras.optimizers import SGD, Adam, RMSprop
import numpy as np

gamma = 0.9
search_time=60
min_sim_count=10
temperature=1

#parameters to be passed to object of AIPlayer
model = tf.keras.models.load_model('RLC_model.h5')
env = environment.Board(opponent, FEN=None)
node = tree.Node(learner.env.board, gamma=learner.gamma)

TypeError: Unexpected keyword argument passed to optimizer: learning_rate

In [37]:
import keras

In [38]:
print(keras.__version__)

2.3.1


In [39]:
print(tf.__version__)

1.13.1


## Final class that represents the human vs AI agent

In [33]:
 from RLC.real_chess.tree import Node
    
class AIPlayer(object):
    
    def __init__(self,env,model,tree):
        self.env=env
        self.gamma = 0.9
        self.search_time=60
        self.min_sim_count=10
        self.temperature=1
        self.model = model
        self.tree = Node(self.env.board, gamma=learner.gamma)
        self.k=1

  
    def mcts(self, node):
        """
        Run Monte Carlo Tree Search
        Args:
            node: A game state node object

        Returns:
            the node with playout sims

        """

        starttime = time.time()
        sim_count = 0
        board_in = self.env.board.fen()

        # First make a prediction for each child state
        for move in self.env.board.generate_legal_moves():
            if move not in node.children.keys():
                node.children[move] = tree.Node(self.env.board, parent=node)

            episode_end, reward = self.env.step(move)

            if episode_end:
                successor_state_value = 0
            else:
                successor_state_value = np.squeeze(
                    self.model.predict(np.expand_dims(self.env.layer_board, axis=0))
                )

            child_value = reward + gamma * successor_state_value

            node.update_child(move, child_value)
            self.env.board.pop()
            self.env.init_layer_board()
        if not node.values:
            node.values = [0]

        while starttime + search_time > time.time() or sim_count < min_sim_count:
            depth = 0
            color = 1
            node_rewards = []

            # Select the best node from where to start MCTS
            while node.children:
                node, move = node.select(color=color)
                if not move:
                    # No move means that the node selects itself, not a child node.
                    break
                else:
                    depth += 1
                    color = color * -1  # switch color
                    episode_end, reward = self.env.step(move)  # Update the environment to reflect the node
                    node_rewards.append(reward)
                    # Check best node is terminal

                    if self.env.board.result() == "1-0" and depth == 1:  # -> Direct win for white, no need for mcts.
                        self.env.board.pop()
                        self.env.init_layer_board()
                        node.update(1)
                        node = node.parent
                        return node
                    elif episode_end:  # -> if the explored tree leads to a terminal state, simulate from root.
                        while node.parent:
                            self.env.board.pop()
                            self.env.init_layer_board()
                            node = node.parent
                        break
                    else:
                        continue

            # Expand the game tree with a simulationd
            Returns, move = node.simulate(self.model,
                                            self.env,
                                            temperature=temperature,
                                            depth=0)
            self.env.init_layer_board()

            if move not in node.children.keys():
                node.children[move] = tree.Node(self.env.board, parent=node)

            node.update_child(move, Returns)

            # Return to root node and backpropagate Returns
            while node.parent:
                latest_reward = node_rewards.pop(-1)
                Returns = latest_reward + gamma * Returns
                node.update(Returns)
                node = node.parent

                self.env.board.pop()
                self.env.init_layer_board()
            sim_count += 1

        board_out = self.env.board.fen()
        assert board_in == board_out

        return node

    def opponent_move(self):
        start_mcts_after = -1
        if self.k > start_mcts_after:
            self.tree = self.mcts(self.tree)
            # Step the best move
            max_move = None
            max_value = np.NINF
            for move, child in self.tree.children.items():
                sampled_value = np.mean(child.values)
                if sampled_value > max_value:
                    max_value = sampled_value
                    max_move = move
        else:
            max_move = np.random.choice([move for move in self.env.board.generate_legal_moves()])

        return max_move
    
    def greedy_opponent(self):
        max_move = None
        max_value = np.NINF
        for move in self.env.board.generate_legal_moves():
            self.env.step(move)
            if self.env.board.result() == "0-1":
                max_move = move
                self.env.board.pop()
                self.env.init_layer_board()
                break
            successor_state_value_opponent = self.env.opposing_agent.predict(
                np.expand_dims(self.env.layer_board, axis=0))
            if successor_state_value_opponent > max_value:
                max_move = move
                max_value = successor_state_value_opponent

            self.env.board.pop()
            self.env.init_layer_board()

        return max_move
    
    def who(player):
        return "White" if player == chess.WHITE else "Black"
    
    def display_board(board, use_svg):
        if use_svg:
            return board._repr_svg_()
        else:
            return "<pre>" + str(board) + "</pre>"

    def play_game_ai(self,visual="svg", pause=0.1):
        """
        playerN1, player2: functions that takes board, return uci move
        visual: "simple" | "svg" | None
        """
        use_svg = (visual == "svg")
        board_stop = display_board(self.env.board, use_svg)
        html = "%s" % (board_stop)
        display(HTML(html))
        try:
            while not self.env.board.is_game_over(claim_draw=True):
                if self.env.board.turn != chess.WHITE:
                    uci = self.human_player()
                else:
                    move = self.opponent_move()
                    uci = move.uci()
                name = who(self.env.board.turn)
                self.env.board.push_uci(uci)
                board_stop = display_board(self.env.board, use_svg)
                html = "<h1>Move %s %s, Play '%s':</h1><br/>%s" % (
                           len(self.env.board.move_stack), name, uci, board_stop)
                if visual is not None:
                    if visual == "svg":
                        clear_output(wait=True)
                    display(HTML(html))
                    if visual == "svg":
                        time.sleep(pause)
        except KeyboardInterrupt:
            msg = "Game interrupted!"
            return (None, msg, self.env.board)
        result = None
        if self.env.board.is_checkmate():
            msg = "checkmate: " + who(not self.env.board.turn) + " wins!"
            result = not self.env.board.turn
        elif self.env.board.is_stalemate():
            msg = "draw: stalemate"
        elif self.env.board.is_fivefold_repetition():
            msg = "draw: 5-fold repetition"
        elif self.env.board.is_insufficient_material():
            msg = "draw: insufficient material"
        elif self.env.board.can_claim_draw():
            msg = "draw: claim"
        if visual is not None:
            print(msg)
        #return (result, msg, self.env.board)
        return (result,message)

    def human_player(self):
        st = input()
        move = chess.Move.from_uci(st)
        return move.uci()

In [34]:
ai_player = AIPlayer(env,model,node)

In [35]:
#play_game_ai(ai_player.opponent_move,human_player)
env.board.reset()

In [36]:
#uncomment and run below code to play aginst greedy opponent

#ai_player.play_game_ai()

StdinNotImplementedError: raw_input was called, but this frontend does not support input requests.

In [37]:
env.board.reset()

In [38]:
import numpy as np
import time
from RLC.real_chess.tree import Node
import math
import gc


def softmax(x, temperature=1):
    return np.exp(x / temperature) / np.sum(np.exp(x / temperature))


def sigmoid(x):
    return 1 / (1 + math.exp(-x))


class TD_search_m(object):

    def __init__(self, env, agent, gamma=0.9, search_time=1, memsize=2000, batch_size=256, temperature=1):
        """
        Chess algorithm that combines bootstrapped monte carlo tree search with Q Learning
        Args:
            env: RLC chess environment
            agent: RLC chess agent
            gamma: discount factor
            search_time: maximum time spent doing tree search
            memsize: Amount of training samples to keep in-memory
            batch_size: Size of the training batches
            temperature: softmax temperature for mcts
        """
        self.env = env
        self.agent = agent
        self.tree = Node(self.env)
        self.gamma = gamma
        self.memsize = memsize
        self.batch_size = batch_size
        self.temperature = temperature
        self.reward_trace = []  # Keeps track of the rewards
        self.piece_balance_trace = []  # Keep track of the material value on the board
        self.ready = False  # Whether to start training
        self.search_time = search_time
        self.min_sim_count = 10

        self.mem_state = np.zeros(shape=(1, 8, 8, 8))
        self.mem_sucstate = np.zeros(shape=(1, 8, 8, 8))
        self.mem_reward = np.zeros(shape=(1))
        self.mem_error = np.zeros(shape=(1))
        self.mem_episode_active = np.ones(shape=(1))

    #def display_board(self):
     #   return "<pre>" + str(self.env.board) + "</pre>"
        
    def play_game(self, k, maxiter=80):
        """
        Play a chess game and learn from it
        Args:
            k: the play iteration number
            maxiter: maximum duration of the game (halfmoves)

        Returns:
            board: Chess environment on terminal state
        """
        episode_end = False
        turncount = 0
        tree = Node(self.env.board, gamma=self.gamma)  # Initialize the game tree

        # Play a game of chess
        # According to test.py - Decides the best max_move and max_value using MCTS
        while not episode_end:
            state = np.expand_dims(self.env.layer_board.copy(), axis=0)
            state_value = self.agent.predict(state)

            board_stop = display_board(self.env.board,"svg")
            html = "%s" % (board_stop)
            display(HTML(html))
            # White's turn involves tree-search
            if self.env.board.turn:

                # Do a Monte Carlo Tree Search after game iteration k
                start_mcts_after = -1
                if k > start_mcts_after:
                    tree = self.mcts(tree)
                    # Step the best move
                    max_move = None
                    max_value = np.NINF
                    for move, child in tree.children.items():
                        sampled_value = np.mean(child.values)
                        if sampled_value > max_value:
                            max_value = sampled_value
                            max_move = move
                else:
                    max_move = np.random.choice([move for move in self.env.board.generate_legal_moves()])

            # Black's turn is myopic
            # According to test.py - uses greedy approach to decide the
            # best max_move, and its corresponding max_value
            else:
                max_move = None
                max_value = np.NINF
                
                st = input()
                move = chess.Move.from_uci(st)
                max_move = move
                
            if not (self.env.board.turn and max_move not in tree.children.keys()) or not k > start_mcts_after:
                tree.children[max_move] = Node(gamma=0.9, parent=tree)

            episode_end, reward = self.env.step(max_move)

            tree = tree.children[max_move]
            tree.parent = None
            gc.collect()

            sucstate = np.expand_dims(self.env.layer_board, axis=0)
            new_state_value = self.agent.predict(sucstate)

            error = reward + self.gamma * new_state_value - state_value
            error = np.float(np.squeeze(error))

            turncount += 1
            if turncount > maxiter and not episode_end:
                episode_end = True

            episode_active = 0 if episode_end else 1

            # construct training sample state, prediction, error
            self.mem_state = np.append(self.mem_state, state, axis=0)
            self.mem_reward = np.append(self.mem_reward, reward)
            self.mem_sucstate = np.append(self.mem_sucstate, sucstate, axis=0)
            self.mem_error = np.append(self.mem_error, error)
            self.reward_trace = np.append(self.reward_trace, reward)
            self.mem_episode_active = np.append(self.mem_episode_active, episode_active)

            if self.mem_state.shape[0] > self.memsize:
                self.mem_state = self.mem_state[1:]
                self.mem_reward = self.mem_reward[1:]
                self.mem_sucstate = self.mem_sucstate[1:]
                self.mem_error = self.mem_error[1:]
                self.mem_episode_active = self.mem_episode_active[1:]
                gc.collect()

            if turncount % 10 == 0:
                self.update_agent()

        piece_balance = self.env.get_material_value()
        self.piece_balance_trace.append(piece_balance)
        print("game ended with result", reward, "and material balance", piece_balance, "in", turncount, "halfmoves")

        return self.env.board

    def update_agent(self):
        """
        Update the Agent with TD learning
        Returns:
            None
        """
        if self.ready:
            choice_indices, states, rewards, sucstates, episode_active = self.get_minibatch()
            td_errors = self.agent.TD_update(states, rewards, sucstates, episode_active, gamma=self.gamma)
            self.mem_error[choice_indices.tolist()] = td_errors

    def get_minibatch(self, prioritized=True):
        """
        Get a mini batch of experience
        Args:
            prioritized:

        Returns:

        """
        if prioritized:
            sampling_priorities = np.abs(self.mem_error) + 1e-9
        else:
            sampling_priorities = np.ones(shape=self.mem_error.shape)
        sampling_probs = sampling_priorities / np.sum(sampling_priorities)
        sample_indices = [x for x in range(self.mem_state.shape[0])]
        choice_indices = np.random.choice(sample_indices,
                                          min(self.mem_state.shape[0],
                                              self.batch_size),
                                          p=np.squeeze(sampling_probs),
                                          replace=False
                                          )
        states = self.mem_state[choice_indices]
        rewards = self.mem_reward[choice_indices]
        sucstates = self.mem_sucstate[choice_indices]
        episode_active = self.mem_episode_active[choice_indices]

        return choice_indices, states, rewards, sucstates, episode_active

    def mcts(self, node):
        """
        Run Monte Carlo Tree Search
        Args:
            node: A game state node object

        Returns:
            the node with playout sims

        """

        starttime = time.time()
        sim_count = 0
        board_in = self.env.board.fen()

        # First make a prediction for each child state
        for move in self.env.board.generate_legal_moves():
            if move not in node.children.keys():
                node.children[move] = Node(self.env.board, parent=node)

            episode_end, reward = self.env.step(move)

            if episode_end:
                successor_state_value = 0
            else:
                successor_state_value = np.squeeze(
                    self.agent.model.predict(np.expand_dims(self.env.layer_board, axis=0))
                )

            child_value = reward + self.gamma * successor_state_value

            node.update_child(move, child_value)
            self.env.board.pop()
            self.env.init_layer_board()
        if not node.values:
            node.values = [0]

        while starttime + self.search_time > time.time() or sim_count < self.min_sim_count:
            depth = 0
            color = 1
            node_rewards = []

            # Select the best node from where to start MCTS
            while node.children:
                node, move = node.select(color=color)
                if not move:
                    # No move means that the node selects itself, not a child node.
                    break
                else:
                    depth += 1
                    color = color * -1  # switch color
                    episode_end, reward = self.env.step(move)  # Update the environment to reflect the node
                    node_rewards.append(reward)
                    # Check best node is terminal

                    if self.env.board.result() == "1-0" and depth == 1:  # -> Direct win for white, no need for mcts.
                        self.env.board.pop()
                        self.env.init_layer_board()
                        node.update(1)
                        node = node.parent
                        return node
                    elif episode_end:  # -> if the explored tree leads to a terminal state, simulate from root.
                        while node.parent:
                            self.env.board.pop()
                            self.env.init_layer_board()
                            node = node.parent
                        break
                    else:
                        continue

            # Expand the game tree with a simulation
            Returns, move = node.simulate(self.agent.fixed_model,
                                          self.env,
                                          temperature=self.temperature,
                                          depth=0)
            self.env.init_layer_board()

            if move not in node.children.keys():
                node.children[move] = Node(self.env.board, parent=node)

            node.update_child(move, Returns)

            # Return to root node and backpropagate Returns
            while node.parent:
                latest_reward = node_rewards.pop(-1)
                Returns = latest_reward + self.gamma * Returns
                node.update(Returns)
                node = node.parent

                self.env.board.pop()
                self.env.init_layer_board()
            sim_count += 1

        board_out = self.env.board.fen()
        assert board_in == board_out

        return node


In [39]:
learner = TD_search_m(env, player, gamma=0.8, search_time=1.5)

In [40]:
learner.play_game(1)