In [2]:
# ! pip install keras-rl2
# ! pip install chess
# ! pip install python-chess

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
# ls drive/MyDrive/Data/Chess

In [5]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten,\
     Input,BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam, SGD

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

# import gym_chess

import chess
from sys import platform
import os
import chess.engine


In [6]:
from tensorflow.python.client import device_lib

# Show every compute device TensorFlow reports, to check whether training
# will run on CPU only or with an accelerator.
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5327364616367050006
]


In [None]:
# %%
# Location of the Stockfish binary, relative to the notebook's working directory.
STOCKFISH_PATH = "./stockfish_14.1_linux_x64"

# Add the execute bits (the equivalent of `chmod +x`) without shelling out, so
# that a missing binary raises FileNotFoundError here instead of being ignored.
os.chmod(STOCKFISH_PATH, os.stat(STOCKFISH_PATH).st_mode | 0o111)

# Start the UCI engine process used as the agent's opponent.
engine = chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH)


In [8]:
# # %%
# if platform == "linux" or platform == "linux2":
#     os.system('chmod +x ../stockfish/stockfish_14.1_linux_x64')
#     engine = chess.engine.SimpleEngine.popen_uci(r"../stockfish/stockfish_14.1_linux_x64")
# elif platform == "win32":
#     engine = chess.engine.SimpleEngine.popen_uci(r"../stockfish/stockfish_14.1_win_32bit.exe")

In [9]:
def find_move(env, time_limit=0.05):
    """Ask the global UCI ``engine`` for a move in the environment's position.

    Args:
        env: Object whose ``env`` attribute is the ``chess.Board`` to analyse
            (e.g. a ``ChessEnv`` instance).
        time_limit: Thinking time in seconds handed to the engine. Defaults to
            the previously hard-coded 0.05 s.

    Returns:
        The ``move`` attribute of the engine's play result.
    """
    result = engine.play(env.env, chess.engine.Limit(time=time_limit))
    return result.move

In [10]:
# Observation vector: 64 board squares plus one trailing colour flag.
STATE_SHAPE = (64 + 1,)
# One discrete action per (from_square, to_square) pair.
NB_ACTIONS = 64 * 64

In [11]:
class ChessEnv:
    '''
    Gym-style chess environment in which the agent plays against a UCI engine.

    state - obser: ndarray - (65,): [:64] is the flattened int board, laid out
        in the order of the EPD piece-placement field; [64] is the side to
        move: 1 is white and -1 is black. When the agent is asked to act this
        is the bot's colour.
    step: int. step_range = [0, 4096), encoded as from_square * 64 + to_square
    reward: number (per-step penalty, captured material, end-of-game bonus)
    '''

    # Signed material value per piece symbol: positive for white, negative for black.
    mapped = {
            'P': 10,     # White Pawn
            'p': -10,    # Black Pawn
            'N': 20,     # White Knight
            'n': -20,    # Black Knight
            'B': 30,     # White Bishop
            'b': -30,    # Black Bishop
            'R': 40,     # White Rook
            'r': -40,    # Black Rook
            'Q': 50,     # White Queen
            'q': -50,    # Black Queen
            'K': 900,     # White King
            'k': -900     # Black King
    }
    # state_shape = (8, 8)
    # nb_actions = 4096
    model = None

    def __init__(self, model: Sequential, neg_r_each_step = -1) -> None:
        '''
        Args:
            model: network associated with this environment (only stored).
            neg_r_each_step: reward added on every legal agent move.
        '''
        self.neg_r_each_step = neg_r_each_step
        self.model = model
        self.env = chess.Board()
        # reset() fills in self.env, self.state and self.bot_color.
        self.reset()

    def is_draw(self):
        '''Return True (and print the reason) if the current position is a draw.'''
        if self.env.is_stalemate():
            print("statlemate")
            return True
        if self.env.is_fivefold_repetition():
            print("fivefold repetition")
            return True
        if self.env.is_seventyfive_moves():
            print("75 moves")
            return True
        if self.env.is_insufficient_material():
            print("Insufficient Material")
            return True
        return False

    def is_checkmate(self):
        '''Return True if the side to move is checkmated.'''
        return self.env.is_checkmate()

    def convert_board_to_int(self):
        '''
        Encode the board as an (8, 8) float array of signed piece values.

        The array is filled in the order of the EPD piece-placement field;
        empty squares are 0.
        '''
        # The first space-separated EPD field is the piece placement.
        placement = self.env.epd().split(" ", 1)[0]
        cells = []
        for symbol in placement:
            if symbol == "/":
                continue
            if symbol in self.mapped:
                cells.append(self.mapped[symbol])
            else:
                # A digit encodes a run of that many empty squares.
                cells.extend([0] * int(symbol))
        return np.array(cells, dtype=float).reshape((8, 8))

    def get_state(self) -> np.ndarray:
        '''Return the (65,) observation: 64 board values followed by the side to move (+1/-1).'''
        return np.append(self.convert_board_to_int().reshape(64,), self.env.turn * 2 - 1)

    def legal_moves(self):
        '''Return the legal moves of the current position as a list.'''
        return list(self.env.legal_moves)

    def encodeMove(self, move_uci:str):
        '''
        Encode a 4-character UCI move (e.g. "e2e4") as from_square * 64 + to_square.

        Raises:
            ValueError: if ``move_uci`` is not exactly 4 characters long
                (promotion suffixes are not supported by the action encoding).
        '''
        if len(move_uci) != 4:
            raise ValueError(
                "expected a 4-character UCI move, got {!r}".format(move_uci))
        a, b = chess.parse_square(move_uci[:2]), chess.parse_square(move_uci[2:])
        return a * 64 + b

    def decodeMove(self, move_int:int):
        '''
        Decode an action index into a legal move of the current position.

        Raises:
            ValueError: if the index is outside [0, 4096) or does not
                correspond to a legal move on the current board.
        '''
        move_int = int(move_int)
        if not 0 <= move_int < 64 * 64:
            raise ValueError("action {} is outside [0, 4096)".format(move_int))
        a, b = divmod(move_int, 64)
        # find_move validates legality against the current board.
        return self.env.find_move(from_square=a, to_square=b)

    def render(self, mode='human'):
        '''Print the board. ``mode`` is accepted for gym-style callers and ignored.'''
        print(self.env.unicode())

    def reset(self):
        '''
        Start a new episode from a random position and return its observation.

        Between 0 and 49 random legal plies are played from the initial
        position; positions in which the game is already over are rejected and
        re-sampled. ``self.state`` and ``self.bot_color`` are refreshed so that
        ``step`` scores captures against the new position and colour.
        '''
        while True:
            self.env = chess.Board()
            num_sample_steps = np.random.randint(0, 50)
            for _ in range(num_sample_steps):
                lg_move = self.legal_moves()
                if not lg_move:
                    break
                self.env.push(np.random.choice(lg_move))
            # Only accept positions in which the agent can still move.
            if not self.env.is_game_over():
                break

        # The agent plays whichever side is to move in the sampled position.
        # 1 -> white, -1 -> black
        self.bot_color = self.env.turn * 2 - 1
        self.state = self.get_state()
        return self.state

    def _capture_reward(self, move):
        '''Value of the piece on ``move``'s destination square, signed from the bot's view.'''
        to_r, to_c = move.to_square//8, move.to_square%8
        # Row 0 of the encoded board is the top rank, hence the (7 - rank) flip.
        return -self.state[(7 - to_r)*8 + to_c] * self.bot_color

    def step(self, action: int):
        '''
        Play the agent's action, then (if the game continues) the engine's reply.

        Args:
            action: index in [0, 4096), see ``decodeMove``.

        Returns:
            (state, reward, done, info) in the gym convention. An illegal
            action ends the episode with reward -5000. Errors unrelated to
            move legality (e.g. an engine failure) are propagated.
        '''
        try:
            move = self.decodeMove(action)
        except ValueError:
            # wrong move
            print('wrong_move')
            return self.state, -5000, True, {}

        # neg reward each step, plus material captured by the agent
        reward = self.neg_r_each_step + self._capture_reward(move)
        self.env.push(move)
        self.state = self.get_state()

        # check end game after the agent's move
        if self.is_checkmate():
            print('Win')
            return self.state, reward + self.mapped['K'], True, {}
        if self.is_draw():
            return self.state, reward + 300, True, {}

        # opponent's turn: material taken from the agent counts negatively
        reply = find_move(self)
        reward += self._capture_reward(reply)
        self.env.push(reply)
        self.state = self.get_state()

        # check end game after the opponent's move
        done = False
        if self.is_checkmate():
            reward -= self.mapped['K']
            done = True
            print("Lose")
        elif self.is_draw():
            reward += 300
            done = True

        return self.state, reward, done, {}


In [12]:
# model
# Fully connected Q-network: one linear output per encoded move. The leading
# 1 in the input shape is the agent's observation window (window_length=1).
model = Sequential([
    Input((1, ) + STATE_SHAPE),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    Dense(NB_ACTIONS),
    Activation('linear'),
])
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 65)                0         
_________________________________________________________________
dense (Dense)                (None, 128)               8448      
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 128)               512       
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
__________________________________________________

In [13]:
# Environment the agent trains in; every legal move costs 1 reward point.
env = ChessEnv(model=model, neg_r_each_step=-1)

In [14]:
# Resume from a previous checkpoint when one exists. On a fresh run the file
# has not been written yet (the training cell creates it), so start from the
# randomly initialised weights instead of failing.
if os.path.exists('chess_model.h5'):
    model.load_weights('chess_model.h5')
else:
    print('chess_model.h5 not found - starting from freshly initialised weights')

In [15]:
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)

# The policy and agent do not change between rounds, so build and compile them
# once; rebuilding them inside the loop repeats the compile work and throws
# away the optimizer state every round.
policy = EpsGreedyQPolicy(0.01)
dqn = DQNAgent(model=model, nb_actions=NB_ACTIONS, memory=memory,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-4), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
for training_round in range(10):
    his = dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # Checkpoint after every round so an interrupted run keeps its progress.
    model.save('chess_model.h5')

2021-12-14 13:28:06.652713: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 1999965000 Hz


Training for 50000 steps ...




wrong_move
     1/50000: episode: 1, duration: 0.255s, episode steps:   1, steps per second:   4, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 1027.000 [1027.000, 1027.000],  loss: --, mae: --, mean_q: --
wrong_move
     2/50000: episode: 2, duration: 0.021s, episode steps:   1, steps per second:  47, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 1439.000 [1439.000, 1439.000],  loss: --, mae: --, mean_q: --
wrong_move
     3/50000: episode: 3, duration: 0.008s, episode steps:   1, steps per second: 122, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 3093.000 [3093.000, 3093.000],  loss: --, mae: --, mean_q: --
wrong_move
     4/50000: episode: 4, duration: 0.013s, episode steps:   1, steps per second:  77, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 3155.000 [3155.000, 3155.000],  loss: --, mae: --, mean_q: --
wrong_move
 



  1001/50000: episode: 997, duration: 1.055s, episode steps:   1, steps per second:   1, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 3572.000 [3572.000, 3572.000],  loss: --, mae: --, mean_q: --
wrong_move
  1002/50000: episode: 998, duration: 0.036s, episode steps:   1, steps per second:  28, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 892.000 [892.000, 892.000],  loss: 12500346.000000, mae: 1.406158, mean_q: 0.834001
wrong_move
  1003/50000: episode: 999, duration: 0.041s, episode steps:   1, steps per second:  24, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 2059.000 [2059.000, 2059.000],  loss: 12500880.000000, mae: 1.405459, mean_q: 0.794996
wrong_move
  1004/50000: episode: 1000, duration: 0.046s, episode steps:   1, steps per second:  22, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 1203.000 [1203.000, 1203.000]



Training for 50000 steps ...




wrong_move
     1/50000: episode: 1, duration: 0.399s, episode steps:   1, steps per second:   3, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 3093.000 [3093.000, 3093.000],  loss: --, mae: --, mean_q: --
wrong_move
     2/50000: episode: 2, duration: 0.005s, episode steps:   1, steps per second: 187, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 2926.000 [2926.000, 2926.000],  loss: --, mae: --, mean_q: --
wrong_move
     3/50000: episode: 3, duration: 0.005s, episode steps:   1, steps per second: 204, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 2926.000 [2926.000, 2926.000],  loss: --, mae: --, mean_q: --
wrong_move
     4/50000: episode: 4, duration: 0.010s, episode steps:   1, steps per second: 104, episode reward: -5000.000, mean reward: -5000.000 [-5000.000, -5000.000], mean action: 3093.000 [3093.000, 3093.000],  loss: --, mae: --, mean_q: --
wrong_move
 