First, we must construct our environment...

In [18]:
import numpy as np
import matplotlib
from rl_glue import RLGlue
from Agent import BaseAgent 
from Environment import BaseEnvironment  
from manager import Manager

import jdc

In [None]:
# As shown previously, let's declare the methods first, then implement them.
# Every method below is only a placeholder that raises NotImplementedError;
# the real implementations are attached in later cells with %%add_to.
class CaptureGoEnvironment(BaseEnvironment):
    # Create the environment and declare its variables
    #   env_info : optional dict of settings (None means "use the defaults")
    def env_init(self, env_info=None):
        raise NotImplementedError

    # Set the environment's variables for a new game
    def env_start(self):
        raise NotImplementedError

    # Take a step in the environment
    #   NOTE(review): the implementation attached later takes (x, y, player)
    #   rather than a single action - confirm which signature callers expect
    def env_step(self, action):
        raise NotImplementedError

    def env_cleanup(self):
        raise NotImplementedError

    # Gets the flat index of an (x, y) position on the game-board
    def board_index(self, x, y):
        raise NotImplementedError

    # Whether (x, y) lies on the board
    def inBounds(self, x, y):
        raise NotImplementedError

    # Number of liberties of the group that contains (x, y)
    def get_liberties(self, x, y):
        raise NotImplementedError

    # Recursive worker behind get_liberties
    def get_liberties_helper(self, x, y, group_color):
        raise NotImplementedError

    # Whether the move at (x, y) captured a group, and of which color
    def did_group_captured(self, x, y):
        raise NotImplementedError

In [None]:
%%add_to CaptureGoEnvironment

# Create the environment, declare its variables, etc
#   Nothing is playable yet after this call: the board, the turn and the
#   winner are only declared here (as None) and get real values in env_start
#
#   Parameters
#   env_info : optional dict, recognised keys (with their defaults) are
#       "board_height" (5), "board_width" (5), "reward_per_step" (-1),
#       "winning_reward" (100) and "losing_reward" (-100)
def env_init(self, env_info=None):
    # None stands for "no settings"; this avoids a shared mutable default
    env_info = {} if env_info is None else env_info

    # The board is an array of height x width
    # If (x,y) on the board is 0, no stone
    #   if its 1, then a white stone is there
    #   if its -1, then a black stone is there
    self.board = None

    # Which player's turn is it?
    # 0 = white's turn
    # 1 = black's turn
    self.turn = None

    # Who won?
    # 0 = nobody yet
    # 1 = white
    # -1 = black
    self.who_won = None

    # (reward, board, termination): all undefined until env_start is called
    self.reward_board_termination = (None, self.board, None)

    # Set the default board height and width
    self.board_height = env_info.get("board_height", 5)
    self.board_width = env_info.get("board_width", 5)
    # Set the default reward per step
    self.reward_per_step = env_info.get("reward_per_step", -1)
    # Set the default winning reward and losing reward
    self.winning_reward = env_info.get("winning_reward", 100)
    self.losing_reward = env_info.get("losing_reward", -100)

In [None]:
%%add_to CaptureGoEnvironment

# Initialize the environment, set its variables, etc
# Called before an agent does anything
# Returns the initialized (full of zeros) board
def env_start(self):
    # Initialize the board, an array of 0 ints with one row per y and one
    # column per x. It is kept on self as well, because the other methods
    # read and modify self.board (the tuple below holds the same array)
    self.board = np.zeros((self.board_height, self.board_width), dtype=np.int8)

    # No reward has been handed out and the game is not over
    self.reward_board_termination = (0, self.board, False)

    # We start as white's turn
    self.turn = 0
    # Nobody won yet
    self.who_won = 0

    return self.reward_board_termination[1]

In [26]:
%%add_to CaptureGoEnvironment
## Converts an (x, y) coordinate into a single flat index on the game-board
#   Rows are laid out one after another, each self.board_width long, so
#   x = 0, y = 0 maps to index 0 and every step in y skips one whole row

#   On the actual board array:
#       0 means that position is empty
#       1 means there is a white stone
#       -1 means there is a black stone
def board_index(self, x, y):
    row_start = y * self.board_width
    return row_start + x

In [30]:
# Testing: build a 5x5 environment, print the board returned by env_start
# (expected to be all zeros), then print the flat index of x=3, y=2
# (5 * 2 + 3 = 13)
env = CaptureGoEnvironment()
env.env_init({"board_height" : 5, "board_width" : 5 })
print(env.env_start())
print(env.board_index(3, 2))


[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
13


In [None]:
%%add_to CaptureGoEnvironment

# Tells whether (x, y) is a valid position on the board
#   True when 0 <= x < board_width and 0 <= y < board_height, False otherwise
def inBounds(self, x, y):
    off_board = (
        x < 0
        or y < 0
        or x >= self.board_width
        or y >= self.board_height
    )
    return not off_board


# Get the liberties of the group of stones that contains (x, y)
#   returns the number of distinct empty points adjacent to that group
#   returns 0 if (x, y) is off the board or holds no stone, because there is
#   no group to measure there (callers that treat 0 as "captured" must check
#   for an empty point themselves)
def get_liberties(self, x, y):
    if not self.inBounds(x, y):
        return 0

    # The board has one row per y and one column per x, hence [y, x]
    group_color = self.board[y, x]
    if group_color == 0:
        return 0

    # Set the group_color to be whatever color we started at
    return self.get_liberties_helper(x, y, group_color)


# Recursively get the liberties of a group of stones
#   returns the amount of liberties found from (x, y) onwards
#   if group_color == 1, then we are looking for a white group
#   if group_color == -1, then we are looking for a black group
#
#   Parameters
#   x, y        : the point to examine
#   group_color : color of the group being measured
#   visited     : set of (x, y) points already examined; leave it as None on
#                 the first call. It keeps the recursion from looping and
#                 makes sure a liberty shared by two stones is counted once
def get_liberties_helper(self, x, y, group_color, visited=None):
    if visited is None:
        visited = set()

    if not self.inBounds(x, y):
        return 0

    if (x, y) in visited:
        return 0
    visited.add((x, y))

    point = self.board[y, x]
    if point == 0:
        # An empty point next to the group is exactly one liberty
        return 1
    if point != group_color:
        # An opposing stone: not a liberty and not part of the group
        return 0

    # A stone of the group: keep spreading to its four neighbours
    total = 0
    for dx, dy in ((0, 1), (0, -1), (1, 0), (-1, 0)):
        total += self.get_liberties_helper(x + dx, y + dy, group_color, visited)
    return total

# Returns if any group got captured, given the most recent move
#   returns 0 if no groups got captured
#   returns 1 if a white group got captured
#   returns -1 if a black group got captured
#
#   Only the stone just placed and its four neighbours are examined, since
#   those are the only groups whose liberties the move can have changed.
#   If the move leaves both an opposing group and the mover's own group
#   without liberties, the opposing group is the one reported as captured.
#
#   Parameters
#   x, y : The most recent move
def did_group_captured(self, x, y):
    # The (x, y) offsets we will check
    check_offsets_list = ((0, 0), (0, 1), (0, -1), (1, 0), (-1, 0))

    # Color of the stone that was just placed (0 if there is none)
    placed_color = self.board[y, x] if self.inBounds(x, y) else 0

    # Remembered in case the mover's own group is the only one captured
    own_group_captured = 0

    for offset_x, offset_y in check_offsets_list:
        this_x = x + offset_x
        this_y = y + offset_y

        if not self.inBounds(this_x, this_y):
            # This offset is not on the board
            continue

        this_color = self.board[this_y, this_x]
        if this_color == 0:
            # An empty point holds no group, so nothing can be captured here
            continue

        if self.get_liberties(this_x, this_y) != 0:
            continue

        if this_color != placed_color:
            # An opposing group got captured, this takes priority
            return int(this_color)

        own_group_captured = int(this_color)

    # Either the mover captured their own group, or 0 for no captures
    return own_group_captured

# Take a step in the environment, actually place a stone on the board
#   Attempt to place a stone at the x, y coordinates
#   if player == self.turn, then we actually do this move, otherwise its not that players turn so we do nothing
#   if a player attempts to play an invalid move, such as out of bounds or ontop of a stone, we just ignore it and dont change the turn
#
#   Parameters
#   x, y   : where the stone should go
#   player : who is moving, 0 = white, 1 = black (same encoding as self.turn)
#
#   Returns the reward_board_termination tuple that represents the changed environment
#   (for an ignored move this is the unchanged tuple from the previous step)
#   The reward in the tuple belongs to the player who just moved
def env_step(self, x, y, player):
    if player != self.turn:
        # Its not this player's turn
        return self.reward_board_termination

    # Check that the move is in bounds
    if not self.inBounds(x, y):
        # Just pretend like this turn didn't happen
        return self.reward_board_termination

    # Check that the position is empty
    #   the board has one row per y and one column per x, hence [y, x]
    if self.board[y, x] != 0:
        # Just pretend like this turn did not happen
        return self.reward_board_termination

    # If it's white's turn, stone = 1
    #   if it's black's turn, stone = -1
    stone = 1 if self.turn == 0 else -1

    # Place the stone on the board
    self.board[y, x] = stone

    # Get the reward
    reward = self.reward_per_step

    # Most complicated part: check if anything was captured, and therefore if we need to terminate or not
    termination = False
    captured_group = self.did_group_captured(x, y)
    if captured_group != 0:
        if captured_group == stone:
            # The mover's own group got captured, so the mover loses
            reward += self.losing_reward
        else:
            # The mover captured the other color, so the mover wins
            reward += self.winning_reward

        termination = True

    # The winner was who did not get captured, or nobody if nothing was captured
    self.who_won = captured_group * -1
    # It is now the other player's turn (kept as an int, 0 or 1)
    self.turn = 1 - self.turn
    self.reward_board_termination = (reward, self.board, termination)

    return self.reward_board_termination



