In [1]:
import numpy as np
import torch as th
from torch.autograd import Variable
from matplotlib import pyplot as plt
import random
import sys
sys.path.append("/Users/brandonbrown/Desktop/Projects/pycolab/")
%matplotlib inline

  'Matplotlib is building the font cache using fc-list. '


In [2]:
def randPair(s,e):
    """Return a 2-tuple of independent random integers drawn from [s, e)."""
    first = np.random.randint(s,e)  #drawn first so the RNG call order is first, then second
    second = np.random.randint(s,e)
    return first, second

class BoardPiece:
    """A single named item on the board.

    Attributes are plain data:
      name -- name of the piece
      code -- an ASCII character to display on the board
      pos  -- 2-tuple position, e.g. (1,4)
    """

    def __init__(self, name, code, pos):
        self.name, self.code, self.pos = name, code, pos
        
class BoardMask:
    """A named group of board cells described by a mask array.

    name -- identifier of the mask
    mask -- array whose non-zero entries mark the cells belonging to the mask
    code -- character used when the mask is drawn
    """

    def __init__(self, name, mask, code):
        self.name, self.mask, self.code = name, mask, code

    def get_positions(self):
        """Return the indices of the non-zero mask entries as a tuple of arrays."""
        occupied = np.nonzero(self.mask)
        return occupied

def zip_positions2d(positions):
    """Pair up two parallel index sequences into a list of 2-tuples.

    positions must unpack into exactly two sequences (e.g. the tuple of
    arrays returned by np.nonzero on a 2D array).
    """
    first_axis, second_axis = positions
    return [(i, j) for i, j in zip(first_axis, second_axis)]

class GridBoard:
    """Square board that tracks named single-cell pieces and named multi-cell masks.

    Pieces are stored in `components` and masks in `masks`, both keyed by name.
    Dict insertion order determines the layer order produced by `render_np`.
    """

    def __init__(self, size=4):
        self.size = size #Board dimensions, e.g. 4 x 4
        self.components = {} #name : BoardPiece
        self.masks = {} #name : BoardMask

    def addPiece(self, name, code, pos=(0,0)):
        """Register a piece under `name`, replacing any piece already stored under it."""
        self.components[name] = BoardPiece(name, code, pos)

    def addMask(self, name, mask, code):
        """Register a mask (basically a set of boundary elements) under `name`.

        mask is a 2D numpy array with 1s where the boundary elements are.
        """
        self.masks[name] = BoardMask(name, mask, code)

    def movePiece(self, name, pos):
        """Move piece `name` to `pos` unless `pos` is covered by one of the masks.

        A blocked move is silently ignored. No bounds checking happens here.
        """
        blocked = any(
            pos in zip_positions2d(mask.get_positions())
            for mask in self.masks.values()
        )
        if not blocked:
            self.components[name].pos = pos

    def delPiece(self, name):
        """Remove piece `name`. Raises KeyError if no piece has that name."""
        #Index with the argument; the literal string 'name' is never a valid key here
        del self.components[name]

    def render(self):
        """Return a (size, size) array of characters showing pieces and masks.

        Masks are drawn after pieces, so a mask code overwrites a piece code
        on any shared cell.
        """
        dtype = '<U2'
        displ_board = np.zeros((self.size, self.size), dtype=dtype)
        displ_board[:] = ' '

        for piece in self.components.values():
            displ_board[piece.pos] = piece.code

        for mask in self.masks.values():
            displ_board[mask.get_positions()] = mask.code

        return displ_board

    def render_np(self):
        """Return a uint8 array of shape (n_pieces + n_masks, size, size).

        Each piece gets one layer with a single 1 at its position, in insertion
        order, followed by one layer per mask with 1s on the mask's cells.
        """
        num_layers = len(self.components) + len(self.masks)
        displ_board = np.zeros((num_layers, self.size, self.size), dtype=np.uint8)
        layer = 0
        for piece in self.components.values():
            displ_board[(layer,) + tuple(piece.pos)] = 1
            layer += 1

        for mask in self.masks.values():
            #Use this board's own mask rather than a notebook-level global
            rows, cols = mask.get_positions()
            displ_board[layer, rows, cols] = 1
            layer += 1
        return displ_board

In [20]:
def addTuple(a,b):
    """Element-wise sum of two sequences, returned as a tuple.

    The result is as long as the shorter of the two inputs.
    """
    return tuple(map(sum, zip(a,b)))
        
class Gridworld:
    """Grid game with a Player, Goal, Pit and Wall placed on a GridBoard.

    mode selects the layout: 'static' places everything deterministically,
    'player' keeps the static layout but randomizes the player, and any other
    value places all four pieces at random.
    """

    def __init__(self, size=4, mode='static'):
        #Boards smaller than 4 are bumped up to 4
        if not size >= 4:
            print("Minimum board size is 4. Initialized to size 4.")
            size = 4
        self.board = GridBoard(size=size)

        #Placeholder positions; the initializer chosen below overwrites them
        starting_pieces = (
            ('Player', 'P', (0,0)),
            ('Goal', '+', (1,0)),
            ('Pit', '-', (2,0)),
            ('Wall', 'W', (3,0)),
        )
        for name, code, pos in starting_pieces:
            self.board.addPiece(name, code, pos)

        if mode == 'static':
            setup = self.initGridStatic
        elif mode == 'player':
            setup = self.initGridPlayer
        else:
            setup = self.initGridRand
        setup()

    def initGridStatic(self):
        """Place all four pieces at fixed (row, column) positions."""
        fixed_layout = (
            ('Player', (0,3)),
            ('Goal', (0,0)),
            ('Pit', (0,1)),
            ('Wall', (1,1)),
        )
        for name, pos in fixed_layout:
            self.board.components[name].pos = pos

    def validateBoard(self):
        """Return True when no two pieces share a position, False otherwise."""
        positions = [piece.pos for piece in self.board.components.values()]
        return len(positions) == len(set(positions))

    def initGridPlayer(self):
        """Use the static layout but put the player on a random free cell.

        Redraws the player position until it does not overlap another piece.
        """
        while True:
            self.initGridStatic()
            self.board.components['Player'].pos = randPair(0,self.board.size)
            if self.validateBoard():
                break

    def initGridRand(self):
        """Place Player, Goal, Pit and Wall at random, redrawing all four until none overlap."""
        while True:
            for name in ('Player', 'Goal', 'Pit', 'Wall'):
                self.board.components[name].pos = randPair(0,self.board.size)
            if self.validateBoard():
                break

    def makeMove(self, action):
        """Try to move the player one cell; action is one of 'u', 'd', 'l', 'r'.

        Any other action is ignored. The move is dropped when the target cell
        is the wall or lies outside the board; otherwise it is handed to
        GridBoard.movePiece.
        """
        steps = {
            'u': (-1,0), #up
            'd': (1,0),  #down
            'l': (0,-1), #left
            'r': (0,1),  #right
        }
        if action not in steps:
            return

        target = addTuple(self.board.components['Player'].pos, steps[action])
        if target == self.board.components['Wall'].pos:
            return #player can't move onto the wall
        if max(target) > (self.board.size-1) or min(target) < 0:
            return #outside bounds of board
        self.board.movePiece('Player', target)

    def reward(self):
        """Return -10 when the player is on the pit, 10 on the goal, otherwise -1."""
        player_pos = self.board.components['Player'].pos
        if player_pos == self.board.components['Pit'].pos:
            return -10
        if player_pos == self.board.components['Goal'].pos:
            return 10
        return -1

    def display(self):
        """Return the board's character rendering."""
        return self.board.render()

In [29]:
#Build the deterministic 4x4 layout and show it as a grid of characters
game = Gridworld(size=4, mode='static')
game.display()

array([['+', '-', ' ', 'P'],
       [' ', 'W', ' ', ' '],
       [' ', ' ', ' ', ' '],
       [' ', ' ', ' ', ' ']], dtype='<U2')

In [30]:
#Each call attempts one step in (row, col): 'd' adds (1,0), 'l' adds (0,-1)
game.makeMove('d')
game.makeMove('d')
game.makeMove('l')
game.display()

array([['+', '-', ' ', ' '],
       [' ', 'W', ' ', ' '],
       [' ', ' ', 'P', ' '],
       [' ', ' ', ' ', ' ']], dtype='<U2')

In [26]:
game.makeMove('d') #attempt a (1,0) step from the player's current (row, col)
print(game.reward())
game.display()

-1


array([['+', '-', ' ', ' '],
       [' ', 'W', ' ', ' '],
       [' ', ' ', ' ', ' '],
       [' ', ' ', 'P', ' ']], dtype='<U2')

In [32]:
#One binary size x size layer per piece (in insertion order), then one per mask
game.board.render_np()

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 0]],

       [[1, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=uint8)

In [13]:
#1s mark the cells covered by the mask; those cells are drawn with '#'
mask1 = np.array([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [1, 1, 1, 1]])
game.board.addMask('boundary', mask1, '#')
game.display()

array([['+', '-', ' ', ' '],
       [' ', 'W', ' ', 'P'],
       [' ', ' ', ' ', '#'],
       ['#', '#', '#', '#']], dtype='<U2')

In [14]:
game.makeMove('d') #attempt a (1,0) step; movePiece ignores it when the target cell is covered by a mask
print(game.reward())
game.display()

-1


array([['+', '-', ' ', ' '],
       [' ', 'W', ' ', 'P'],
       [' ', ' ', ' ', '#'],
       ['#', '#', '#', '#']], dtype='<U2')

In [15]:
#Same layered view as before; the added mask contributes one extra layer after the piece layers
game.board.render_np()

array([[[0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[1, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [1, 1, 1, 1]]], dtype=uint8)

In [9]:
class Sokoban:
    """Sokoban-style board with a Player, a Goal and a Box inside a fixed wall mask.

    With mode='static' the pieces keep their initial positions; any other mode
    randomizes the player and goal positions.
    """

    def __init__(self, size=5, mode='static'):
        #Boards smaller than 5 are bumped up to 5
        if not size >= 5:
            print("Minimum board size is 5. Initialized to size 5.")
            size = 5
        self.board = GridBoard(size=size)

        #Initial positions; Player and Goal may be re-drawn below
        starting_pieces = (
            ('Player', 'P', (0,0)),
            ('Goal', '+', (0,4)),
            ('Box', 'B', (2,1)),
        )
        for name, code, pos in starting_pieces:
            self.board.addPiece(name, code, pos)

        #1s mark the wall cells
        walls = np.array([
            [0, 0, 1, 0, 0],
            [0, 0, 1, 0, 0],
            [1, 0, 1, 1, 0],
            [1, 0, 0, 0, 0],
            [1, 0, 1, 1, 0]])
        self.board.addMask('boundary', walls, code='#')

        if mode != 'static':
            self.initGridRand()

    def initGridRand(self):
        """Draw the player from rows 0-1 / columns 0-1 and the goal from rows 0-1 / columns 3-4."""
        player_row = np.random.randint(0,2)
        player_col = np.random.randint(0,2)
        self.board.components['Player'].pos = (player_row, player_col)

        goal_row = np.random.randint(0,2)
        goal_col = np.random.randint(3,5)
        self.board.components['Goal'].pos = (goal_row, goal_col)

    def makeMove(self, action):
        """Try to move the player one cell; action is one of 'u', 'd', 'l', 'r'.

        Any other action is ignored, as is a move that would leave the board.
        In-bounds moves are handed to GridBoard.movePiece.
        """
        steps = {
            'u': (-1,0), #up
            'd': (1,0),  #down
            'l': (0,-1), #left
            'r': (0,1),  #right
        }
        if action not in steps:
            return

        target = addTuple(self.board.components['Player'].pos, steps[action])
        if max(target) > (self.board.size-1) or min(target) < 0:
            return #outside bounds of board
        self.board.movePiece('Player', target)

    def getReward(self):
        """Return 10 when the player stands on the goal, otherwise -1."""
        on_goal = self.board.components['Player'].pos == self.board.components['Goal'].pos
        return 10 if on_goal else -1