##### To do:
    - Prevent obstacles from blocking all pathways to terminal state and trapping player
    - Implement rewards
    - Implement Goal state that ends the game
    - Implement non-deterministic tiles
    - Decide whether a separate reward world is necessary (should the rewards be stored separately from the normal GW?)
    

In [150]:
import numpy as np
import copy

In [151]:
# Create Gridworld object

class Gridworld:
    """Square grid world stored as flat, row-major arrays of one-character cells.

    Symbols used in ``grid_world``: ``"O"`` free field, ``"X"`` obstacle,
    ``"P"`` player.
    Symbols used in ``reward_world``: ``"O"`` neutral field, ``"X"`` obstacle,
    ``"-"`` negative reward, ``"+"`` positive reward (terminal state).
    """

    def __init__(self, dim):
        """Store the edge length; the arrays are populated by ``build()``.

        Args:
            dim: Number of rows and columns of the (dim x dim) grid.
        """
        self.dim = dim
        self.grid_world = []
        self.reward_world = []

        # Backups restored by reset()
        self.grid_world_original = []
        self.reward_world_original = []

        self.free_fields = []
        self.goal = []

    def build(self):
        """Randomly generate the grid layout and the reward layout.

        Obstacles are never placed on the start cell (index 0), and the goal
        is kept off the start cell whenever another free field exists.

        Returns:
            tuple: ``(grid_world, reward_world)``, both flat arrays of length
            ``dim * dim``.
        """
        size = self.dim * self.dim
        start = 0  # player starts in the top-left cell

        ######### Build GW #########

        self.grid_world = np.full(size, "O")

        # Create obstacles. The start cell is excluded so that placing the
        # player never overwrites an obstacle the reward world still shows.
        num_obs = size // 8  # specify number of obstacles

        if num_obs > 0:
            obstacle_indices = np.random.choice(np.arange(1, size), replace=False,
                                                size=num_obs)
        else:
            obstacle_indices = np.array([], dtype=int)

        self.grid_world[obstacle_indices] = "X"

        ######### Build RW #########

        # The reward world is copied before the player is placed, so it
        # contains obstacles but no "P".
        self.reward_world = self.grid_world.copy()

        blocked = set(obstacle_indices.tolist())
        self.free_fields = [i for i in range(size) if i not in blocked]

        np.random.shuffle(self.free_fields)  # shuffle free fields

        # The goal is the last shuffled free field; swap it away from the
        # start cell so the episode is not finished before the first move.
        if len(self.free_fields) > 1 and self.free_fields[-1] == start:
            self.free_fields[0], self.free_fields[-1] = (
                self.free_fields[-1], self.free_fields[0])

        self.goal = self.free_fields[-1]  # index of the positive reward (terminal state)

        # Negative rewards go on free fields other than the goal; the amount
        # is capped so small grids do not make np.random.choice raise.
        candidates = self.free_fields[:-1]
        rew_neg_amount = min(5, len(candidates))

        if rew_neg_amount > 0:
            rew_neg_indices = np.random.choice(candidates, replace=False,
                                               size=rew_neg_amount)
            self.reward_world[rew_neg_indices] = "-"  # place negative rewards

        self.reward_world[self.goal] = "+"  # place positive reward (terminal state)

        # Set starting point (top left)

        self.grid_world[start] = "P"

        # Save backups only after the player is placed, so that reset()
        # restores a grid that still contains the player.

        self.grid_world_original = self.grid_world.copy()
        self.reward_world_original = self.reward_world.copy()

        return self.grid_world, self.reward_world

    def reset(self):
        """Restore both worlds to the state produced by the last ``build()``.

        Copies are restored so that later moves cannot modify the backups.

        Returns:
            tuple: ``(grid_world, reward_world)`` after the reset.
        """
        self.reward_world = copy.deepcopy(self.reward_world_original)
        self.grid_world = copy.deepcopy(self.grid_world_original)

        return self.grid_world, self.reward_world

    def move(self, action):
        """Move the player one field, if the target is in bounds and free.

        Args:
            action: One of ``"left"``, ``"right"``, ``"up"``, ``"down"``.
                Any other value only prints a hint.

        The move is silently ignored when it would leave the grid or run
        into an obstacle. Always returns ``None``.
        """
        offsets = {
            "left": (0, -1),
            "right": (0, 1),
            "up": (-1, 0),
            "down": (1, 0),
        }

        if action not in offsets:
            print("Please choose an action [left,right,up,down]!")
            return

        idx = int(np.where(self.grid_world == "P")[0][0])

        # Work in (row, column) coordinates so that moves can neither wrap
        # around a row end nor index the flat array negatively.
        row, col = divmod(idx, self.dim)
        d_row, d_col = offsets[action]
        new_row, new_col = row + d_row, col + d_col

        # if path is Oob
        if not (0 <= new_row < self.dim and 0 <= new_col < self.dim):
            return

        target = new_row * self.dim + new_col

        # if path is blocked
        if self.grid_world[target] == "X":
            return

        self.grid_world[idx] = "O"
        self.grid_world[target] = "P"

    def visualize(self):
        """Print the grid world and the reward world as dim x dim matrices."""

        # Show GW

        print("Gridworld:\n")
        print(self.grid_world.reshape((self.dim, self.dim)))
        print("\n")

        # Show RW

        print("Reward world:\n")
        print(self.reward_world.reshape((self.dim, self.dim)))
        print("\n")
    

In [152]:
# Specify dimensions

# Keep asking until the user enters an integer of at least 5.
while True:
    try:
        dim = int(input("Please provide your desired grid dimension (dim X dim):\n"))

    # Only a failed int() conversion is handled here; a bare except would
    # also swallow KeyboardInterrupt/EOFError and make the loop unstoppable.
    except ValueError:
        print("Please provide an integer value!\n")
        continue

    if dim >= 5:
        break

    print("Dimension needs to be larger than 4!\n")

Please provide your desired grid dimension (dim X dim):
15


In [153]:
# Instantiate the grid world at the requested size and generate its layout

gw = Gridworld(dim)
gw.build()

# Print the grid layout and the reward layout

gw.visualize()


Gridworld:

[['P' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'X' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['X' 'O' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'X' 'O' 'O' 'X' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'X' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'X' 'O' 'O']
 ['O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O']
 ['X' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O']]


Reward world:

[['O' 'O' 'O' 'O' 'O' 'O'

In [184]:
# Attempt a single step to the left

gw.move("left")

# Print the resulting grid layout and the reward layout

gw.visualize()

Gridworld:

[['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'X' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['X' 'O' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'X' 'O' 'O' 'X' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'X' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'X' 'O' 'O']
 ['O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O']
 ['X' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'P' 'O' 'O' 'O' 'O']]


Reward world:

[['O' 'O' 'O' 'O' 'O' 'O'