##### To do:
    - Prevent obstacles from potentially blocking all pathways to terminal state and trapping player (never happened before)
    - If you're on a -1 reward field and try to make an invalid move, you end up in the same state. Does that mean that you
    receive the negative reward again?
    - Consider whether the reward world is necessary (right now it only serves to keep the visualization uncluttered)
    

In [1]:
import numpy as np
import copy
import random

In [2]:
# Create Gridworld object

class Gridworld:
    """Square grid world with obstacles, rewards and a pair of teleports.

    Both worlds are flat NumPy string arrays of length ``dim * dim`` that are
    interpreted row by row (index ``r * dim + c`` is row ``r``, column ``c``).

    Attributes:
        dim: Edge length of the square grid.
        grid_world: Player ("P"), obstacles ("X") and open fields ("O").
        reward_world: Copy of the obstacle layout that additionally marks
            negative rewards ("-"), the terminal reward ("+") and the two
            teleports ("§"). The player is never drawn here.
        grid_world_original / reward_world_original: Backups taken in
            ``build`` (without the player marker) and restored by ``reset``.
        free_fields: Shuffled indices that are neither the start field nor
            an obstacle.
        goal: Index of the terminal "+" field.
        tele_idx: The two teleport indices.
        finished: True once the player has stepped onto the goal.
        reward: Reward produced by the most recent ``move`` call.
    """

    # Chance that stepping towards a teleport actually teleports the player.
    TELEPORT_PROBABILITY = 0.6

    # Number of "-" fields placed by ``build``.
    NUM_NEGATIVE_REWARDS = 5

    def __init__(self, dim):
        """Store the grid dimension; call ``build`` to create the world."""
        self.dim = dim
        self.grid_world = []
        self.reward_world = []

        self.grid_world_original = []
        self.reward_world_original = []

        self.free_fields = []
        self.goal = []
        self.tele_idx = []

        self.finished = False
        self.reward = 0

    def build(self):
        """Randomly generate obstacles, rewards and teleports.

        Returns:
            Tuple ``(grid_world, reward_world)`` of the freshly built arrays.
            The player starts at index 0 (top left).
        """
        size = self.dim * self.dim

        ######### Build GW #########

        self.grid_world = np.full(size, "O")

        # One obstacle per eight fields; index 0 is excluded so the start
        # field is never blocked.
        num_obs = int(size / 8)
        obstacle_indices = np.random.choice(
            np.arange(1, size), replace=False, size=num_obs
        )
        self.grid_world[obstacle_indices] = "X"

        ######### Build RW and Teleport #########

        self.reward_world = self.grid_world.copy()

        # Set lookup keeps this linear instead of scanning the obstacle
        # array once per field.
        blocked = set(obstacle_indices.tolist())
        self.free_fields = [i for i in range(1, size) if i not in blocked]
        np.random.shuffle(self.free_fields)

        # The last three shuffled fields become the goal and the teleports.
        self.goal = self.free_fields[-1]
        self.tele_idx = [self.free_fields[-2], self.free_fields[-3]]

        # Negative rewards are drawn from the remaining fields.
        # NOTE(review): the first shuffled field is excluded as well; it is
        # unclear whether that is intentional.
        rew_neg_indices = np.random.choice(
            self.free_fields[1:-3],
            replace=False,
            size=self.NUM_NEGATIVE_REWARDS,
        )

        self.reward_world[rew_neg_indices] = "-"
        self.reward_world[self.goal] = "+"
        self.reward_world[self.tele_idx] = "§"

        # Save backups before the player marker is drawn.
        self.reward_world_original = copy.deepcopy(self.reward_world)
        self.grid_world_original = copy.deepcopy(self.grid_world)

        ######### Set starting point #########

        self.grid_world[0] = "P"  # top left
        self.finished = False

        return self.grid_world, self.reward_world

    def reset(self):
        """Restore the world created by ``build`` and restart the episode.

        The backups are copied (not aliased) so that later moves cannot
        corrupt them, the player is put back on the start field and the
        ``finished`` flag is cleared.

        Returns:
            Tuple ``(grid_world, reward_world)``.
        """
        self.reward_world = copy.deepcopy(self.reward_world_original)
        self.grid_world = copy.deepcopy(self.grid_world_original)

        # The backup holds no player marker; nothing to place if ``build``
        # has not been called yet.
        if len(self.grid_world):
            self.grid_world[0] = "P"

        self.finished = False
        self.reward = 0

        return self.grid_world, self.reward_world

    def _target_index(self, idx, action):
        """Return the index reached from ``idx`` by ``action``, or None if off-grid."""
        row, col = divmod(idx, self.dim)
        last_row = len(self.grid_world) // self.dim - 1

        if action == "left":
            return idx - 1 if col > 0 else None
        if action == "right":
            return idx + 1 if col < self.dim - 1 else None
        if action == "up":
            return idx - self.dim if row > 0 else None
        # "down"
        return idx + self.dim if row < last_row else None

    def move(self, action):
        """Try to move the player one field in the given direction.

        Moves that would leave the grid or hit an obstacle leave the state
        unchanged. Stepping towards a teleport field relocates the player to
        the other teleport with probability ``TELEPORT_PROBABILITY``;
        otherwise the player simply walks onto the field.

        Args:
            action: One of "left", "right", "up", "down".

        Returns:
            Tuple ``(grid_world, finished, reward)``. ``grid_world`` is the
            live internal array, ``reward`` is +1 on reaching the goal, -1 on
            entering a "-" field and 0 otherwise. An unknown action prints a
            hint and returns the unchanged state with reward 0.
        """
        self.reward = 0

        if action not in ("left", "right", "up", "down"):
            print("Please choose an action [left,right,up,down]!")
            return self.grid_world, self.finished, self.reward

        idx = int(np.where(self.grid_world == "P")[0][0])
        target = self._target_index(idx, action)

        # Out of bounds or blocked by an obstacle: stay where we are.
        if target is None or self.grid_world[target] == "X":
            return self.grid_world, self.finished, self.reward

        first_tele, second_tele = self.tele_idx

        # The random roll only happens when the target is a teleport field.
        if target == first_tele and random.random() < self.TELEPORT_PROBABILITY:
            destination = second_tele
        elif target == second_tele and random.random() < self.TELEPORT_PROBABILITY:
            destination = first_tele
        else:
            destination = target

            # Rewards are only collected on a regular step.
            field = self.reward_world[target]
            if field == "+":
                self.reward = +1
                self.finished = True
            elif field == "-":
                self.reward = -1

        # Clear first, then place: correct even if destination == idx.
        self.grid_world[idx] = "O"
        self.grid_world[destination] = "P"

        return self.grid_world, self.finished, self.reward

    def visualize(self):
        """Print the grid world and the reward world as ``dim x dim`` matrices."""
        # Show GW
        print("Gridworld:\n")
        print(self.grid_world.reshape((self.dim, self.dim)))
        print("\n")

        # Show RW
        print("Rewardworld:\n")
        print(self.reward_world.reshape((self.dim, self.dim)))
        print("\n")
    

In [6]:
# Specify dimensions

# Keep asking until the user enters an integer of at least 5.
while True:
    try:
        dim = int(input("Please provide your desired grid dimension (dim X dim):\n"))
    except ValueError:
        # Only a failed int() conversion is handled here, so Ctrl+C and
        # end-of-input still abort the prompt instead of looping forever.
        print("Please provide an integer value!\n")
        continue

    if dim >= 5:
        break

    print("Dimension needs to be larger than 4!\n")

Please provide your desired grid dimension (dim X dim):
15


In [21]:
# Create the Gridworld object with the edge length entered above

gw = Gridworld(dim)

# Randomly place obstacles, rewards and teleports and put the player on the
# top-left field (the returned arrays are not needed here)
gw.build()

# Print the grid world and the reward world

gw.visualize()


Gridworld:

[['P' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'X' 'O' 'O' 'O']
 ['O' 'O' 'O' 'X' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X']
 ['X' 'X' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'X' 'O' 'O']
 ['X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['X' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'X']
 ['X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O']]


Rewardworld:

[['O' 'O' 'O' 'O' 'O' 'O' 

In [47]:
# Take one step downwards; keep the finished flag (k) and the reward (b)
_, k, b = gw.move("down")

# Print both worlds after the step
gw.visualize()

# Report the outcome of the step
print(f"Reward: {b}")
print(f"Finished: {k}")

Gridworld:

[['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'X' 'O' 'O' 'O']
 ['O' 'O' 'O' 'X' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X']
 ['X' 'X' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'X' 'O' 'O']
 ['X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'O' 'P' 'O' 'O' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']
 ['X' 'O' 'X' 'O' 'X' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'X']
 ['X' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O' 'O' 'O' 'O' 'X' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O']
 ['O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'X' 'O']]


Rewardworld:

[['O' 'O' 'O' 'O' 'O' 'O' 