In [None]:
# NOTE: everything between the triple quotes below is a bare string literal,
# so none of the code inside it is executed.  It is a sketch of a per-cell
# reward grid; a variant of the same loop appears in Robot.__init__ in a
# later cell.
'''
Define a reward for each cell in the maze
Rewards are high in the center and decrease radially out

self.maze_rewards = np.zeros((maze_dim,maze_dim))
for x in [i for i in range(-maze_dim/2,maze_dim/2+1) if i != 0]:
    for y in [j for j in range(-maze_dim/2,maze_dim/2+1) if j != 0]:
        if x < 0: 
            dx = maze_dim/2 
        else: 
            dx = maze_dim/2 - 1
        if y < 0:
            dy = maze_dim/2
        else:
            dy = maze_dim/2 - 1
        self.maze_rewards[x+dx][y+dy] = maze_dim + 1 - (abs(x)+abs(y))
'''

In [None]:
from maze import Maze
from robot import Robot
import numpy as np
import sys
import random


# Lookup tables shared by the robot and the simulator.

# For every heading, the global directions listed in the robot's own frame:
# [to its left, straight ahead, to its right, behind it].
dir_mapping = {
    'u': ['l', 'u', 'r', 'd'],
    'r': ['u', 'r', 'd', 'l'],
    'd': ['r', 'd', 'l', 'u'],
    'l': ['d', 'l', 'u', 'r'],
}

# The sensor directions are the first three entries of that list
# (left, ahead, right) for each heading.
dir_sensors = {facing: frame[:3] for facing, frame in dir_mapping.items()}

# Unit step [dx, dy] taken when moving one square in a global direction.
dir_move = {
    'u': [0, 1],
    'r': [1, 0],
    'd': [0, -1],
    'l': [-1, 0],
}

# Opposite direction, keyed by both the short and the long direction names.
dir_reverse = dict([
    ('u', 'd'), ('r', 'l'), ('d', 'u'), ('l', 'r'),
    ('up', 'd'), ('right', 'l'), ('down', 'u'), ('left', 'r'),
])

# Load the maze layout from a fixed file.  The file name is hard-coded here;
# it is not taken from the command line.
testmaze = Maze("test_maze_01.txt")


def move(movement, rotation):
    '''
    Apply one robot action to the simulated position in ``robot_pos``.

    The rotation is applied first, then the robot advances (or backs up) one
    square at a time until the requested distance is covered or a wall is hit.

    Reads the module-level names ``robot_pos`` (a dict with a ``'heading'``
    string and a mutable two-element ``'location'``), ``testmaze``,
    ``dir_mapping``, ``dir_move`` and ``dir_reverse``.  ``robot_pos`` is
    updated in place.  Messages are printed with single-argument ``print(...)``
    calls so the function runs unchanged on Python 2 and Python 3.

    Parameters
    ----------
    movement : number
        Squares to travel; positive is forwards, negative is backwards.
        Truncated to an int and clamped to the range [-3, 3].
    rotation : int
        -90 turns left, 90 turns right, 0 keeps the heading.  Any other value
        is reported and ignored (the movement is still carried out).

    Returns
    -------
    None
    '''
    # perform rotation
    heading = robot_pos['heading']
    if rotation == -90:
        heading = dir_mapping[heading][0]
    elif rotation == 90:
        heading = dir_mapping[heading][2]
    elif rotation != 0:
        print("Invalid rotation value, no rotation performed.")
    robot_pos['heading'] = heading

    if abs(movement) > 3:
        print("Movement limited to three squares in a turn.")
    movement = max(min(int(movement), 3), -3)  # fix to range [-3, 3]
    if movement == 0:
        return

    # The heading never changes while stepping, so the direction of travel
    # (the heading itself, or its opposite when backing up) is fixed up front.
    travel_dir = heading if movement > 0 else dir_reverse[heading]
    step = dir_move[travel_dir]
    for _ in range(abs(movement)):
        if not testmaze.is_permissible(robot_pos['location'], travel_dir):
            print("Movement stopped by wall.")
            break
        robot_pos['location'][0] += step[0]
        robot_pos['location'][1] += step[1]

In [None]:
class Robot(object):
    '''
    Maze-exploring agent that chooses moves from a location-keyed Q-table.

    ``Q_table`` maps a cell ``(x, y)`` to a dict whose keys are cells the
    robot has seen it can reach from there and whose values are the current
    estimates for moving to them.

    The methods read the module-level lookup tables ``dir_mapping`` and
    ``dir_move`` and the modules ``np`` (numpy) and ``random``.  The code is
    written to behave the same on Python 2.7 and Python 3.
    '''

    def __init__(self, maze_dim, start_location, start_heading):
        '''
        Use the initialization function to set up attributes that your robot
        will use to learn and navigate the maze. Some initial attributes are
        provided based on common information, including the size of the maze
        the robot is placed in.

        Parameters
        ----------
        maze_dim : int
            Number of cells along one side of the square maze.  The reward
            layout below is written for an even value.
        start_location : sequence of two ints
            Starting cell.  Stored as a tuple so it can be used as a Q-table
            key and as a numpy index.
        start_heading : str
            One of the keys of ``dir_mapping`` ('u', 'r', 'd', 'l').
        '''
        # Local import so the constructor does not depend on the notebook's
        # import cell having been run.
        import numpy as np

        # Robot parameters
        self.learning = True
        self.epsilon = 0
        self.alpha = 0.1
        self.gamma = 0.1
        self.maze_dim = maze_dim
        self.all_actions = [[0, 1], [0, 2], [0, 3],
                            [0, -1], [0, -2], [0, -3],
                            [1, 0], [2, 0], [3, 0],
                            [-1, 0], [-2, 0], [-3, 0]]

        # Learned information
        self.Q_table = dict()
        self.dead_ends = []
        self.paths_travelled = np.zeros((maze_dim, maze_dim))

        # Present info
        self.move_count = 0
        self.current_location = tuple(start_location)
        self.heading = start_heading

        # Past info
        self.last_action = (0, 0)
        self.last_reward = 0
        self.last_location = tuple(start_location)

        # Define a reward for each cell in the maze.
        # Rewards are increasingly negative as you move radially out
        # from the center.
        #
        # x and y run over signed offsets from the centre with 0 skipped;
        # negative offsets are shifted by half and positive ones by half - 1
        # to obtain array indices.  Floor division keeps the indices integral
        # on Python 3 as well.
        half = maze_dim // 2
        offsets = [i for i in range(-maze_dim // 2, half + 1) if i != 0]
        self.maze_rewards = np.zeros((maze_dim, maze_dim))
        for x in offsets:
            dx = half if x < 0 else half - 1
            for y in offsets:
                dy = half if y < 0 else half - 1
                self.maze_rewards[x + dx][y + dy] = 2 - (abs(x) + abs(y))

        # set center squares to a large positive reward
        max_reward = +10
        for i in [-1, 0]:
            for j in [-1, 0]:
                self.maze_rewards[half + i][half + j] = max_reward

    def get_valid_next_locations(self, sensors):
        '''
        List the cells the robot may move to from its current location.

        Parameters
        ----------
        sensors : sequence of int
            Open squares to the robot's left, front and right, in that order.

        Returns
        -------
        list of (int, int)
            Cells up to three squares away along each open direction.  If the
            previous move was a forward move, the cells it passed over are
            included as backward steps.  Cells recorded in ``self.dead_ends``
            are left out.
        '''
        reachable = list(sensors)
        here = np.array(self.current_location)

        # Get the steps and direction of the last move taken
        displacement = here - np.array(self.last_location)
        travelled = int(displacement.sum())
        if travelled != 0:
            unit = displacement / np.linalg.norm(displacement)
            # If robot's last move was to step forward, add stepping backward
            # up to a maximum of the same number of steps.  This becomes the
            # fourth entry, which dir_mapping maps to "behind".
            if np.array_equal(unit, dir_move[self.heading]):
                reachable.append(abs(travelled))

        # Convert valid moves to valid locations robot can move to
        candidates = []
        for index, distance in enumerate(reachable):
            if not distance > 0:
                continue
            direction = dir_mapping[self.heading][index]
            step = np.array(dir_move[direction])
            # A single move covers at most three squares.
            for squares in range(1, min(distance, 3) + 1):
                target = here + squares * step
                candidates.append(tuple(int(v) for v in target))

        # Remove next locations which are dead ends.  A new list is built
        # rather than removing from the list that is being iterated.
        return [cell for cell in candidates if cell not in self.dead_ends]

    def update_Q_table(self, valid_next_locations):
        '''
        Make sure the Q-table has an entry for the current location and for
        each of ``valid_next_locations`` reachable from it.

        New entries start at 0.01 times the reward of the target cell;
        existing entries are left untouched.  Returns None.
        '''
        entry = self.Q_table.setdefault(self.current_location, {})
        for location in valid_next_locations:
            if location not in entry:
                entry[location] = 0.01 * self.maze_rewards[location]
        return

    def choose_next_location(self):
        '''
        Pick the next cell from the Q-table entry of the current location.

        While ``self.learning`` is set, a cell with the largest Q value is
        chosen with probability ``1 - self.epsilon`` (ties broken at random);
        in every other case a cell is chosen uniformly at random.

        Returns
        -------
        (next_location, maxQ)
            The chosen cell and the largest Q value among the options.

        Raises
        ------
        ValueError
            If no next location is recorded for the current cell.
        '''
        options = self.Q_table[self.current_location]
        if not options:
            raise ValueError("No valid next location from {}".format(
                self.current_location))

        maxQ = max(options.values())

        if self.learning and random.random() < 1 - self.epsilon:
            best = [cell for cell, q in options.items() if q == maxQ]
            next_location = random.choice(best)
        else:
            next_location = random.choice(list(options.keys()))

        return next_location, maxQ

    def choose_path_less_travelled(self, valid_locations):
        '''
        Return one of ``valid_locations`` with the smallest count in
        ``self.paths_travelled``, chosen at random among ties.
        '''
        counts = [self.paths_travelled[cell] for cell in valid_locations]
        least_travelled = min(counts)
        paths_less_travelled = [cell for cell, count
                                in zip(valid_locations, counts)
                                if count == least_travelled]
        return random.choice(paths_less_travelled)

    def get_movements(self, next_location):
        '''
        Translate a target cell into the (rotation, movement) pair to reach it.

        Parameters
        ----------
        next_location : sequence of two ints
            Target cell, in line with the current cell along one axis.

        Returns
        -------
        (rotation, movement)
            ``rotation`` is -90, 0 or 90.  ``movement`` is the number of
            squares; it is negative when the target lies directly behind the
            robot, in which case the heading is kept.  In every other case
            ``self.heading`` is updated to the direction of travel.

        Raises
        ------
        ValueError
            If the step does not point along one of the ``dir_move`` vectors.
        '''
        action = np.array(next_location) - np.array(self.current_location)
        unit = list(action / np.linalg.norm(action))

        # Find the direction name whose unit vector matches the step.
        for name, vector in dir_move.items():
            if list(vector) == unit:
                action_dir = name
                break
        else:
            raise ValueError("{} is not in line with {}".format(
                tuple(next_location), self.current_location))

        # Index into dir_mapping[heading] is left / ahead / right / behind.
        rotation_mapping = [-90, 0, 90, 0]
        rotation = rotation_mapping[dir_mapping[self.heading].index(action_dir)]

        squares = int(abs(action.sum()))
        if rotation == 0 and self.heading != action_dir:
            # Target is behind the robot: back up without turning.
            movement = -squares
        else:
            movement = squares
            self.heading = action_dir

        return rotation, movement

    def update_path_travelled(self, next_location):
        '''
        Add one to ``self.paths_travelled`` for every cell strictly between
        the current location and ``next_location``.

        NOTE(review): neither endpoint is counted -- confirm this is intended.
        '''
        current_location = np.array(self.current_location)
        next_location = np.array(next_location)

        print("current location: {}".format(current_location))
        print("next_location: {}".format(next_location))

        step_dir = next_location - current_location
        # Floor division keeps the step an integer array, so the cells below
        # remain valid array indices on Python 3 as well.
        step_dir = step_dir // max(abs(step_dir))

        path = next_location - step_dir
        while not np.array_equal(path, current_location):
            self.paths_travelled[tuple(path)] += 1
            path -= step_dir

    def remove_dead_end(self):
        '''
        Drop the current location from the options of every Q-table entry.
        '''
        for options in self.Q_table.values():
            # pop() avoids deleting from a dict while iterating over it.
            options.pop(self.current_location, None)

    def update_Q_values(self, maxQ):
        '''
        Raise the Q value of moving into the current location, for every cell
        that lists it as an option, by
        ``alpha * (reward of that cell + gamma * maxQ)``.

        NOTE(review): the increment uses the reward of the cell being left and
        has no ``- Q`` term, unlike the textbook Q-learning update -- confirm
        this is the intended rule.
        '''
        for location, options in self.Q_table.items():
            if self.current_location in options:
                options[self.current_location] += self.alpha * (
                    self.maze_rewards[location] + self.gamma * maxQ)

    def next_move(self, sensors):
        '''
        Decide the robot's next action from the latest sensor readings.

        Parameters
        ----------
        sensors : sequence of int
            Open squares to the robot's left, front and right.

        Returns
        -------
        (rotation, movement)
            As produced by ``get_movements``.  The robot's stored location,
            heading and move counter are advanced as a side effect.
        '''
        # If hit a dead end, remember and remove from Q table
        if list(sensors) == [0, 0, 0] and self.move_count > 1:
            self.dead_ends.append(self.current_location)
            self.remove_dead_end()

        # Get valid next locations
        valid_next_locations = self.get_valid_next_locations(sensors)

        # Update Q-table entry
        self.update_Q_table(valid_next_locations)

        # Choose action and get maxQ
        next_location, maxQ = self.choose_next_location()

        # Update Q_table value
        if self.move_count > 0:
            self.update_Q_values(maxQ)

        # choose_path_less_travelled() and update_path_travelled() implement
        # an alternative, Tremaux-style chooser; it is not wired in here.

        # Given action, get rotation, movement, new location, and new direction
        rotation, movement = self.get_movements(next_location)

        # Keep last location and reward for next time step
        self.last_location = self.current_location
        self.last_reward = self.maze_rewards[self.current_location]
        self.current_location = next_location

        self.move_count += 1

        return rotation, movement

In [None]:
# Run a single sense -> decide -> move step, then print what the robot knew
# when it made that step.
# Relies on `robot`, `robot_pos`, `testmaze` and the array `maze` already
# existing in the kernel (`maze` is presumably a per-cell visit counter --
# confirm where it is created).

sensing = [testmaze.dist_to_wall(robot_pos['location'], heading)
           for heading in dir_sensors[robot_pos['heading']]]
rotation, movement = robot.next_move(sensing)
move(movement, rotation)
maze[tuple(robot.current_location)] += 1

# Grid with a 1 at the robot's new cell.
current_location = np.zeros((robot.maze_dim, robot.maze_dim))
current_location[robot.current_location] = 1

# Integer grid pre-filled with 99; the cell the robot just left is set to 1
# and each option it had from there shows its Q value times 100.
options = np.array([[99] * robot.maze_dim for _ in range(robot.maze_dim)])
options[robot.last_location] = 1
for next_location in robot.Q_table[robot.last_location].keys():
    print(robot.Q_table[robot.last_location][next_location])
    options[next_location] = robot.Q_table[robot.last_location][next_location]*100

print("Move count: {}".format(robot.move_count))
print("Last Location: {}".format(robot.last_location))
print("Current Location: {}".format(robot.current_location))
print(options)
print("past locations:")
print(maze)


In [9]:

class Robot(object):
    '''
    Minimal stand-in used to check that __init__ can call another method of
    the same instance.  The locations are fixed values; only the heading is
    taken from the constructor arguments.
    '''

    def __init__(self, maze_dim, start_location, start_heading):
        '''Store fixed locations, the given heading, and x with its square.'''
        self.last_location, self.current_location = (5, 5), (1, 5)
        self.current_heading = start_heading

        self.x = 2
        squared = self.xsquared(self.x)
        self.x2 = squared

    def xsquared(self, x):
        '''Return x multiplied by itself.'''
        product = x * x
        return product
        
# Build one instance of the stub class defined above.  Its constructor does
# not use maze_dim or start_location, so only the heading 'u' is stored.
testrobot = Robot(12, (0,0), 'u')        

In [11]:

# Show the value and the square computed inside __init__.
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(testrobot.x)
print(testrobot.x2)

2
4
