In [15]:
import random
import math
from environment import Agent, Environment
from planner import RoutePlanner
from simulator import Simulator

class LearningAgent(Agent):
    """ An agent that learns to drive in the Smartcab world.
        It keeps a Q-table mapping each state tuple produced by build_state()
        to a dictionary of {action: Q-value}, explores with probability
        'epsilon' and updates Q-values with learning rate 'alpha'. """

    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        """ Create the agent inside 'env'.
            learning - when False the agent only ever takes random actions
            epsilon  - initial random exploration factor
            alpha    - learning rate used by learn() """
        super(LearningAgent, self).__init__(env)     # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning # Whether the agent is expected to learn
        self.Q = dict()          # Q-table: state tuple -> {action: Q-value}
        self.epsilon = epsilon   # Random exploration factor
        self.alpha = alpha       # Learning factor

        # Trial counter that drives the epsilon decay in reset()
        self.t = 1
        # Fixed seed so that the sequence of random draws made through the
        # 'random' module is repeatable. NOTE: this reseeds the module-level
        # generator, which is shared with any other user of 'random'.
        random.seed(1177)

    def reset(self, destination=None, testing=False):
        """ The reset function is called at the beginning of each trial.
            'testing' is set to True if testing trials are being used
            once training trials have completed.

            During training epsilon decays as 1 / t**2, where 't' is bumped
            on every call. During testing both epsilon and alpha are set to 0
            so the agent neither explores nor changes its Q-table. """

        # Select the destination as the new location to route to
        self.planner.route_to(destination)

        if testing:
            self.epsilon = 0.0
            self.alpha = 0.0
        else:
            # 't' starts at 1 and is incremented *before* use, so the epsilon
            # passed to the constructor is overwritten on the first training
            # reset, which yields 1/2**2 = 0.25.
            # Other schedules that were tried here and discarded: linear
            # decay, |cos(alpha*t)|, and 1/(t**2 +/- alpha*t).
            self.t += 1.0
            self.epsilon = 1.0/(self.t**2)

    def build_state(self):
        """ The build_state function is called when the agent requests data from the
            environment. Returns a tuple of strings:
            (next waypoint, traffic from the left, light colour, oncoming traffic).
            The remaining deadline is deliberately not part of the state. """

        # Collect data about the environment
        waypoint = self.planner.next_waypoint() # The next waypoint
        inputs = self.env.sense(self)           # Visual input - intersection light and traffic

        # NOTE: features must not be engineered beyond the inputs available;
        # the point of the exercise is tuning epsilon and alpha, which
        # hand-engineered features would negate.
        features = (waypoint, inputs['left'], inputs['light'], inputs['oncoming'])

        # Every feature is stored as a string, so a missing value (None)
        # becomes the string 'None'.
        return tuple(str(feature) for feature in features)

    def get_maxQ(self, state):
        """ The get_maxQ function is called when the agent is asked to find the
            maximum Q-value of all actions based on the 'state' the smartcab is in.
            Raises KeyError if 'state' has no entry in the Q-table yet. """

        return max(self.Q[state].values())

    def createQ(self, state):
        """ The createQ function is called when a state is generated by the agent.
            When learning, a state seen for the first time gets a Q-table row
            with every valid action initialised to 0.0; known states are left
            untouched. """

        # The row is built from self.valid_actions (the same collection
        # choose_action samples from) so every action the agent can pick is
        # guaranteed to have a Q-value for learn() to update.
        if self.learning and state not in self.Q:
            self.Q[state] = {action: 0.0 for action in self.valid_actions}

    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in.

            Not learning: always a uniformly random valid action.
            Learning: a random valid action with probability 'epsilon',
            otherwise one of the actions with the highest Q-value for 'state',
            ties being broken at random. """

        # Record the agent state and the waypoint it is heading for
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()

        # The 'or' short-circuits: random.random() is only drawn when the
        # agent is learning, which keeps the seeded sequence of draws stable.
        if not self.learning or self.epsilon > random.random():
            return random.choice(self.valid_actions)

        # Exploit: gather every action tied for the best Q-value
        best_q = self.get_maxQ(state)
        best_actions = [action for action, q_value in self.Q[state].items()
                        if q_value == best_q]
        return random.choice(best_actions)

    def learn(self, state, action, reward):
        """ The learn function is called after the agent completes an action and
            receives a reward. This function does not consider future rewards
            when conducting learning (no discount factor):
                Q(s, a) <- Q(s, a) + alpha * (reward - Q(s, a)) """

        if self.learning:
            old_q = self.Q[state][action]
            self.Q[state][action] = old_q + self.alpha*(reward - old_q)

    def update(self):
        """ The update function is called when a time step is completed in the
            environment for a given trial. This function will build the agent
            state, choose an action, receive a reward, and learn if enabled. """

        state = self.build_state()          # Get current state
        self.createQ(state)                 # Create 'state' in Q-table
        action = self.choose_action(state)  # Choose an action
        reward = self.env.act(self, action) # Receive a reward
        self.learn(state, action, reward)   # Q-learn
        

def run():
    """ Entry point that wires up and launches the Smartcab simulation.
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """

    # --- Environment -------------------------------------------------------
    # Available flags:
    #   verbose     - True prints additional output from the simulation
    #   num_dummies - how many dummy agents populate the world (default 100)
    #   grid_size   - intersections as (columns, rows) (default (8, 6))
    world = Environment(num_dummies=100, grid_size=[8,6])

    # --- Driving agent -----------------------------------------------------
    # Available flags:
    #   learning - True makes the driving agent use Q-learning
    #   epsilon  - exploration factor, continuous (default 1)
    #   alpha    - learning rate, continuous (default 0.5)
    driver = world.create_agent(LearningAgent, learning=True, epsilon=1.0, alpha=0.95)

    # Make the driver the agent that is followed.
    #   enforce_deadline - True enforces a deadline metric
    world.set_primary_agent(driver, enforce_deadline=True)

    # --- Simulation --------------------------------------------------------
    # Available flags:
    #   update_delay - seconds between actions, continuous (default 2.0)
    #   display      - False disables the GUI if PyGame is enabled
    #   log_metrics  - True logs trial and simulation results to /logs
    #   optimized    - True changes the default log file name
    simulation = Simulator(world, update_delay=0.01, log_metrics=True, optimized=True)

    # --- Run ---------------------------------------------------------------
    # Available flags:
    #   tolerance - epsilon tolerance before testing begins (default 0.05)
    #   n_test    - number of testing trials to perform (default 0)
    simulation.run(n_test=10, tolerance=0.00005)


# Launch the simulation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()



/-------------------------
| Training trial 1
\-------------------------

Simulating trial. . . 
epsilon = 0.2500; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent attempted driving forward through a red light. (rewarded -10.99)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.31)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('right', 'left', 'red', 'None')
Agent followed the waypoint right. (rewarded 2.44)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('right', 'left', 'red', 'forward')
Agent attempted driving forward through a red light with traffic and cause a major accident. (r


/-------------------
| Step 23 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.88)
4% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Agent previous state: ('forward', 'right', 'red', 'None')
Agent attempted driving left through a red light. (rewarded -10.12)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 3
\-------------------------

Simulating trial. . . 
epsilon = 0.0625; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.26)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent attempted driving left thr

Agent previous state: ('left', 'None', 'green', 'right')
Agent idled at a green light with no oncoming traffic. (rewarded -5.48)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.81)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.41)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'right')
Agent attempted driving left through traffic and cause a minor accident. (rewarded -20.16)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent drove right instead of 


/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent attempted driving left through a red light. (rewarded -9.17)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent followed the waypoint right. (rewarded 2.45)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'forward')
Agent drove forward instead of left. (rewarded 0.29)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('left', 'forward', 'green', 'None')
Agent drove forward instead of left. (rewarded 0.06)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent 

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.45)
69% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.04)
66% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.19)
63% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.23)
60% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.57)
57% of time remaining to 


/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'right')
Agent drove right instead of forward. (rewarded 1.58)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('left', 'left', 'red', 'None')
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.37)
36% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('left', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.53)
32% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'None')
Agent drove right instead of left. (rewarded 0.22)
28% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Agent previous state:

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.53)
57% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.53)
54% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'right')
Agent drove right instead of forward. (rewarded 1.77)
51% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'forward')
Agent idled at a green light with no oncoming traffic. (rewarded -4.40)
49% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'right')
Agent drove right instead of left. (rewarded 1.18)
46% of 


/-------------------
| Step 18 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 0.94)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.28)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 16
\-------------------------

Simulating trial. . . 
epsilon = 0.0035; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.34)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.82)
92%


/-------------------
| Step 25 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 0.87)
13% of time remaining to reach destination.

/-------------------
| Step 26 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'left')
Agent followed the waypoint left. (rewarded 0.93)
10% of time remaining to reach destination.

/-------------------
| Step 27 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 1.78)
7% of time remaining to reach destination.

/-------------------
| Step 28 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'left')
Agent idled at a green light with no oncoming traffic. (rewarded -5.71)
3% of time remaining to reach destination.

/-------------------
| Step 29 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')



/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('left', 'forward', 'green', 'None')
Agent drove forward instead of left. (rewarded 1.27)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.81)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.75)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'left')
Agent followed the waypoint left. (rewarded 1.57)
40% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 20
\-------------------------

Simulating trial. . 


/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.23)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'forward')
Agent followed the waypoint forward. (rewarded 2.60)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'right')
Agent followed the waypoint right. (rewarded 2.61)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.36)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idle

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('forward', 'right', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.96)
28% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.26)
24% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Agent previous state: ('forward', 'forward', 'green', 'None')
Agent drove right instead of forward. (rewarded 0.78)
20% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.87)
16% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'right')
Agent drove ri


/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.76)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'left')
Agent drove forward instead of left. (rewarded -0.03)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 0.59)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('left', 'right', 'red', 'left')
Agent drove right instead of left. (rewarded -0.20)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'forward')
Agent followed the waypoi

Agent previous state: ('forward', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.06)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'None')
Agent drove right instead of forward. (rewarded 0.62)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 0.72)
40% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 29
\-------------------------

Simulating trial. . . 
epsilon = 0.0011; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('right', 'right', 'green', 'right')
Agent drove forward instead of right. (rewarded 1.39)
97% of time remaining to reach destination.

/-----------


/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent followed the waypoint right. (rewarded 1.39)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.69)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'right')
Agent properly idled at a red light. (rewarded 1.87)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.40)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a r


/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.28)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.17)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'left')
Agent idled at a green light with no oncoming traffic. (rewarded -4.50)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'None')
Agent drove right instead of left. (rewarded 1.07)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followe

Agent previous state: ('right', 'left', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.50)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent drove right instead of forward. (rewarded 0.18)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 2.28)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.43)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.78)
64% of time remaining to reach dest

| Step 0 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.25)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'right')
Agent followed the waypoint right. (rewarded 1.69)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'left')
Agent followed the waypoint forward. (rewarded 1.46)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.84)
80% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 40
\-------------------------

Simulating trial. . . 
epsilon = 


Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.89)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.18)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 0.87)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 1.10)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.66)
40% of time remaining

Agent previous state: ('left', 'None', 'green', 'left')
Agent followed the waypoint left. (rewarded 1.67)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('left', 'right', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.55)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.85)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.62)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'left')
Agent followed the waypoint left. (rewarded 2.56)
45% of time remaining to reach destination.




/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 0.90)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'left')
Agent drove left instead of forward. (rewarded 0.10)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('right', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.24)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent followed the waypoint right. (rewarded 2.53)
52% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 48
\-------------------------

Simulating tri


/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'None')
Agent drove right instead of forward. (rewarded 0.18)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 2.45)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.34)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.25)
80% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'left')
Agent drove left inst


/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.00)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.64)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.17)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.28)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the w

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.69)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.59)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.31)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 2.61)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.42)
76% of time remaining to reach destination.

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.16)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'forward')
Agent drove right instead of forward. (rewarded 1.00)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('left', 'forward', 'green', 'left')
Agent drove forward instead of left. (rewarded 1.43)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.54)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.51)
35% of time remaining 

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 1.40)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.53)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'None')
Agent drove left instead of forward. (rewarded 1.76)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 1.89)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.97)
68% of time remaining to re

Agent previous state: ('left', 'None', 'green', 'forward')
Agent drove right instead of left. (rewarded 0.60)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 64
\-------------------------

Simulating trial. . . 
epsilon = 0.0002; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.75)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.42)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'forward', 'green', 'None')
Agent drove right instead of forward. (rewarded 0.37)
90% of time remaining to reach destination.

/-----


/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 1.78)
94% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'forward', 'green', 'None')
Agent drove right instead of forward. (rewarded 1.52)
91% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'forward', 'green', 'None')
Agent drove right instead of left. (rewarded 0.06)
89% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'right')
Agent drove forward instead of left. (rewarded 1.64)
86% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'forward', 'red', 'left')
Agent prope

Agent previous state: ('left', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 2.05)
63% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'forward')
Agent drove right instead of left. (rewarded 1.46)
60% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.82)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.41)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.19)
50% of time remaining to reach desti

Agent previous state: ('left', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.48)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.15)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'left')
Agent followed the waypoint forward. (rewarded 2.11)
56% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 70
\-------------------------

Simulating trial. . . 
epsilon = 0.0002; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.45)
96% of time remaining to reach destination.

/-------------------



/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.77)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 0.95)
40% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 73
\-------------------------

Simulating trial. . . 
epsilon = 0.0002; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.03)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'right', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.89


/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('left', 'forward', 'green', 'None')
Agent drove right instead of left. (rewarded 1.73)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent followed the waypoint right. (rewarded 0.92)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('right', 'right', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.47)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'forward')
Agent followed the waypoint right. (rewarded 1.82)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'left')
Agent properly idled

Simulating trial. . . 
epsilon = 0.0002; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'forward')
Agent drove forward instead of left. (rewarded 0.90)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.18)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.67)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.14)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left',

90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'right')
Agent drove right instead of forward. (rewarded 0.28)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.60)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'right')
Agent properly idled at a red light. (rewarded 1.53)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'forward')
Agent drove right instead of left. (rewarded 0.39)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('right', '

Agent previous state: ('forward', 'left', 'green', 'left')
Agent drove left instead of forward. (rewarded 0.49)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.82)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'right')
Agent followed the waypoint right. (rewarded 2.90)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'None')
Agent drove forward instead of left. (rewarded 1.52)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.58)
68% of time remaining to reach destina


/-------------------
| Step 18 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.39)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'forward')
Agent followed the waypoint forward. (rewarded 0.80)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 86
\-------------------------

Simulating trial. . . 
epsilon = 0.0001; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'left')
Agent followed the waypoint left. (rewarded 1.17)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'None')
Agent drove right instead of forward. (rewarde


/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent attempted driving forward through a red light. (rewarded -10.90)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'forward', 'red', 'forward')
Agent properly idled at a red light. (rewarded 2.66)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.88)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.78)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('forward', 'None', 'g

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.43)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('forward', 'forward', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.08)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.66)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.36)
67% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.09)
63% of time remaining to r


/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('forward', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.78)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.21)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.36)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 0.43)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent proper

Agent previous state: ('left', 'right', 'green', 'None')
Agent followed the waypoint left. (rewarded 0.92)
60% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 97
\-------------------------

Simulating trial. . . 
epsilon = 0.0001; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('left', 'right', 'red', 'forward')
Agent properly idled at a red light. (rewarded 2.96)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 2.63)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 0.99)
85% of time remaining to reach destination.

/-------------------



/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.33)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.06)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.93)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.77)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. 

Agent previous state: ('forward', 'left', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.71)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.32)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.72)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.26)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'right', 'red', 'right')
Agent attempted driving left through a red light with traffic and cause a major a

Agent previous state: ('forward', 'right', 'green', 'None')
Agent drove left instead of forward. (rewarded 0.94)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('right', 'forward', 'green', 'None')
Agent followed the waypoint right. (rewarded 1.01)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 2.27)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.05)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'right')
Agent followed the waypoint forward. (rewarded 1.88)
20% of time remainin


/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 2.65)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.79)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('left', 'left', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.69)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'left')
Agent followed the waypoint left. (rewarded 1.45)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'None')
Agent drove right instead of fo


/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.70)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'forward', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.64)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.69)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 2.09)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly 

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 2.50)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.32)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'forward')
Agent followed the waypoint forward. (rewarded 1.86)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.27)
70% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 115
\-------------------------

Simulating trial. . . 
epsilon = 0.0001; alpha = 0.9500

/----


/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.31)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'right', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.59)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.09)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'forward', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.94)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly id


/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'forward')
Agent followed the waypoint forward. (rewarded 1.26)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'forward', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.88)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.54)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'left', 'green', 'left')
Agent followed the waypoint left. (rewarded 1.55)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed th

Agent previous state: ('left', 'None', 'green', 'left')
Agent followed the waypoint left. (rewarded 2.04)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('left', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.23)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.17)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'forward')
Agent drove right instead of left. (rewarded 1.70)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.55)
55% of time remaining to reach destin


/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'right')
Agent followed the waypoint forward. (rewarded 2.14)
15% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 127
\-------------------------

Simulating trial. . . 
epsilon = 0.0001; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 2.01)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 2.20)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'forward', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1

Agent previous state: ('right', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 0.03)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('right', 'None', 'red', 'None')
Agent followed the waypoint right. (rewarded 1.42)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.75)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.48)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 2.46)
83% of time remaining to reach de

Agent properly idled at a red light. (rewarded 2.03)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'right', 'red', 'right')
Agent drove right instead of forward. (rewarded 0.68)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('left', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.44)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.88)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('left', 'right', 'green', 'left')
Agent followed the waypoint left. (rewarded 1.90)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\------


/-------------------
| Step 4 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'left')
Agent followed the waypoint forward. (rewarded 1.44)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Agent previous state: ('right', 'left', 'red', 'None')
Agent followed the waypoint right. (rewarded 1.79)
76% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 137
\-------------------------

Simulating trial. . . 
epsilon = 0.0001; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'forward')
Agent properly idled at a red light. (rewarded 1.89)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'right')
Agent properly idled at a red light. (rewarded 2.22)
92% of

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 140
\-------------------------

Simulating trial. . . 
epsilon = 0.0001; alpha = 0.9500

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed the waypoint right. (rewarded 2.61)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Agent previous state: ('forward', 'right', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.65)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'forward')
Agent followed the waypoint forward. (rewarded 2.09)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Agent previous state: ('right', 'forward', 'red', 'None')
Agent properly idled at a red light. (rewarded 1.

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.30)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Agent previous state: ('forward', 'left', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.48)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.72)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 2.23)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.96)
52% of time remaining to r

Agent previous state: ('left', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.42)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('left', 'None', 'red', 'right')
Agent properly idled at a red light. (rewarded 2.68)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('left', 'None', 'green', 'None')
Agent followed the waypoint left. (rewarded 1.52)
40% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 6
\-------------------------

Simulating trial. . . 
epsilon = 0.0000; alpha = 0.0000

/-------------------
| Step 0 Results
\-------------------

Agent previous state: ('forward', 'left', 'red', 'left')
Agent properly idled at a red light. (rewarded 1.16)
97% of time remaining to reach destination.

/-------------------
| Step 


/-------------------
| Step 13 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.58)
44% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.23)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Agent previous state: ('forward', 'None', 'red', 'None')
Agent properly idled at a red light. (rewarded 2.46)
36% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Agent previous state: ('forward', 'None', 'green', 'None')
Agent followed the waypoint forward. (rewarded 1.91)
32% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Agent previous state: ('right', 'None', 'green', 'None')
Agent followed 