In [19]:
import random
import math
from environment import Agent, Environment
from planner import RoutePlanner
from simulator import Simulator

class LearningAgent(Agent):
    """ An agent that learns to drive in the Smartcab world.
        This is the object you will be modifying. """ 

    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        super(LearningAgent, self).__init__(env)     # Set the agent in the evironment 
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning # Whether the agent is expected to learn
        self.Q = dict()          # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon   # Random exploration factor
        self.alpha = alpha       # Learning factor

        ###########
        ## TO DO ##
        ###########
        # Set any additional class parameters as needed
        self.trial = 0
        random.seed(22)


    def reset(self, destination=None, testing=False):
        """ The reset function is called at the beginning of each trial.
            'testing' is set to True if testing trials are being used
            once training trials have completed. """

        # Select the destination as the new location to route to
        self.planner.route_to(destination)
        
        ########### 
        ## TO DO ##
        ###########
        # Update epsilon using a decay function of your choice
        # Update additional class parameters as needed
        # If 'testing' is True, set epsilon and alpha to 0
        # self.epsilon = self.epsilon-0.05
        
        if testing == 'True':
            self.epsilon = 0
            self.alpha = 0
        else:
            self.trial += 1
            # self.epsilon = 1/(self.trial**2)
            # Safety Rating - F, Reliability Rating - B. Zig Zag curves. This is no better than linear decay factor.
            # But the reliability rating has improved to B
            # self.epsilon = math.cos(self.alpha*self.trial) -> Safety F and Reliability - F
            # self.epsilon = (self.alpha**self.trial) -> Ssfety C and Reliability C
            # self.epsilon = 0.90 ** self.trial -> Safety F and Reliability C
            # self.epsilon = 1/(self.trial**2) # -> Safety F and Reliability C
            self.epsilon = math.cos(self.alpha*self.trial) #
            # self.epsilon = 0.98**self.trial 

        return None

    def build_state(self):
        """ The build_state function is called when the agent requests data from the 
            environment. The next waypoint, the intersection inputs, and the deadline 
            are all features available to the agent. """

        # Collect data about the environment
        waypoint = self.planner.next_waypoint() # The next waypoint 
        inputs = self.env.sense(self)           # Visual input - intersection light and traffic
        deadline = self.env.get_deadline(self)  # Remaining deadline

        ########### 
        ## TO DO ##
        ###########
        
        # NOTE : you are not allowed to engineer eatures outside of the inputs available.
        # Because the aim of this project is to teach Reinforcement Learning, we have placed 
        # constraints in order for you to learn how to adjust epsilon and alpha, and thus learn about the balance between exploration and exploitation.
        # With the hand-engineered features, this learning process gets entirely negated.
        
        # Set 'state' as a tuple of relevant data for the agent        
        state = (waypoint,inputs['light'],inputs['left'], inputs['oncoming'])
        return state


    def get_maxQ(self, state):
        """ The get_max_Q function is called when the agent is asked to find the
            maximum Q-value of all actions based on the 'state' the smartcab is in. """

        ########### 
        ## TO DO ##
        ###########
        # Calculate the maximum Q-value of all actions for a given state

        maxQ = max(self.Q[state].values())

        return maxQ


    def createQ(self, state):
        """ The createQ function is called when a state is generated by the agent. """

        ########### 
        ## TO DO ##
        ###########
        # When learning, check if the 'state' is not in the Q-table
        # If it is not, create a new dictionary for that state
        #   Then, for each action available, set the initial Q-value to 0.0
        
        print("state variables below")
        print(state)
        print(self.Q.keys)
        if self.learning == True:
            if state not in self.Q.keys():
                self.Q[state] = {}
                for action in self.valid_actions:
                    self.Q[state][action] = 0.0

        return


    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in. """

        # Set the agent state and default action
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()
        action = None

        ########### 
        ## TO DO ##
        ###########
        # When not learning, choose a random action
        # When learning, choose a random action with 'epsilon' probability
        # Otherwise, choose an action with the highest Q-value for the current state
        # Be sure that when choosing an action with highest Q-value that you randomly select between actions that "tie".
        if not self.learning:
            action = random.choice(self.valid_actions)
        else:
            if self.epsilon > 0.01 and self.epsilon > random.random():
                action = random.choice(self.valid_actions)
            else:
                action = max(self.Q[state], key=self.Q[state].get)
        return action


    def learn(self, state, action, reward):
        """ The learn function is called after the agent completes an action and
            receives a reward. This function does not consider future rewards 
            when conducting learning. """

        ########### 
        ## TO DO ##
        ###########
        # When learning, implement the value iteration update rule
        #   Use only the learning rate 'alpha' (do not use the discount factor 'gamma')
        if self.learning == True:
            self.Q[state][action] = self.Q[state][action] + self.alpha*(reward - self.Q[state][action])

        return


    def update(self):
        """ The update function is called when a time step is completed in the 
            environment for a given trial. This function will build the agent
            state, choose an action, receive a reward, and learn if enabled. """

        state = self.build_state()          # Get current state
        self.createQ(state)                 # Create 'state' in Q-table
        action = self.choose_action(state)  # Choose an action
        reward = self.env.act(self, action) # Receive a reward
        self.learn(state, action, reward)   # Q-learn

        return
        

def run():
    """ Driving function for running the simulation. 
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """

    ##############
    # Create the environment
    # Flags:
    #   verbose     - set to True to display additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment, default is 100
    #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
    env = Environment(verbose=True)
    
    ##############
    # Create the driving agent
    # Flags:
    #   learning   - set to True to force the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor, default is 1
    #    * alpha   - continuous value for the learning rate, default is 0.5
    agent = env.create_agent(LearningAgent, learning=True, alpha = 0.01)
    
    ##############
    # Follow the driving agent
    # Flags:
    #   enforce_deadline - set to True to enforce a deadline metric
    env.set_primary_agent(agent, enforce_deadline=True)

    ##############
    # Create the simulation
    # Flags:
    #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
    #   display      - set to False to disable the GUI if PyGame is enabled
    #   log_metrics  - set to True to log trial and simulation results to /logs
    #   optimized    - set to True to change the default log file name
    sim = Simulator(env, update_delay=0.01, log_metrics=True,optimized=True)
    
    ##############
    # Run the simulator
    # Flags:
    #   tolerance  - epsilon tolerance before beginning testing, default is 0.05 
    #   n_test     - discrete number of testing trials to perform, default is 0
    sim.run(n_test=100, tolerance = 0.001)

if __name__ == '__main__':
    run()

Simulator.__init__(): Error initializing GUI objects; display disabled.
error: Couldn't open images/logo.png

/-------------------------
| Training trial 1
\-------------------------

Environment.reset(): Trial set up with start = (8, 7), destination = (2, 4), deadline = 25
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.0100
Simulating trial. . . 
epsilon = 1.0000; alpha = 0.01

52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('left', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: right, reward: -0.0627063439039
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 13, 't': 12, 'action': 'right', 'reward': -0.06270634390387397, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent drove right instead of left. (rewarded -0.06)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('right', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]

('left', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), action: None, reward: 1.40408285805
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 19, 't': 1, 'action': None, 'reward': 1.4040828580494322, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.40)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('left', 'red', 'right', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), action: left, reward: -9.47847514249
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 

epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9996; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environmen

Agent attempted driving forward through a red light. (rewarded -9.46)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: forward, reward: -10.3488212194
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 2, 't': 18, 'action': 'forward', 'reward': -10.348821219353553, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -10.35)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
state variables below
('forward', 'red', None, None)
<bu

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 4), heading: (0, 1), action: left, reward: -10.5401664713
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', 'left', 'left'), 'deadline': 12, 't': 8, 'action': 'left', 'reward': -10.54016647131287, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', 'left')
Agent attempted driving left through a red light. (rewarded -10.54)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('left', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (1, 0), action: left, reward: 0.992070853484
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'vi

Simulating trial. . . 
epsilon = 0.9988; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9988; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9988; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('left', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: left, reward: -39.7595977346
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 35, 't': 0, 'action': 'left', 'reward': -39.75959773464423, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -39.76)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\--------------

state variables below
('left', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: forward, reward: -10.0178279378
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 11, 't': 24, 'action': 'forward', 'reward': -10.017827937787473, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent attempted driving forward through a red light. (rewarded -10.02)
29% of time remaining to reach destination.

/-------------------
| Step 25 Results
\-------------------

Environment.step(): t = 25
state variables below
('left', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 5), heading: (-1, 0), action: forward, reward: -0.0678570087995
Environment.act(): Step data: {'in

Agent previous state: ('right', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.61)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('right', 'red', None, 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: None, reward: 0.413466372954
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'right'), 'deadline': 15, 't': 15, 'action': None, 'reward': 0.4134663729537015, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 0.41)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
state variables below
('right', 'red'

Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 18, 't': 12, 'action': 'forward', 'reward': 1.0484404595873036, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.05)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('forward', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 4), heading: (-1, 0), action: forward, reward: -40.6536477181
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 17, 't': 13, 'action': 'forward', 'reward': -40.65364771807

epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9960; alpha = 0.0100
Simulating tria


/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 4), heading: (-1, 0), action: right, reward: 0.622829248098
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 4, 't': 16, 'action': 'right', 'reward': 0.622829248098407, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded 0.62)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
state variables below
('forward', 'red', None, 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 4), heading: (-1, 0), action: left, reward: -

| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: None, reward: -5.74121879862
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 1, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 11, 't': 14, 'action': None, 'reward': -5.741218798621728, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.74)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 2), heading: (0, 1), action: right, rewa

\-------------------

Environment.step(): t = 6
state variables below
('forward', 'red', None, 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 4), heading: (-1, 0), action: None, reward: 2.53954723863
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'right'), 'deadline': 19, 't': 6, 'action': None, 'reward': 2.539547238627412, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 2.54)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 4), heading: (-1, 0), action: None, reward: 1.85601219825
Environment.act(): Step 

epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9872; alpha = 0.0100
Simulating tria

('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: None, reward: -4.31146253657
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 1, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 7, 't': 13, 'action': None, 'reward': -4.3114625365747274, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.31)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (7, 4), heading: (-1, 0), action: forward, reward: 2.26485403339
Environme

('forward', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 4), heading: (0, 1), action: forward, reward: 0.425415734754
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'forward'), 'deadline': 1, 't': 19, 'action': 'forward', 'reward': 0.425415734754107, 'waypoint': 'forward'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('forward', 'green', None, 'forward')
Agent followed the waypoint forward. (rewarded 0.43)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 20
\-------------------------

Environment.reset(): Trial set up with start = (5, 4), destination = (1, 7), deadline = 35
Simulating trial

epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9780; alpha = 0.0100
Simulating tria

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 6), heading: (1, 0), action: None, reward: -4.34409218182
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 1, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 1, 't': 19, 'action': None, 'reward': -4.344092181818146, 'waypoint': 'right'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('right', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.34)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 23
\-------------------------

Environment.reset(): Trial set up with start = (7, 3), destination = (3, 4), deadline = 25
Simulating trial. . . 
epsilon = 0.9737; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9737; alpha = 0.0100
Simulat

state variables below
('right', 'red', 'left', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 7), heading: (0, 1), action: left, reward: -10.4934966877
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', 'left', 'left'), 'deadline': 16, 't': 9, 'action': 'left', 'reward': -10.493496687691987, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', 'left')
Agent attempted driving left through a red light. (rewarded -10.49)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
state variables below
('right', 'green', 'forward', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 7), heading: (-1, 0), action: right, reward: 2.72516265071
Environment.act(): Step data: {'inputs': {'ligh


Environment.reset(): Trial set up with start = (7, 4), destination = (1, 2), deadline = 20
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9664; alpha = 0.0100
Simulating t

| Step 21 Results
\-------------------

Environment.step(): t = 21
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 7), heading: (-1, 0), action: None, reward: -5.15867167444
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 1, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 4, 't': 21, 'action': None, 'reward': -5.158671674444718, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.16)
12% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
state variables below
('left', 'green', 'right', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 6), heading: (0, -1), action: right, reward: -0

state variables below
('forward', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: left, reward: -9.11087480632
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 14, 't': 11, 'action': 'left', 'reward': -9.110874806316573, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent attempted driving left through a red light. (rewarded -9.11)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('forward', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: forward, reward: -9.79055084177
Environment.act(): Step data: {'inputs': {'li

Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9523; alpha =

| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 6), heading: (1, 0), action: None, reward: 2.58728722173
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 20, 't': 5, 'action': None, 'reward': 2.5872872217337552, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.59)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
state variables below
('forward', 'green', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 6), heading: (1, 0), action: forward, reward: 1.77114833374
Environm

state variables below
('forward', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: forward, reward: -40.9147909817
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 5, 't': 15, 'action': 'forward', 'reward': -40.914790981651116, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -40.91)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
state variables below
('forward', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: left, reward: 

epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.9359; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('right', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 3), heading: (-1, 0), action: forward, reward: 1.58354764693
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 20, 't': 0, 'action': 'forward

('forward', 'red', 'right', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: forward, reward: -10.6559144813
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 12, 't': 23, 'action': 'forward', 'reward': -10.655914481279149, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent attempted driving forward through a red light. (rewarded -10.66)
31% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: left, reward: -40.9889659256
Environment.act(): Step data: {'inputs': {'light': 'red', 'on

Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: left, reward: 2.34228547708
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 11, 't': 9, 'action': 'left', 'reward': 2.3422854770815817, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.34)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 7), heading: (0, 1), action: right, reward: 1.13155429135
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadli

| Step 26 Results
\-------------------

Environment.step(): t = 26
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 2), heading: (1, 0), action: right, reward: -0.314441349717
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 4, 't': 26, 'action': 'right', 'reward': -0.3144413497170271, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded -0.31)
10% of time remaining to reach destination.

/-------------------
| Step 27 Results
\-------------------

Environment.step(): t = 27
state variables below
('right', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 2), heading: (1, 0), action: left, reward: -9.05736221698
Environment

Environment.act() [POST]: location: (5, 6), heading: (0, -1), action: forward, reward: 1.71467678195
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'left', None), 'deadline': 14, 't': 11, 'action': 'forward', 'reward': 1.7146767819538522, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', None)
Agent drove forward instead of right. (rewarded 1.71)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('right', 'green', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 6), heading: (0, -1), action: None, reward: -5.87302784279
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 1, 'light': 'green', 'state': ('rig

('left', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 7), heading: (0, -1), action: forward, reward: 0.832491918326
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 9, 't': 11, 'action': 'forward', 'reward': 0.8324919183263926, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent drove forward instead of left. (rewarded 0.83)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('left', 'green', 'left', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: forward, reward: 1.11019356876
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'r


/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('forward', 'green', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (1, 2), heading: (0, -1), action: forward, reward: 2.21024072304
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 17, 't': 8, 'action': 'forward', 'reward': 2.210240723036404, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', None)
Agent followed the waypoint forward. (rewarded 2.21)
64% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 47
\-------------------------

Environment.reset(): Trial set up with start = (3, 2), destination 

| Step 21 Results
\-------------------

Environment.step(): t = 21
state variables below
('forward', 'green', 'forward', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), action: forward, reward: 1.29434536556
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', 'left'), 'deadline': 4, 't': 21, 'action': 'forward', 'reward': 1.2943453655595378, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', 'left')
Agent followed the waypoint forward. (rewarded 1.29)
12% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
state variables below
('forward', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), ac

| Step 18 Results
\-------------------

Environment.step(): t = 18
state variables below
('left', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: right, reward: -0.0833630575848
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 7, 't': 18, 'action': 'right', 'reward': -0.08336305758475482, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent drove right instead of left. (rewarded -0.08)
24% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: right, reward: 1.12766568086
Environment.act():

Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'left', 'left': 'right'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'right', 'forward'), 'deadline': 10, 't': 20, 'action': 'forward', 'reward': 0.8728481159280783, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'right', 'forward')
Agent drove forward instead of right. (rewarded 0.87)
30% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
state variables below
('right', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 6), heading: (-1, 0), action: None, reward: 1.15552252561
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 9, 't': 21, 'action': None, 'reward': 1.155522525611

55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('right', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 5), heading: (0, 1), action: right, reward: 1.64741418002
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 11, 't': 9, 'action': 'right', 'reward': 1.6474141800216187, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.65)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
state variables below
('right', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 5), h

Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8525; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables b

| Step 25 Results
\-------------------

Environment.step(): t = 25
state variables below
('right', 'green', 'forward', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 6), heading: (0, 1), action: left, reward: -20.2085517696
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'left', 'left': 'forward'}, 'violation': 3, 'light': 'green', 'state': ('right', 'green', 'forward', 'right'), 'deadline': 5, 't': 25, 'action': 'left', 'reward': -20.208551769595093, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'forward', 'right')
Agent attempted driving left through traffic and cause a minor accident. (rewarded -20.21)
13% of time remaining to reach destination.

/-------------------
| Step 26 Results
\-------------------

Environment.step(): t = 26
state variables below
('right', 'red', 'forward', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: l

Environment.step(): t = 12
state variables below
('right', 'green', 'forward', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 4), heading: (0, 1), action: left, reward: -20.7835414648
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'right', 'left': 'forward'}, 'violation': 3, 'light': 'green', 'state': ('right', 'green', 'forward', 'forward'), 'deadline': 13, 't': 12, 'action': 'left', 'reward': -20.783541464818928, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'forward', 'forward')
Agent attempted driving left through traffic and cause a minor accident. (rewarded -20.78)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('right', 'red', 'forward', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 4), heading: (0, 1)

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 3), heading: (-1, 0), action: forward, reward: 1.83454156325
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 11, 't': 14, 'action': 'forward', 'reward': 1.834541563251607, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', None)
Agent followed the waypoint forward. (rewarded 1.83)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('forward', 'red', 'forward', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 3), heading: (-1, 0), action: None, reward: 1.95976333209
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, '

| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('left', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: forward, reward: -10.1811370996
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 12, 't': 8, 'action': 'forward', 'reward': -10.181137099552902, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent attempted driving forward through a red light. (rewarded -10.18)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('left', 'green', 'forward', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 3), heading: (0, -1), action: left, reward: 0

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), action: forward, reward: -0.17753235977
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 8, 't': 12, 'action': 'forward', 'reward': -0.17753235976971538, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent drove forward instead of right. (rewarded -0.18)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('right', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), action: right, reward: -20.8488981629
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'forward'}, '

epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.8021; alpha = 0.0100
Simulating tria

('right', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 2), heading: (0, 1), action: None, reward: -5.91381969302
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 1, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 1, 't': 19, 'action': None, 'reward': -5.913819693021896, 'waypoint': 'right'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('right', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.91)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 66
\-------------------------

Environment.reset(): Trial set up with start = (2, 2), destination = (4, 6), deadline = 20
Simulating trial. . . 
epsilon = 0.7900; alpha = 0.0100
Simulating trial. . . 
epsilon = 0

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (0, 1), action: forward, reward: -10.6927230643
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 18, 't': 12, 'action': 'forward', 'reward': -10.692723064330833, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -10.69)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('left', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (0, 1), action: left, reward: -9.70154943166
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation':

epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7712; alpha = 0.0100
Simulating tria

epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7584; alpha = 0.0100
Simulating tria

| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('left', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 5), heading: (1, 0), action: left, reward: 2.41481255143
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 15, 't': 15, 'action': 'left', 'reward': 2.414812551425113, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 2.41)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
state variables below
('forward', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 5), heading: (1, 0), action: None, reward: -5.30053266136
Enviro


Environment.step(): t = 24
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 2), heading: (-1, 0), action: None, reward: 1.78303103321
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 6, 't': 24, 'action': None, 'reward': 1.7830310332092987, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.78)
17% of time remaining to reach destination.

/-------------------
| Step 25 Results
\-------------------

Environment.step(): t = 25
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 2), heading: (-1, 0), action: None, reward: -4.05478281475
Environment.act(): Step data: {'inputs': {'light':


/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 6), heading: (1, 0), action: left, reward: 1.64511483499
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 9, 't': 11, 'action': 'left', 'reward': 1.645114834988844, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.65)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 7), heading: (0, 1), action: right, reward: 0.2754244

Agent attempted driving forward through a red light. (rewarded -9.02)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 79
\-------------------------

Environment.reset(): Trial set up with start = (5, 5), destination = (8, 3), deadline = 25
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.7038; alpha = 0.

| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('left', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (0, 1), action: None, reward: 1.86063487939
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 6, 't': 14, 'action': None, 'reward': 1.8606348793918501, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.86)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('left', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (0, 1), action: forward, reward: -9.86535512047
Environment.act()

Agent previous state: ('right', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.20)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
state variables below
('right', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: right, reward: 2.78399380981
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 19, 't': 6, 'action': 'right', 'reward': 2.7839938098111765, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent followed the waypoint right. (rewarded 2.78)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('forward', 'green'

Environment.act() [POST]: location: (2, 4), heading: (0, -1), action: left, reward: 0.2084940109
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 5, 't': 20, 'action': 'left', 'reward': 0.20849401089980701, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove left instead of forward. (rewarded 0.21)
16% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
state variables below
('right', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (1, 0), action: right, reward: 1.52980141311
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 

Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', 'right'), 'deadline': 19, 't': 6, 'action': None, 'reward': 0.9857333148219316, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', 'right')
Agent properly idled at a red light. (rewarded 0.99)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: left, reward: 1.87630283182
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 18, 't': 7, 'action': 'left', 'reward': 1.876302831820915, 'waypoint': 'left'}
A

epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6600; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('right', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), 

Environment.step(): t = 17
state variables below
('right', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 5), heading: (-1, 0), action: left, reward: -10.9315656102
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 3, 't': 17, 'action': 'left', 'reward': -10.931565610232008, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -10.93)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
state variables below
('right', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 5), heading: (-1, 0), action: None, reward: 0.219028947187
Environment.act(): Step data: {'inputs': {'l


Environment.step(): t = 6
state variables below
('forward', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: right, reward: -19.5168212313
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 3, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 14, 't': 6, 'action': 'right', 'reward': -19.516821231314594, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent attempted driving right through traffic and cause a minor accident. (rewarded -19.52)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: forward, reward: -9

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 1.81)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('left', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: left, reward: -20.290026809
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 3, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 20, 't': 5, 'action': 'left', 'reward': -20.29002680900461, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent attempted driving left through traffic and cause a minor accident. (rewarded -20.29)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.ste

Agent previous state: ('right', 'red', None, 'left')
Agent followed the waypoint right. (rewarded 2.57)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables below
('forward', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: forward, reward: 2.69423808467
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 21, 't': 4, 'action': 'forward', 'reward': 2.6942380846683887, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 2.69)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('fo


/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
state variables below
('right', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 5), heading: (0, 1), action: None, reward: 1.36137544811
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 8, 't': 17, 'action': None, 'reward': 1.3613754481071612, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.36)
28% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
state variables below
('right', 'red', 'right', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 5), heading: (0, 1), action: forward, reward: -

('forward', 'red', 'right', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: forward, reward: -10.2109907626
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'right', 'left'), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': -10.210990762625794, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', 'left')
Agent attempted driving forward through a red light. (rewarded -10.21)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('forward', 'red', 'left', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: None, reward: 2.66166694908
Environment.act(): Step data: {'inputs': {'light': 'r

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.61)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 2), heading: (1, 0), action: left, reward: -10.5031687117
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 10, 't': 10, 'action': 'left', 'reward': -10.503168711675109, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -10.50)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
state variables below
('for

Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.6058; alpha =

30% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
state variables below
('right', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 2), heading: (-1, 0), action: right, reward: 1.81159543496
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'forward'), 'deadline': 9, 't': 21, 'action': 'right', 'reward': 1.8115954349592611, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 1.81)
27% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
state variables below
('right', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [P

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
state variables below
('left', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: forward, reward: -0.0961475596739
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 6, 't': 19, 'action': 'forward', 'reward': -0.09614755967393773, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent drove forward instead of left. (rewarded -0.10)
20% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
state variables below
('left', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 6), heading: (1, 0), 

| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('left', 'red', None, 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 7), heading: (-1, 0), action: None, reward: 2.56376863292
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'right'), 'deadline': 12, 't': 8, 'action': None, 'reward': 2.5637686329181895, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 2.56)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: right, reward: 0.353102389324
Environment.act()

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 6), heading: (0, -1), action: right, reward: 1.60190442678
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'forward', None), 'deadline': 21, 't': 14, 'action': 'right', 'reward': 1.6019044267814142, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'forward', None)
Agent followed the waypoint right. (rewarded 1.60)
57% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 6), heading: (0, -1), action: None, reward: 1.81868447018
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violati

epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5403; alpha = 0.0100
Simulating tria

Agent previous state: ('right', 'red', None, 'left')
Agent followed the waypoint right. (rewarded 1.61)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
state variables below
('right', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 7), heading: (1, 0), action: None, reward: -5.67319509799
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 1, 'light': 'green', 'state': ('right', 'green', 'left', None), 'deadline': 17, 't': 3, 'action': None, 'reward': -5.673195097988078, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.67)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables below
(

Environment.step(): t = 18
state variables below
('left', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: left, reward: 1.43529935283
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 2, 't': 18, 'action': 'left', 'reward': 1.4352993528297862, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 1.44)
5% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 102
\-------------------------

Environment.reset(): Trial set up with start = (6, 7), destination = (3, 5), deadline = 25
Simulating trial. . . 
epsilon = 0.5234; alpha = 0.0100
Simulating 

| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('forward', 'red', 'right', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 6), heading: (1, 0), action: None, reward: 2.23325793023
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', 'left'), 'deadline': 17, 't': 8, 'action': None, 'reward': 2.233257930227027, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', 'left')
Agent properly idled at a red light. (rewarded 2.23)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('forward', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 6), heading: (1, 0), action: None, reward: 1.60163748596

\-------------------

Environment.step(): t = 2
state variables below
('right', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 7), heading: (0, 1), action: left, reward: -9.49782652752
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 33, 't': 2, 'action': 'left', 'reward': -9.497826527519878, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent attempted driving left through a red light. (rewarded -9.50)
91% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
state variables below
('right', 'red', 'left', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 7), heading: (-1, 0), action: right, reward: 2.47662545886
Environment.a

| Step 17 Results
\-------------------

Environment.step(): t = 17
state variables below
('left', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (0, -1), action: left, reward: -39.3077828555
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 18, 't': 17, 'action': 'left', 'reward': -39.30778285545755, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -39.31)
49% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
state variables below
('left', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), he

epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.5062; alpha = 0.0100
Simulating tria

('left', 'green', None, 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: right, reward: -0.205203539418
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'right'), 'deadline': 7, 't': 13, 'action': 'right', 'reward': -0.2052035394180426, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'right')
Agent drove right instead of left. (rewarded -0.21)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('right', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: forward, reward: -9.21809249003
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 


/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
state variables below
('forward', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 5), heading: (0, -1), action: right, reward: 1.05157664892
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 14, 't': 11, 'action': 'right', 'reward': 1.0515766489158485, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 1.05)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('left', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 5), heading: (0, -1), 

| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('left', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 4), heading: (0, -1), action: forward, reward: -0.364390323177
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 7, 't': 13, 'action': 'forward', 'reward': -0.3643903231770783, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent drove forward instead of left. (rewarded -0.36)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('left', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 3), heading: (0, -1), action: forward, re

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 6), heading: (0, -1), action: forward, reward: -0.164993267472
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 2, 't': 28, 'action': 'forward', 'reward': -0.164993267472053, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove forward instead of left. (rewarded -0.16)
3% of time remaining to reach destination.

/-------------------
| Step 29 Results
\-------------------

Environment.step(): t = 29
state variables below
('left', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 6), heading: (0, -1), action: left, reward: -19.7689792309
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'left', 'left': None}, 'viol

epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4536; alpha = 0.0100
Simulating tria

Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.4267; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('forward', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() 


Environment.reset(): Trial set up with start = (8, 4), destination = (2, 2), deadline = 20
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3993; alpha = 0.0100
Simulating t

Environment.act() [POST]: location: (7, 5), heading: (0, -1), action: right, reward: 1.61827275333
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'left'), 'deadline': 1, 't': 24, 'action': 'right', 'reward': 1.6182727533276822, 'waypoint': 'right'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('right', 'red', None, 'left')
Agent followed the waypoint right. (rewarded 1.62)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 118
\-------------------------

Environment.reset(): Trial set up with start = (4, 4), destination = (2, 2), deadline = 20
Simulating trial. . . 
epsilon = 0.3809; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3809; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.3809; alpha = 0.0100
Simulating trial. . .

Environment.act() [POST]: location: (4, 4), heading: (0, -1), action: right, reward: 0.632533342847
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 6, 't': 14, 'action': 'right', 'reward': 0.6325333428469871, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent drove right instead of forward. (rewarded 0.63)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('left', 'red', 'forward', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 4), heading: (0, -1), action: None, reward: 1.91331372104
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left

\-------------------

Environment.step(): t = 1
state variables below
('forward', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: None, reward: 2.82395251957
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 24, 't': 1, 'action': None, 'reward': 2.8239525195656663, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.82)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('forward', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: left, reward: -39.6961114627
Environme


Environment.step(): t = 1
state variables below
('forward', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 3), heading: (0, 1), action: right, reward: 1.08302304571
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'left'), 'deadline': 24, 't': 1, 'action': 'right', 'reward': 1.0830230457149528, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'left')
Agent drove right instead of forward. (rewarded 1.08)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('left', 'red', 'forward', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 3), heading: (0, 1), action: None, reward: 1.26974083808
Environment.act(): Step data: {'inputs

Agent properly idled at a red light. (rewarded 1.13)
67% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 2), heading: (1, 0), action: left, reward: -9.6254779131
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 20, 't': 10, 'action': 'left', 'reward': -9.625477913102623, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -9.63)
63% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
state variables below
('forward', 'red', None, None)
<built-in method keys of dict 


Environment.step(): t = 3
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 5), heading: (0, 1), action: right, reward: 1.38469058663
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 22, 't': 3, 'action': 'right', 'reward': 1.384690586632428, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded 1.38)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables below
('forward', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 5), heading: (0, 1), action: None, reward: -5.02003636101
Environment.act(): Step data: {'inputs': {'light'

Agent attempted driving left through a red light. (rewarded -9.49)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('forward', 'red', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 3), heading: (0, 1), action: None, reward: 1.23037621103
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 12, 't': 8, 'action': None, 'reward': 1.230376211034558, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.23)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('forward', 'green', None, None)
<built-in method keys of d

Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.2288; alpha = 0.0100

/-------------------
| Step 0 Results
\--------

Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1994; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('forward', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [

Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1798; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables b

Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha = 0.0100
Simulating trial. . . 
epsilon = 0.1502; alpha =

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: forward, reward: 2.07635651603
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 19, 't': 1, 'action': 'forward', 'reward': 2.0763565160269337, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', None)
Agent followed the waypoint forward. (rewarded 2.08)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (3, 4), heading: (1, 0), action: forward, reward: 2.28605543592
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'lef

('forward', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: None, reward: 2.14222070612
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'left'), 'deadline': 26, 't': 4, 'action': None, 'reward': 2.1422207061220027, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.14)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('forward', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 6), heading: (1, 0), action: forward, reward: 2.67969002989
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': Non

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('left', 'red', 'right', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: None, reward: 0.908012061984
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'right', 'forward'), 'deadline': 13, 't': 7, 'action': None, 'reward': 0.9080120619839871, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', 'forward')
Agent properly idled at a red light. (rewarded 0.91)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('left', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 3), heading: (0, 1), action: ri

Agent properly idled at a red light. (rewarded 2.39)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('forward', 'red', 'forward', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 3), heading: (-1, 0), action: None, reward: 1.32808670844
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', 'left'), 'deadline': 10, 't': 15, 'action': None, 'reward': 1.3280867084376575, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', 'left')
Agent properly idled at a red light. (rewarded 1.33)
36% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
state variables below
('forward', 'green', None, 'left')
<built-in met

| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('forward', 'green', 'left', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 5), heading: (0, -1), action: forward, reward: 1.45023349814
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', 'right'), 'deadline': 6, 't': 14, 'action': 'forward', 'reward': 1.450233498142388, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', 'right')
Agent followed the waypoint forward. (rewarded 1.45)
25% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 155
\-------------------------

Environment.reset(): Trial set up with start = (8, 3), destination = (7, 6), dead

Agent properly idled at a red light. (rewarded 0.95)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 2), heading: (1, 0), action: None, reward: 1.21350171385
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 22, 't': 8, 'action': None, 'reward': 1.2135017138519242, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.21)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('forward', 'red', 'left', None)
<built-in method keys of dict object at 0x114fa

| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('right', 'red', 'forward', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), heading: (-1, 0), action: None, reward: 0.921898296015
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', 'forward'), 'deadline': 12, 't': 8, 'action': None, 'reward': 0.9218982960147591, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', 'forward')
Agent properly idled at a red light. (rewarded 0.92)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('right', 'green', 'right', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: forwa

Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 28, 't': 7, 'action': 'left', 'reward': 2.876523862711754, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.88)
77% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
state variables below
('left', 'green', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: forward, reward: 1.29709064208
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', None), 'deadline': 27, 't': 8, 'action': 'forward', 'reward': 1.297090642082476, 'waypoint': '

epsilon = -0.0592; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.0592; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.0592; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('right', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 7), heading: (0, -1), action: right, reward: 2.44405438585
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 35, 't': 0, 'action': 'right', 'reward': 2.444054385849116, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.44)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
state variables below
('right', 'red', 'right'

Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 24, 't': 1, 'action': 'forward', 'reward': 2.4932022181138143, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', None)
Agent followed the waypoint forward. (rewarded 2.49)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('left', 'green', 'forward', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: right, reward: 1.48498880287
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', 'forward'), 'deadline': 23, 't': 2, 'action': 'right', 'rew

epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.1288; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('right', 'red', 'right', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: locat

Environment.step(): t = 8
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 2), heading: (0, -1), action: None, reward: 2.57428784642
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 8, 'action': None, 'reward': 2.574287846417411, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.57)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 7), heading: (0, -1), action: forward, reward: 2.7500067


Environment.step(): t = 16
state variables below
('left', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 2), heading: (0, 1), action: forward, reward: -0.55471479868
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 4, 't': 16, 'action': 'forward', 'reward': -0.5547147986804659, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent drove forward instead of left. (rewarded -0.55)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
state variables below
('left', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 2), heading: (1, 0), action: left, reward: 1.44365020386
Environment.act(): Step d

epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2077; alpha = 0.01

Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.2369; alpha = 0.0100
Simulating trial. . . 
epsilon 

state variables below
('left', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: None, reward: 1.4063640594
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 20, 't': 0, 'action': None, 'reward': 1.406364059400351, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.41)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
state variables below
('left', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: None, reward: 1.97135695648
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': Non


Environment.step(): t = 1
state variables below
('forward', 'green', 'left', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: forward, reward: 2.53438311269
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 24, 't': 1, 'action': 'forward', 'reward': 2.5343831126939174, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 2.53)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('forward', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 3), heading: (0, -1), action: right, reward: 0.572272240069
Environment.act():

| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('forward', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 2), heading: (0, -1), action: None, reward: 1.79228994719
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 11, 't': 14, 'action': None, 'reward': 1.7922899471872062, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent properly idled at a red light. (rewarded 1.79)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 2), heading: (0, -1), action: None, reward: 0.78251

\-------------------

Environment.step(): t = 12
state variables below
('right', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 6), heading: (0, -1), action: right, reward: 1.97580805246
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'forward'), 'deadline': 13, 't': 12, 'action': 'right', 'reward': 1.9758080524597594, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'forward')
Agent followed the waypoint right. (rewarded 1.98)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('right', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 6), heading: (1, 0), action: right, reward: 0.894987578931
Environment.act():

50% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 37
\-------------------------

Environment.reset(): Trial set up with start = (5, 2), destination = (8, 7), deadline = 20
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.3609; alpha = 0.0100



Environment.step(): t = 11
state variables below
('forward', 'green', 'forward', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 3), heading: (0, 1), action: forward, reward: 1.11499200524
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', 'forward'), 'deadline': 19, 't': 11, 'action': 'forward', 'reward': 1.1149920052359785, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', 'forward')
Agent followed the waypoint forward. (rewarded 1.11)
60% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
state variables below
('forward', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: lo

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.04)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
state variables below
('forward', 'red', 'right', 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: None, reward: 2.19374993798
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', 'forward'), 'deadline': 27, 't': 3, 'action': None, 'reward': 2.1937499379811722, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', 'forward')
Agent properly idled at a red light. (rewarded 2.19)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables be

epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4522; alpha = 0.01

epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.4875; alpha = 0.0100

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
state variables below
('right', 'red', None, 'forward')
<built-in method keys of 


Environment.step(): t = 13
state variables below
('right', 'red', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 7), heading: (1, 0), action: None, reward: -0.154630245541
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', None), 'deadline': 17, 't': 13, 'action': None, 'reward': -0.1546302455411266, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', None)
Agent properly idled at a red light. (rewarded -0.15)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
state variables below
('right', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (1, 2), heading: (0, 1), action: right, reward: 1.47850212876
Environment.act(): Step data: {'in

epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.5305; alpha = 0.0100

/-------------------
| Step 0 Results
\--------------

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables below
('forward', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 3), heading: (1, 0), action: forward, reward: 2.79079362993
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 26, 't': 4, 'action': 'forward', 'reward': 2.7907936299276788, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent followed the waypoint forward. (rewarded 2.79)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('forward', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (5, 3), heading: (1, 0), actio


Environment.step(): t = 7
state variables below
('left', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 7), heading: (-1, 0), action: left, reward: 2.37054442297
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 13, 't': 7, 'action': 'left', 'reward': 2.370544422974808, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.37)
60% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 63
\-------------------------

Environment.reset(): Trial set up with start = (8, 5), destination = (5, 7), deadline = 25
Simulating trial. . . 
epsilon = -0.5885; alpha = 0.0100
Simulating trial. . .

| Step 6 Results
\-------------------

Environment.step(): t = 6
state variables below
('left', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: left, reward: 0.934252636082
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 14, 't': 6, 'action': 'left', 'reward': 0.9342526360820238, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 0.93)
65% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 65
\-------------------------

Environment.reset(): Trial set up with start = (3, 5), destination = (6, 4), deadline = 20
Simulating trial. . . 
epsilon

80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables below
('right', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: right, reward: 2.69897562852
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'left'), 'deadline': 16, 't': 4, 'action': 'right', 'reward': 2.6989756285235007, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'left')
Agent followed the waypoint right. (rewarded 2.70)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('forward', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 2), headi

('right', 'green', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 3), heading: (1, 0), action: right, reward: 2.09764718577
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'forward'), 'deadline': 20, 't': 0, 'action': 'right', 'reward': 2.0976471857740218, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 2.10)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
state variables below
('left', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (6, 3), heading: (1, 0), action: None, reward: 1.0693146775
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'rig

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('right', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 3), heading: (0, 1), action: right, reward: 2.21484129702
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 12, 't': 13, 'action': 'right', 'reward': 2.2148412970233977, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.21)
44% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 72
\-------------------------

Environment.reset(): Trial set up with start = (7, 2), destination = (2, 6), deadline = 25
Simulatin

Environment.act() [POST]: location: (8, 6), heading: (0, -1), action: left, reward: 2.13188320413
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 18, 't': 2, 'action': 'left', 'reward': 2.131883204129733, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.13)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
state variables below
('forward', 'green', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 5), heading: (0, -1), action: forward, reward: 1.96390837771
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation'

Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: forward, reward: 1.94070882181
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 24, 't': 1, 'action': 'forward', 'reward': 1.9407088218080593, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.94)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('forward', 'red', None, 'forward')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 2), heading: (0, -1), action: right, reward: 1.62805019618
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forwa

Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7168; alpha = 0.0100
Simulating trial. . . 
epsilon 

epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7306; alpha = 0.01

<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 5), heading: (1, 0), action: forward, reward: 1.74561277454
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 8, 't': 12, 'action': 'forward', 'reward': 1.7456127745379968, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.75)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
state variables below
('forward', 'red', 'left', 'right')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 6), heading: (0, 1), action: right, reward: -0.102995085387
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': 'lef

Environment.act() [POST]: location: (6, 2), heading: (1, 0), action: left, reward: 2.39871473892
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 29, 't': 6, 'action': 'left', 'reward': 2.398714738920017, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.40)
80% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('forward', 'green', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 2), heading: (1, 0), action: forward, reward: 1.31487766277
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left')

epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7508; alpha = 0.01

epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7766; alpha = 0.01

Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon = -0.7951; alpha = 0.0100
Simulating trial. . . 
epsilon 

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
state variables below
('left', 'green', 'left', 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 2), heading: (0, 1), action: left, reward: 1.95088173419
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', 'left'), 'deadline': 19, 't': 1, 'action': 'left', 'reward': 1.9508817341901852, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', 'left')
Agent followed the waypoint left. (rewarded 1.95)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
state variables below
('left', 'red', None, 'left')
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (4, 2), heading: (0, 1), action: None, rewa

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.26)
80% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
state variables below
('forward', 'green', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: forward, reward: 2.06140725757
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 24, 't': 6, 'action': 'forward', 'reward': 2.0614072575702496, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.06)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
state variables below
('forwa

80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
state variables below
('left', 'green', 'forward', None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: location: (8, 2), heading: (0, 1), action: forward, reward: 0.699907751435
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', None), 'deadline': 16, 't': 4, 'action': 'forward', 'reward': 0.699907751435079, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 0.70)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
state variables below
('left', 'red', None, None)
<built-in method keys of dict object at 0x114faab40>
Environment.act() [POST]: lo