In [1]:
import numpy as np
import cv2
from PIL import Image
import socket
# import shutil
# from skimage import io

class State:
    """
    Generic container for one observation of the environment.

    The raw observation is kept as a NumPy array in ``self.state_data``.
    """

    def __init__(self, state_data):
        """Store ``state_data`` after converting it with ``np.asarray``."""
        self.state_data = np.asarray(state_data)

    def process_state(self):
        """Processing hook; the base class does nothing."""
        pass

    def get_batch_tensor(self):
        """
        Return the state with a leading batch axis of length 1.

        Returns:
            Array of shape ``(1,) + self.get_shape()``.
        """
        holder = np.asarray(self.state_data)
        # ndarray.reshape returns a new array rather than reshaping in place,
        # so its result has to be the value that is returned.
        return holder.reshape((1, ) + holder.shape)

    def get_individual_tensor(self):
        """Return the state as a NumPy array without a batch axis."""
        return np.asarray(self.state_data)

    def get_shape(self):
        """Return the shape of the stored state array."""
        return self.state_data.shape

    def display(self):
        """Print the stored state array."""
        print(self.state_data)
        
# ------------------------------------

class Frame(State):
    """
    A single simulator screenshot, processed on construction.

    NOTE(review): the constructor arguments ``crop_factor``,
    ``destination_size`` and ``vert_cent`` are accepted but not used; the
    processing parameters are fixed inside ``__init__``.
    """

    def __init__(self, state_data, crop_factor=None, destination_size=None, vert_cent=0.5):
        State.__init__(self, state_data)
        # Fixed processing parameters: crop factors, vertical centre ratio,
        # and the dsize handed to cv2.resize.
        processed = self.process_state([0.7, 1.0], 0.7, (128,64))
        self.state_data = processed

    def process_state(self, crop_factor, vert_cent, destination_shape):
        """
        Run the full pipeline on ``self.state_data``:
        crop -> normalise -> gray scale -> downsample.
        """
        cropped = self.crop_frame(self.state_data, crop_factor, vert_cent)
        scaled = self.normalise_frame(cropped)
        gray = self.gray_scale(scaled)
        # after collapsing the channel axis the image must be 2-dimensional
        assert len(gray.shape) == 2
        return self.downsample_frame(gray, destination_shape)

    def gray_scale(self, frame, gray_scale_factor=[0.3, 0.3, 0.3]):
        """
        Collapse the last axis of ``frame`` with a dot product against
        ``gray_scale_factor``.
        """
        weights = np.asarray(gray_scale_factor)
        return np.dot(frame, weights)

    def normalise_frame(self, frame):
        """Cast ``frame`` to float32 and divide every value by 255."""
        return frame.astype('float32') / 255.0

    def downsample_frame(self, frame, destination_shape):
        """
        Resize ``frame`` to ``destination_shape`` (passed to cv2.resize as
        ``dsize``) using bicubic interpolation.
        """
        return cv2.resize(np.asarray(frame), dsize=destination_shape, interpolation=cv2.INTER_CUBIC)

    def crop_frame(self, frame, crop_factor, vert_cent):
        """
        Return a window cut out of ``frame``.

        crop_factor: (height ratio, width ratio) of the window relative to
                     the frame; ``None`` returns the frame untouched.
        vert_cent:   ratio of the frame height at which the window is
                     centred vertically. Horizontally the window is always
                     centred at half the frame width.
        """
        if crop_factor is None:
            return frame

        rows, cols = frame.shape[0], frame.shape[1]

        # half-extents of the window, in pixels
        half_height = int((crop_factor[0] * rows) // 2)
        half_width = int((crop_factor[1] * cols) // 2)

        # centre of the window, in pixels
        centre_row = int(rows * vert_cent)
        centre_col = int(cols * 0.5)

        top, bottom = centre_row - half_height, centre_row + half_height
        left, right = centre_col - half_width, centre_col + half_width
        return frame[top:bottom, left:right]

# ------------------------------------
class DataBuffer:
    """
    Keeps track of the n latest states.

    States are held in ``self.buffer`` (oldest first); once ``size`` entries
    are stored, adding another one drops the oldest.
    """

    def __init__(self, size=1):
        """Create an empty buffer that holds at most ``size`` states."""
        self.buffer = []
        self.size = size

    def get_input_tensor(self, in_batch=True):
        """
        Return the buffered states stacked into one array.

        When ``size`` is 1 or ``in_batch`` is true the stacked array is
        returned as is; otherwise a leading axis of length 1 is added.
        """
        arr = np.array(self.buffer)
        if self.size == 1 or in_batch:
            return arr
        return arr.reshape((1, ) + arr.shape)

    def get_input_shape(self):
        """
        Return the shape of the tensor produced by ``get_input_tensor()``.

        The previous implementation read ``self.current_state``, an attribute
        this class never defines, so every call raised AttributeError.
        """
        return self.get_input_tensor().shape

    def assign_to_buffer(self, state):
        """
        Append ``state`` to the buffer, evicting the oldest entry when full.

        ``State`` instances are unwrapped to their individual tensor first.
        """
        if isinstance(state, State):
            state = state.get_individual_tensor()
        if len(self.buffer) >= self.size:
            self.buffer.pop(0)
        self.buffer.append(state)
        
# ------------------------------------

class FrameBuffer(DataBuffer):
    """
    Buffer of the latest ``size`` frames, exposed as one tensor whose last
    axis indexes the frames.
    """

    def __init__(self, size = 4):
        """Create an empty frame buffer holding at most ``size`` frames."""
        DataBuffer.__init__(self, size=size)

    def get_input_shape(self):
        """Return the shape of the tensor produced by ``get_input_tensor()``."""
        return self.get_input_tensor().shape

    def get_input_tensor(self, in_batch=True):
        """
        Stack the buffered frames and move the frame axis to the end.

        ``in_batch`` is accepted for interface compatibility with
        ``DataBuffer`` but is not used.
        """
        stacked = np.array(self.buffer)
        return stacked.transpose((1, 2, 0))

    def assign_to_buffer(self, state):
        """
        Add a frame to the buffer.

        The very first frame is replicated ``size`` times so the tensor
        returned by ``get_input_tensor()`` has a constant number of channels
        from the start. Previously the empty-buffer branch stored a single
        frame, which made the channel count depend on how many frames had
        arrived so far.
        """
        if isinstance(state, State):
            state = state.get_individual_tensor()

        # if buffer not initialised: pad it with copies of the first frame
        if len(self.buffer) == 0:
            self.buffer = [state] * max(1, self.size)
            return

        if len(self.buffer) >= self.size:
            self.buffer.pop(0)

        self.buffer.append(state)

In [2]:
from pathlib import Path
import os
import os.path
import random

class EnvironmentWrapper:
    """
    Wraps the car simulator behind a reset/step interface.

    The wrapper listens on a TCP socket, waits for the simulator to connect,
    sends it action strings and reads back a comma separated status line.
    Frames are read from PNG screenshots in ``self.final_path``.
    """

    def __init__(self):
        """
        Open the server socket, block until the simulator connects, prepare
        the screenshot directory and perform the initial ``reset()``.
        """
        # initialise comms with the simulator here
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # initialise the socket
        # listen on localhost, port 4444
        self.sock.bind(("127.0.0.1", 4444))
        self.sock.listen(1)
        print("Waiting to connect to Simulator...")
        self.clientsocket, _ = self.sock.accept() # connect to simulator [BLOCKING]
        print("Connected to simulator!")
        # =========================================

        # number of frames kept in the frame buffer (could change this)
        self.buffer_size = 4

        self.current_state = None

        self.prev_dist = 0

        self.time_steps = 0

        self.done = False

        self.max_time_steps_per_episode = 500 #change this based on the enviorment

        # this is the FrameBuffer that keeps track of the latest frames.
        self.current_buffer = FrameBuffer(size = self.buffer_size)

        # =========================================

        # Create target directory if it doesn't exist
        parent_path = os.path.abspath(os.path.join("", os.pardir))
        self.final_path = parent_path + "/simulation/Screenshots"
        if not os.path.exists(self.final_path):
            os.mkdir(self.final_path)
            print("Directory " , self.final_path,  " Created ")
        else:
            print("Directory " , self.final_path,  " already exists")

        # Save scr1 to Screenshots if it doesn't already exist.
        # parent_path is a local variable; reading it through self raised
        # AttributeError whenever this branch was taken.
        if not os.path.exists(self.final_path + '/scr1.png'):
            scr1 = Image.open(parent_path + '/simulation/scr1.png', 'r')
            scr1.save(self.final_path + "/scr1.png", "PNG")

        # Defined before reset() so the object is fully populated by the time
        # the first step is taken.
        self.action_space = ['as', 'ar', 'al', 'ds', 'dr', 'dl', 'bs', 'br', 'bl']

        # reset actually initializes self.current_state, self.current_buffer etc.
        self.reset()

    def get_input_shape(self):
        """
        returns the input shape for the input layer of the network
        """
        return self.current_buffer.get_input_shape()

    def get_random_action(self):
        """
        returns a random entry of ``self.action_space``.
        """
        return random.choice(self.action_space)

    def get_action_at_index(self, index):
        """returns the action string stored at ``index`` in ``self.action_space``."""
        return self.action_space[index]

    def get_num_action_space(self):
        """
        returns the number of entries in ``self.action_space``.
        """
        return len(self.action_space)

    def reset(self):
        """
        resets the environment. self.done denotes whether the episode is done i.e. the car has crashed or we have stopped it

        Sends the ``'reset'`` message through ``step`` and returns only the
        frame tensor (first element of the resulting state tuple).
        """
        self.done = False
        self.time_steps = 0

        tup = self.step('reset') # initial step. Says don't do anything but send the first frames and game info
        self.current_state, _, self.done = tup
        return self.current_state[0] # send only the frames

    def step(self, action):
        """
        does the action and returns the reward for that action along with the next state

        Returns:
            ``(state, reward, done)`` where ``state`` is the tuple
            ``(buffer_tensor, angle, distance, speed)``; or ``None`` when the
            episode had already ended, so callers that unpack the result must
            check ``is_done()`` first.
        """
        self.time_steps += 1

        if not self.is_done():
            # if the episode has not ended
            #=======================

            # send the action
            self.send_message(action)

            # wait for results from that action
            angle, distance, speed, self.done, frames_captured = self.get_game_stats() # blocking line

            # add images from path to current_buffer
            for i in range(1, frames_captured + 1):
                # each Frame object is then assigned to the FrameBuffer class in chronological order
                path = self.final_path + '/scr{0}.png'.format(i)
                self.current_buffer.assign_to_buffer(self.get_frame(path))

            buffer_tensor = self.current_buffer.get_input_tensor()
            # ========================================

            # calculate reward
            dist_delta = self.prev_dist - distance
            self.prev_dist = distance
            if abs(dist_delta) > 10:
                dist_delta = 5 # if there's too big a jump in the distance, the car has passed a checkpoint.
                # so, don't penalise it for that.

            reward = (dist_delta * 0.9) - (abs(angle) * 0.1)
            #=================

            # A buffer is a collection of consecutive frames that we feed to the NN. These frames are already processed.

            # the current state consists of all the information from the environment
            self.current_state = (buffer_tensor, angle, distance, speed)

            # this returns the state of the environment after the action has been completed, the reward for the action and if the episode ended.
            return self.current_state , reward, self.done
        else:
            return None

    def send_message(self, string):
        """Encode ``string`` and send all of it to the simulator socket."""
        self.clientsocket.sendall(string.encode())

    def receive_message(self):
        """Read up to 256 bytes from the simulator socket and decode them."""
        data  = self.clientsocket.recv(256).decode()
        return data

    def is_done(self):
        """
        returns if the episode is finished
        """
        return self.done

    def get_frame(self, path: str) -> "Frame":
        """
        Load the image at ``path``, convert it to a float32 array and wrap it
        in a Frame object.
        """
        image = Image.open(path, 'r')
        image.load()
        np_data = np.asarray(image, dtype="float32" )
        return Frame(np_data)

    def get_current_state(self):
        """
        returns the frame buffer holding the latest frames.

        The method previously had no ``self`` parameter and read a
        misspelt attribute, so it could not be called on an instance.
        """
        return self.current_buffer

    def get_game_stats(self):
        """
        Wait for the simulator's status line and parse it.

        The line is split on ``", "`` into five fields. Returns the tuple
        ``(angle, distance, speed, crashed_state, frames_captured)`` where
        ``crashed_state`` is True only when the fourth field is ``'1'``.
        """
        # wait for info to arrive
        string = self.receive_message()

        # process string
        value_list = string.split(", ")
        angle = float(value_list[0])
        distance = float(value_list[1])
        speed = float(value_list[2])
        crashed_state = False
        if value_list[3] == '1':
            crashed_state = True
        frames_captured = int(value_list[4])
        # return tuple of values
        return angle, distance, speed, crashed_state, frames_captured

    def close(self):
        """
        Close the listening socket and the simulator connection.
        """
        self.sock.close()
        self.clientsocket.close()


In [3]:
class Agent:
    """
    Epsilon-greedy agent that picks actions for an environment using the
    q-values of a network.
    """

    def __init__(self, environment, network, run_only=False, eps_decay_rate=0.9975,max_exp_rate=1.0, min_exp_rate=0.05):
        """
        environment:    the environment wrapper the agent acts in
        network:        object providing ``get_max_q_value_index``
        run_only:       when true the agent never explores (rate and floor are 0)
        eps_decay_rate: factor the exploration rate is multiplied by per update
        max_exp_rate:   starting exploration rate
        min_exp_rate:   floor of the exploration rate
        """
        self.env = environment # this should be the environment wrapper class

        if run_only:
            # Pure exploitation. The floor must stay at 0 as well, otherwise
            # update_epsilon() would raise the rate back up to min_exp_rate.
            self.exp_rate = 0.0
            self.min_exp_rate = 0.0
        else:
            self.exp_rate = max_exp_rate     # our network starts off with this exploration rate
            self.min_exp_rate = min_exp_rate # never explore less often than this

        self.decay_rate = eps_decay_rate   # decay the exploration rate at this rate

        self.time_step = 0     # keeps track of time steps

        self.network = network

    def take_action(self, current_state):
        """
        Choose an action epsilon-greedily, apply it to the environment and
        return the experience tuple
        ``(current_state, action_index, reward, next_frames, done)``.
        """
        result = random.random()                      # uniform random number in [0, 1)
        if result > self.get_exp_rate():              # above the explore rate: exploit
            # Get the action with the maximum q-value
            action = self.env.get_action_at_index(
                self.network.get_max_q_value_index(current_state, which_net = 'online', batch=False))
        else:                                         # otherwise explore
            action = self.env.get_random_action()

        self.increment_time_step()
        next_state, reward, done = self.env.step(action) # take the action and record the reward

        # next_state[0] is the frame tensor of the environment state tuple
        return current_state, self.env.action_space.index(action), reward, next_state[0], done

    def reset_time_steps(self, i=0):
        """Set the time step counter to ``i``."""
        # Writes the same attribute that increment_time_step() updates; the
        # old code assigned a differently named attribute by mistake.
        self.time_step = i

    def increment_time_step(self):
        """Advance the time step counter by one."""
        self.time_step += 1

    def update_epsilon(self):
        """Decay the exploration rate once, never going below the floor."""
        if self.exp_rate > self.min_exp_rate:
            # clamp so a single decay step cannot undershoot the floor
            self.exp_rate = max(self.min_exp_rate, self.exp_rate * self.decay_rate)
        else:
            self.exp_rate = self.min_exp_rate

    def get_exp_rate(self):
        """Return the current exploration rate."""
        return self.exp_rate

In [4]:
from keras import layers, models
import tensorflow as tf
from keras.optimizers import Adam, RMSprop
class NetworkTracker:
    """
    Holds the online model and its target copy and exposes q-value queries
    on either of them.
    """

    def __init__(self, environment, source=True, verbose = True, network_name = None): # pass in the environment which has input shape of the frame
        """
        environment:  used by ``define_model`` for input shape and action count
        source:       when true the model is loaded from ``network_name``,
                      otherwise a new model is defined
        verbose:      stored as 0/1 and forwarded to ``model.fit``
        network_name: file the model is loaded from / saved to
        """
        self.network_name = network_name
        if source:
            self.model = models.load_model(self.network_name)
        else:
            self.model = self.define_model(environment)

        self.target_model = None
        self.clone_policy()

        self.verbose = 0
        if verbose:
            self.verbose = 1

    def define_model(self, env):
        """
        Build and compile the network: two conv/max-pool stages followed by
        separate advantage and value heads whose outputs are added.
        """
        inp_layer = layers.Input(env.get_input_shape())

        conv_1 = layers.Conv2D(filters=10, 
                                kernel_size=(4,4), 
                                activation='relu')(inp_layer) # first layer takes input shape from the environment

        maxpool_1 = layers.MaxPool2D((3, 3))(conv_1)

        conv_2 = layers.Conv2D(filters=20, kernel_size = (3, 3), strides=2, activation='relu')(maxpool_1)

        maxpool_2 = layers.MaxPool2D(3, 3)(conv_2)

        flatten = layers.Flatten()(maxpool_2)

        # advantage head: one output per action
        adv_dense_1 = layers.Dense(64, activation = 'relu')(flatten)

        adv_out = layers.Dense(env.get_num_action_space(), activation='linear', name = 'advantage') (adv_dense_1)

        # value head: a single output
        val_dense_1 = layers.Dense(32, activation = 'relu')(flatten)

        val_out = layers.Dense(1, activation='linear', name = 'value') (val_dense_1)

        def q_out(x):
            # NOTE(review): value and advantage are simply added; the mean
            # advantage is not subtracted.
            value = x[0]
            advantage = x[1]
            return value + advantage

        q_output = layers.Lambda(q_out)([val_out, adv_out])

        model = models.Model(inp_layer, q_output, name='dueling_dqn')

        model.compile(optimizer=Adam(lr=0.0012), loss='mse')
        return model

    def get_q_values(self, states, which_net = 'online', batch = False):
        """
        get q values for specified states.
        INPUTS:
        1. states: the input states
        2. which_net: which net you want the prediction from ('online' or 'target')
        3. batch: true if there are multiple states, false if there is only one state

        Returns the prediction for the single state when ``batch`` is false,
        otherwise the whole prediction array.
        """
        if isinstance(states[0], DataBuffer): # if you have a list of buffers, convert them to numpy tensors
            states = np.asarray([i.get_input_tensor(in_batch=True) for i in states])

        # a single state needs a leading batch axis before it can be predicted on
        if (not batch) and (not states.shape[0] == 1):
            states = np.expand_dims(states, axis=0)

        output_tensor = None

        if which_net == 'online':
            output_tensor = self.model.predict(states) 
        elif which_net == 'target':
            output_tensor = self.target_model.predict(states) 

        if not batch:
            return output_tensor[0]  # you want to convert the 2 dimensional output to 1 dimension to call argmax
        else:
            return output_tensor

    def get_max_q_value_index(self, states, which_net = 'online', batch=False):
        """
        Return the index of the largest q-value; for a batch the argmax is
        taken along axis 1, for a single state along axis 0.
        """
        return np.argmax(self.get_q_values(states, which_net=which_net, batch = batch), axis=int(batch))

    def fit(self, states_batch, targets_batch):
        """
        Fit the states with the target batch
        """
        # Honour the verbosity chosen at construction time; it used to be
        # stored but a hard-coded verbose=1 was passed here instead.
        self.model.fit(states_batch, targets_batch, verbose=self.verbose)

    def clone_policy(self):
        """
        Save the online model to ``network_name`` and reload that file as the
        target model.
        """
        self.model.save(self.network_name)
        self.target_model = models.load_model(self.network_name)

    def get_model_summary(self):
        """
        Return a summary of the defined model
        """
        return self.model.summary()

    def save_policy(self):
        """Save the online model to ``network_name``."""
        self.model.save(self.network_name)

In [5]:
import random
import pickle
class Memory:
    """
    Fixed-capacity experience replay memory.

    Experiences are appended until ``limit`` entries are stored; after that
    new experiences overwrite existing slots in ring-buffer order.
    """

    def __init__(self, size, save_path, mem_source = None):
        """
        size:       maximum number of experiences kept
        save_path:  directory ``save_to_disk`` writes ``memory.pkl`` into
        mem_source: optional directory to load an existing ``memory.pkl`` from
        """
        self.save_path = save_path
        self.replay = []
        self.limit = size
        self.exp_count = 0
        if mem_source is not None:
            # pick up memory from the specified progess folder
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only point mem_source at files this program wrote itself.
            with open(mem_source + '/memory.pkl', 'rb') as file:
                self.replay = pickle.load(file)[:self.limit] # limit the list size based on the memory limit you specify

            self.exp_count = len(self.replay)

    def push(self, experience):
        """
        Store one experience.

        The decision to append is based on the actual list length, so the
        memory really fills up to ``limit`` entries. The earlier counter-based
        check stopped one short of the limit and divided by zero for a limit
        of 1.
        """
        if len(self.replay) < self.limit:
            self.replay.append(experience)  #append to experiences
        else:
            # wrap around if the memory capacity is reached
            self.replay[self.exp_count % self.limit] = experience
        self.exp_count += 1
        assert len(self.replay) <= self.limit

    def is_usable(self, batch_size):
        """Return True when at least ``batch_size`` experiences are stored."""
        return len(self.replay) >= batch_size

    def reset_replay_memory(self):
        """Drop every stored experience and reset the push counter."""
        self.exp_count = 0
        self.replay = []

    def save_to_disk(self):
        """Pickle the stored experiences to ``save_path + '/memory.pkl'``."""
        with open(self.save_path + '/memory.pkl', 'wb') as file:
            pickle.dump(self.replay, file)

    def sample(self, batch_size):
        """Return ``batch_size`` experiences drawn without replacement."""
        return random.sample(self.replay, batch_size)

In [6]:
def extract_tensors(sample):
    """
    Unzip a sample of experience tuples into five NumPy arrays.

    Each experience is indexed at positions 0..4, read as
    (state, action, reward, next_state, done). The return value is the tuple
    (states, actions, rewards, next_states, done_tensor), one array per
    position, in the order the experiences were given.
    """
    # one accumulator list per field of the experience tuple
    columns = tuple([] for _ in range(5))
    for experience in sample:
        for position, column in enumerate(columns):
            column.append(experience[position])

    states, actions, rewards, next_states, done_tensor = (np.asarray(column) for column in columns)
    return states, actions, rewards, next_states, done_tensor

In [7]:
def get_target_batch(states, actions, rewards, next_states, dones, net, gamma):
    """
    Build the batch of regression targets for the online network.

    The online network's q-values for ``states`` are used as the starting
    point; for every row only the entry of the action that was taken is
    replaced by ``reward + gamma * q``, where ``q`` is the target network's
    estimate for the next state at the action the online network ranks
    highest there, or 0 when the experience ended the episode.
    """
    assert actions.ndim == 1
    assert rewards.ndim == 1
    assert dones.ndim == 1
    assert len(actions) == len(rewards) == len(dones) == len(states) == len(next_states)

    # q-values of the current states; edited in place below and returned
    target_q_values = net.get_q_values(states, which_net = 'online', batch = True)

    # action selection for the next states comes from the online net ...
    which_acts = net.get_max_q_value_index(next_states, which_net= 'online', batch = True)

    # ... while the value of that action comes from the target net
    q_estimates = net.get_q_values(next_states, which_net = 'target', batch = True)

    for row, target_row in enumerate(target_q_values):
        # row indexes the batch axis
        if dones[row]:
            bootstrap = 0
        else:
            bootstrap = q_estimates[row][which_acts[row]]

        # only the (state, action) pair that was actually taken gets a new target
        target_row[actions[row]] = rewards[row] + gamma * bootstrap

    return target_q_values

In [8]:
import matplotlib.pyplot as plt
import os
from tempfile import TemporaryFile
quit = False  # NOTE(review): shadows the built-in quit(); not referenced anywhere else in the visible cells
import pickle 
def train_agent(contd=True, network_name = None, save_path = 'training_progress', contd_path = None, verbose=True, num_episodes=1500,
                discount = 0.95, batch_size = 64, N = 40, memory_size = 1024, 
                eps_decay_rate=0.9975, max_exp_rate=1.0, min_exp_rate=0.05, max_reward = 1000 ):
    # get all the hyperparameters in one place!
            
     
    # initialise your environment
    env = EnvironmentWrapper()
    
    # initialise your policy and target networks inside net
    net = NetworkTracker(env, source=contd, verbose=verbose, network_name=network_name)
    print(net.get_model_summary())
    
    # initialise your agent that will follow the epsilon greedy strategy
    agent = Agent(env, net, eps_decay_rate=eps_decay_rate, max_exp_rate=max_exp_rate,min_exp_rate=min_exp_rate )    
    
    # initialise experience replay memory
    memory = Memory(memory_size, save_path = save_path, mem_source = contd_path)
    
    # stores all the total reward per episode
    
    training_stats = []
    epochs = []
    starting_episode = 0
    
    sum_over_ten = 0
    moving_avg = []
    avg_epochs = []
    
    validations = []
    validation_epochs = []
    if contd:
        with open(save_path + '/training_stats.pkl', 'rb') as file:
            epochs, training_stats = pickle.load(file)
#             counter_temp = 0
#             temp_sum = 0
#             t = 0
#             for stat in training_stats:
#                 t += 1
#                 counter_temp += 1
#                 temp_sum += stat
#                 if counter_temp%10 == 0:
#                     counter_temp = 0
#                     avg = temp_sum / 10.0
#                     moving_avg.append(avg)
#                     avg_epochs.append(t)
#                     temp_sum = 0
        
        with open(save_path + '/moving_average.pkl', 'rb') as file:
            avg_epochs, moving_avg = pickle.load(file)          
        
        with open(save_path + '/episode_count.pkl', 'rb') as file:
            starting_episode = pickle.load(file)
            
    # graph display init code
    %matplotlib notebook
    plt.rcParams['animation.html'] = 'jshtml'
    fig = plt.figure()
    subplot = fig.add_subplot(111)
    
    for episode_count in range(starting_episode, num_episodes):
        # uncomment if you want to start the environmet with a random move
        # state = env.step(env.get_random_action())[0]
        valid_episode = False
        
        
        # check if the environment is available to run
        while not valid_episode:
            # keeps track of how many steps has been since the reward hasn't moved
            stuck_counter = 0

            # keeps track of the total reward that we got for this episode
            cumulative_reward = 0

            # keeps track of steps in the episode.
            counter = 0

            # reset environement and record the initial state before every episode
            state = env.reset()

            # store the experiences in a temporary tuple so that we only add them to memory if it was a valid episode.
            temp_exp = []
            stuck = False
            # ==============================
            while not env.is_done() and not stuck and cumulative_reward < max_reward: # run the environment for one episode
                counter += 1
                current_state, action, reward, next_state, done = agent.take_action(state) # let the agent take an action for one time step
                
                cumulative_reward += reward # add the reward to total rewards.
                
                # check if the car is stuck when the reward isn't changing by much
                if abs(reward) < 0.5:
                    stuck_counter += 1
                    if stuck_counter > 7:
                        done = True
                        stuck = True
                else:
                    stuck_counter = 0
                experience = current_state, action, reward, next_state, done # experience tuple 
                state = next_state # update the current state
                 # push the experience in memory
                temp_exp.append(experience)
            # ==============================
            
            if counter > 5:
                valid_episode = True
                for i in range(len(temp_exp)):
                    # temp_exp[i][-1] = cumulative_reward
                    memory.push(temp_exp[i])
                sum_over_ten += cumulative_reward

        agent.update_epsilon() # update the exploration rate of the agent after each episode

        if memory.is_usable(batch_size):
                experience_batch = memory.sample(batch_size) # sample randomly from memory
                states, actions, rewards, next_states, done_tensor = extract_tensors(experience_batch) # unzips the tensors

                target_batch = get_target_batch(states, actions, rewards, next_states, done_tensor, net, discount) # get a batch of target values to fit against

                net.fit(states, target_batch) # fit the network

        # append the training stats
        training_stats.append(cumulative_reward)
        epochs.append(episode_count)

        # clone the target policy every N episodes.
        if (episode_count + 1) % N == 0:
            net.clone_policy()
        
        if  (episode_count) % (N*2) == 0:
            performance = evaluate_agent(runs = 5, model_name = network_name, env = env)
            validations.append(performance)
            validation_epochs.append(epochs[-1])
            with open(save_path + '/validation.pkl', 'wb') as file:
                pickle.dump((validation_epochs, validations), file)
            

        # update the training chart every 10 episodes
        if (episode_count + 1) % 10 == 0:
            avg = sum_over_ten / 10.0
            sum_over_ten = 0
            moving_avg.append(avg)
            avg_epochs.append(epochs[-1])
            subplot.plot(epochs, training_stats, color='b')
            subplot.plot(avg_epochs, moving_avg, color = 'r')
            subplot.plot(validation_epochs, validations, color = 'g')
            fig.canvas.draw()
            
            # periodically save training progress.
            memory.save_to_disk()
            with open(save_path + '/training_stats.pkl', 'wb') as file:
                pickle.dump((epochs, training_stats), file)
            
            with open(save_path + '/moving_average.pkl', 'wb') as file:
                pickle.dump((avg_epochs, moving_avg), file)
            
            with open(save_path + '/episode_count.pkl', 'wb') as file:
                pickle.dump(episode_count, file)
            
            with open(save_path + '/epsilon.pkl', 'wb') as file:
                pickle.dump(agent.exp_rate, file)
            

        # if specified, print stats.
        if verbose:
            print("Episode Count: ", episode_count, "\t Cumulative Reward: ", round(cumulative_reward, 2), "\t eps: ", round(agent.exp_rate, 3) )

    
    memory.save_to_disk()
    net.save_policy()
    env.close()
    
    return epochs, training_stats, net





In [9]:
def evaluate_agent(runs, model_name, env = None):
    """
    Plays `runs` valid evaluation episodes and returns their mean cumulative reward.

    An episode ends when `env.is_done()` becomes true, or when the agent is judged
    to be stuck: more than 7 consecutive steps whose reward magnitude is below 0.5.
    Episodes lasting 3 time steps or fewer are discarded and replayed, so every
    run that is counted lasted at least 4 steps.

    Parameters
    ----------
    runs : int
        Number of valid episodes to average over. Must be at least 1.
    model_name : str
        Passed to `NetworkTracker` as `network_name`
        (presumably the saved model file to load -- confirm against NetworkTracker).
    env : optional
        Environment to evaluate in. When None, a fresh `EnvironmentWrapper` is
        created for this call and closed again before returning. An environment
        supplied by the caller is never closed here.

    Returns
    -------
    float
        Average of the per-episode cumulative rewards (each rounded to 2 decimals).

    Raises
    ------
    ValueError
        If `runs` is smaller than 1.
    """
    # Fail fast, before any simulator connection or model construction happens.
    if runs < 1:
        raise ValueError('runs must be a positive integer, got {0}'.format(runs))

    # Only an environment created here is ours to close; a caller-supplied one
    # (e.g. the training environment) is still needed after this call returns.
    owns_env = env is None
    if owns_env:
        env = EnvironmentWrapper()

    try:
        net = NetworkTracker(env, source=True, verbose=False, network_name=model_name)
        agent = Agent(env, net, run_only = True)

        perf_sum = 0
        for run in range(runs):
            # Replay until an episode is long enough to count.
            while True:
                state = env.reset()
                cumulative_reward = 0
                time_steps = 0
                stuck_counter = 0
                stuck = False
                while not env.is_done() and not stuck:
                    # take_action returns a 5-tuple; only reward and next state are needed here.
                    _, _, reward, state, _ = agent.take_action(state)
                    time_steps += 1
                    cumulative_reward += reward
                    if abs(reward) < 0.5:
                        # Near-zero reward: count it towards the stuck detector.
                        stuck_counter += 1
                        stuck = stuck_counter > 7
                    else:
                        stuck_counter = 0

                if time_steps > 3:
                    break

            cumulative_reward = round(cumulative_reward, 2)
            perf_sum += cumulative_reward
            print('run {0}: cumulative_reward: {1}, ran for: {2} timesteps'.format(run, round(cumulative_reward, 2), time_steps))

        avg_perf = float(perf_sum/runs)

        print('average performance: ', avg_perf)
        return avg_perf
    finally:
        if owns_env:
            env.close()
    
    

In [10]:
# Launch training; train_agent returns (epochs, training_stats, net), kept here as `history`.
history = train_agent(
    # --- checkpoint locations / continuation flag ---
    contd=True,
    contd_path='training_progress',
    save_path='training_progress',
    network_name='DuelingDdqn.h5',
    # --- run length and console logging ---
    num_episodes=1000,
    verbose=True,
    # --- learning hyper-parameters ---
    discount=0.99,
    batch_size=512,
    memory_size=4096,
    N=100,  # how often to clone the target policy
    max_reward=3000,
    # --- exploration-rate settings ---
    max_exp_rate=0.2,
    min_exp_rate=0.1,
    eps_decay_rate=0.999,
)

Waiting to connect to Simulator...
Connected to simulator!
Directory  C:\Users\Sohaib Saqib\Documents\GitHub\SelfDrivingResearch/simulation/Screenshots  already exists
Model: "dueling_dqn"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64, 128, 4)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 61, 125, 10)  650         input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 20, 41, 10)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)  

<IPython.core.display.Javascript object>

run 0: cumulative_reward: 6.64, ran for: 4 timesteps
run 1: cumulative_reward: 13.41, ran for: 4 timesteps
run 2: cumulative_reward: 12.83, ran for: 4 timesteps
run 3: cumulative_reward: 13.69, ran for: 4 timesteps
run 4: cumulative_reward: 7.51, ran for: 4 timesteps
average performance:  10.815999999999999
Episode Count:  0 	 Cumulative Reward:  20.76 	 eps:  0.999
Episode Count:  1 	 Cumulative Reward:  15.72 	 eps:  0.998
Episode Count:  2 	 Cumulative Reward:  16.64 	 eps:  0.997
Episode Count:  3 	 Cumulative Reward:  4.03 	 eps:  0.996
Episode Count:  4 	 Cumulative Reward:  16.14 	 eps:  0.995
Episode Count:  5 	 Cumulative Reward:  11.49 	 eps:  0.994
Episode Count:  6 	 Cumulative Reward:  15.31 	 eps:  0.993
Episode Count:  7 	 Cumulative Reward:  15.43 	 eps:  0.992
Episode Count:  8 	 Cumulative Reward:  22.78 	 eps:  0.991
Episode Count:  9 	 Cumulative Reward:  15.75 	 eps:  0.99
Episode Count:  10 	 Cumulative Reward:  22.23 	 eps:  0.989
Episode Count:  11 	 Cumulative 

Episode Count:  84 	 Cumulative Reward:  5.91 	 eps:  0.918
Episode Count:  85 	 Cumulative Reward:  17.89 	 eps:  0.918
Episode Count:  86 	 Cumulative Reward:  4.33 	 eps:  0.917
Episode Count:  87 	 Cumulative Reward:  40.69 	 eps:  0.916
Episode Count:  88 	 Cumulative Reward:  15.39 	 eps:  0.915
Episode Count:  89 	 Cumulative Reward:  26.07 	 eps:  0.914
Episode Count:  90 	 Cumulative Reward:  34.06 	 eps:  0.913
Episode Count:  91 	 Cumulative Reward:  26.73 	 eps:  0.912
Episode Count:  92 	 Cumulative Reward:  50.7 	 eps:  0.911
Episode Count:  93 	 Cumulative Reward:  15.55 	 eps:  0.91
Episode Count:  94 	 Cumulative Reward:  7.52 	 eps:  0.909
Episode Count:  95 	 Cumulative Reward:  19.67 	 eps:  0.908
Episode Count:  96 	 Cumulative Reward:  7.77 	 eps:  0.908
Episode Count:  97 	 Cumulative Reward:  18.04 	 eps:  0.907
Episode Count:  98 	 Cumulative Reward:  18.91 	 eps:  0.906
Episode Count:  99 	 Cumulative Reward:  9.44 	 eps:  0.905
Episode Count:  100 	 Cumulativ

Episode Count:  147 	 Cumulative Reward:  18.54 	 eps:  0.862
Episode Count:  148 	 Cumulative Reward:  14.19 	 eps:  0.862
Episode Count:  149 	 Cumulative Reward:  22.46 	 eps:  0.861
Episode Count:  150 	 Cumulative Reward:  4.94 	 eps:  0.86
Episode Count:  151 	 Cumulative Reward:  20.49 	 eps:  0.859
Episode Count:  152 	 Cumulative Reward:  51.95 	 eps:  0.858
Episode Count:  153 	 Cumulative Reward:  12.83 	 eps:  0.857
Episode Count:  154 	 Cumulative Reward:  12.53 	 eps:  0.856
Episode Count:  155 	 Cumulative Reward:  14.93 	 eps:  0.855
Episode Count:  156 	 Cumulative Reward:  24.23 	 eps:  0.855
Episode Count:  157 	 Cumulative Reward:  29.73 	 eps:  0.854
Episode Count:  158 	 Cumulative Reward:  19.35 	 eps:  0.853
Episode Count:  159 	 Cumulative Reward:  9.1 	 eps:  0.852
Episode Count:  160 	 Cumulative Reward:  6.15 	 eps:  0.851
Episode Count:  161 	 Cumulative Reward:  13.9 	 eps:  0.85
Episode Count:  162 	 Cumulative Reward:  15.89 	 eps:  0.85
Episode Count:  

Episode Count:  270 	 Cumulative Reward:  18.0 	 eps:  0.763
Episode Count:  271 	 Cumulative Reward:  26.2 	 eps:  0.762
Episode Count:  272 	 Cumulative Reward:  10.54 	 eps:  0.761
Episode Count:  273 	 Cumulative Reward:  11.59 	 eps:  0.76
Episode Count:  274 	 Cumulative Reward:  17.63 	 eps:  0.759
Episode Count:  275 	 Cumulative Reward:  28.01 	 eps:  0.759
Episode Count:  276 	 Cumulative Reward:  12.37 	 eps:  0.758
Episode Count:  277 	 Cumulative Reward:  21.92 	 eps:  0.757
Episode Count:  278 	 Cumulative Reward:  43.05 	 eps:  0.756
Episode Count:  279 	 Cumulative Reward:  19.64 	 eps:  0.756
Episode Count:  280 	 Cumulative Reward:  37.24 	 eps:  0.755
Episode Count:  281 	 Cumulative Reward:  33.8 	 eps:  0.754
Episode Count:  282 	 Cumulative Reward:  12.09 	 eps:  0.753
Episode Count:  283 	 Cumulative Reward:  22.94 	 eps:  0.753
Episode Count:  284 	 Cumulative Reward:  15.51 	 eps:  0.752
Episode Count:  285 	 Cumulative Reward:  24.45 	 eps:  0.751
Episode Coun

Episode Count:  333 	 Cumulative Reward:  12.53 	 eps:  0.716
Episode Count:  334 	 Cumulative Reward:  42.14 	 eps:  0.715
Episode Count:  335 	 Cumulative Reward:  9.87 	 eps:  0.715
Episode Count:  336 	 Cumulative Reward:  40.42 	 eps:  0.714
Episode Count:  337 	 Cumulative Reward:  13.12 	 eps:  0.713
Episode Count:  338 	 Cumulative Reward:  24.62 	 eps:  0.712
Episode Count:  339 	 Cumulative Reward:  28.34 	 eps:  0.712
Episode Count:  340 	 Cumulative Reward:  25.21 	 eps:  0.711
Episode Count:  341 	 Cumulative Reward:  4.81 	 eps:  0.71
Episode Count:  342 	 Cumulative Reward:  23.17 	 eps:  0.71
Episode Count:  343 	 Cumulative Reward:  28.59 	 eps:  0.709
Episode Count:  344 	 Cumulative Reward:  13.81 	 eps:  0.708
Episode Count:  345 	 Cumulative Reward:  44.38 	 eps:  0.707
Episode Count:  346 	 Cumulative Reward:  11.85 	 eps:  0.707
Episode Count:  347 	 Cumulative Reward:  22.25 	 eps:  0.706
Episode Count:  348 	 Cumulative Reward:  50.75 	 eps:  0.705
Episode Coun

Episode Count:  396 	 Cumulative Reward:  8.26 	 eps:  0.672
Episode Count:  397 	 Cumulative Reward:  24.22 	 eps:  0.672
Episode Count:  398 	 Cumulative Reward:  10.92 	 eps:  0.671
Episode Count:  399 	 Cumulative Reward:  16.36 	 eps:  0.67
run 0: cumulative_reward: 33.14, ran for: 15 timesteps
run 1: cumulative_reward: 49.98, ran for: 24 timesteps
run 2: cumulative_reward: 19.65, ran for: 32 timesteps
run 3: cumulative_reward: 21.82, ran for: 10 timesteps
run 4: cumulative_reward: 21.48, ran for: 13 timesteps
average performance:  29.214
Episode Count:  400 	 Cumulative Reward:  46.45 	 eps:  0.67
Episode Count:  401 	 Cumulative Reward:  11.64 	 eps:  0.669
Episode Count:  402 	 Cumulative Reward:  19.73 	 eps:  0.668
Episode Count:  403 	 Cumulative Reward:  25.32 	 eps:  0.668
Episode Count:  404 	 Cumulative Reward:  22.47 	 eps:  0.667
Episode Count:  405 	 Cumulative Reward:  22.47 	 eps:  0.666
Episode Count:  406 	 Cumulative Reward:  22.24 	 eps:  0.666
Episode Count:  4

Episode Count:  518 	 Cumulative Reward:  7.33 	 eps:  0.595
Episode Count:  519 	 Cumulative Reward:  20.96 	 eps:  0.594
Episode Count:  520 	 Cumulative Reward:  65.68 	 eps:  0.594
Episode Count:  521 	 Cumulative Reward:  7.04 	 eps:  0.593
Episode Count:  522 	 Cumulative Reward:  13.56 	 eps:  0.593
Episode Count:  523 	 Cumulative Reward:  38.79 	 eps:  0.592
Episode Count:  524 	 Cumulative Reward:  12.52 	 eps:  0.591
Episode Count:  525 	 Cumulative Reward:  49.34 	 eps:  0.591
Episode Count:  526 	 Cumulative Reward:  39.73 	 eps:  0.59
Episode Count:  527 	 Cumulative Reward:  30.99 	 eps:  0.59
Episode Count:  528 	 Cumulative Reward:  13.25 	 eps:  0.589
Episode Count:  529 	 Cumulative Reward:  18.32 	 eps:  0.588
Episode Count:  530 	 Cumulative Reward:  22.92 	 eps:  0.588
Episode Count:  531 	 Cumulative Reward:  64.0 	 eps:  0.587
Episode Count:  532 	 Cumulative Reward:  96.68 	 eps:  0.587
Episode Count:  533 	 Cumulative Reward:  16.67 	 eps:  0.586
Episode Count

Episode Count:  640 	 Cumulative Reward:  9.01 	 eps:  0.527
Episode Count:  641 	 Cumulative Reward:  33.11 	 eps:  0.526
Episode Count:  642 	 Cumulative Reward:  42.93 	 eps:  0.526
Episode Count:  643 	 Cumulative Reward:  18.44 	 eps:  0.525
Episode Count:  644 	 Cumulative Reward:  35.65 	 eps:  0.524
Episode Count:  645 	 Cumulative Reward:  7.78 	 eps:  0.524
Episode Count:  646 	 Cumulative Reward:  25.26 	 eps:  0.523
Episode Count:  647 	 Cumulative Reward:  11.27 	 eps:  0.523
Episode Count:  648 	 Cumulative Reward:  39.42 	 eps:  0.522
Episode Count:  649 	 Cumulative Reward:  25.72 	 eps:  0.522
Episode Count:  650 	 Cumulative Reward:  12.76 	 eps:  0.521
Episode Count:  651 	 Cumulative Reward:  17.92 	 eps:  0.521
Episode Count:  652 	 Cumulative Reward:  50.18 	 eps:  0.52
Episode Count:  653 	 Cumulative Reward:  48.08 	 eps:  0.52
Episode Count:  654 	 Cumulative Reward:  16.53 	 eps:  0.519
Episode Count:  655 	 Cumulative Reward:  21.4 	 eps:  0.519
Episode Count

Episode Count:  764 	 Cumulative Reward:  32.4 	 eps:  0.465
Episode Count:  765 	 Cumulative Reward:  36.68 	 eps:  0.465
Episode Count:  766 	 Cumulative Reward:  23.97 	 eps:  0.464
Episode Count:  767 	 Cumulative Reward:  16.8 	 eps:  0.464
Episode Count:  768 	 Cumulative Reward:  24.18 	 eps:  0.463
Episode Count:  769 	 Cumulative Reward:  22.19 	 eps:  0.463
Episode Count:  770 	 Cumulative Reward:  32.82 	 eps:  0.462
Episode Count:  771 	 Cumulative Reward:  17.49 	 eps:  0.462
Episode Count:  772 	 Cumulative Reward:  14.17 	 eps:  0.461
Episode Count:  773 	 Cumulative Reward:  12.45 	 eps:  0.461
Episode Count:  774 	 Cumulative Reward:  21.63 	 eps:  0.461
Episode Count:  775 	 Cumulative Reward:  13.36 	 eps:  0.46
Episode Count:  776 	 Cumulative Reward:  32.7 	 eps:  0.46
Episode Count:  777 	 Cumulative Reward:  20.91 	 eps:  0.459
Episode Count:  778 	 Cumulative Reward:  11.85 	 eps:  0.459
Episode Count:  779 	 Cumulative Reward:  24.95 	 eps:  0.458
Episode Count

Episode Count:  825 	 Cumulative Reward:  17.5 	 eps:  0.438
Episode Count:  826 	 Cumulative Reward:  39.51 	 eps:  0.437
Episode Count:  827 	 Cumulative Reward:  40.04 	 eps:  0.437
Episode Count:  828 	 Cumulative Reward:  25.1 	 eps:  0.436
Episode Count:  829 	 Cumulative Reward:  56.18 	 eps:  0.436
Episode Count:  830 	 Cumulative Reward:  27.43 	 eps:  0.435
Episode Count:  831 	 Cumulative Reward:  16.66 	 eps:  0.435
Episode Count:  832 	 Cumulative Reward:  27.31 	 eps:  0.435
Episode Count:  833 	 Cumulative Reward:  53.56 	 eps:  0.434
Episode Count:  834 	 Cumulative Reward:  12.59 	 eps:  0.434
Episode Count:  835 	 Cumulative Reward:  7.45 	 eps:  0.433
Episode Count:  836 	 Cumulative Reward:  101.6 	 eps:  0.433
Episode Count:  837 	 Cumulative Reward:  20.05 	 eps:  0.432
Episode Count:  838 	 Cumulative Reward:  8.81 	 eps:  0.432
Episode Count:  839 	 Cumulative Reward:  34.23 	 eps:  0.432
Episode Count:  840 	 Cumulative Reward:  37.97 	 eps:  0.431
Episode Coun

Episode Count:  949 	 Cumulative Reward:  20.16 	 eps:  0.387
Episode Count:  950 	 Cumulative Reward:  7.42 	 eps:  0.386
Episode Count:  951 	 Cumulative Reward:  18.83 	 eps:  0.386
Episode Count:  952 	 Cumulative Reward:  50.94 	 eps:  0.385
Episode Count:  953 	 Cumulative Reward:  27.75 	 eps:  0.385
Episode Count:  954 	 Cumulative Reward:  27.63 	 eps:  0.385
Episode Count:  955 	 Cumulative Reward:  61.85 	 eps:  0.384
Episode Count:  956 	 Cumulative Reward:  22.99 	 eps:  0.384
Episode Count:  957 	 Cumulative Reward:  27.01 	 eps:  0.383
Episode Count:  958 	 Cumulative Reward:  17.54 	 eps:  0.383
Episode Count:  959 	 Cumulative Reward:  13.04 	 eps:  0.383
Episode Count:  960 	 Cumulative Reward:  22.53 	 eps:  0.382
Episode Count:  961 	 Cumulative Reward:  34.68 	 eps:  0.382
Episode Count:  962 	 Cumulative Reward:  14.82 	 eps:  0.382
Episode Count:  963 	 Cumulative Reward:  29.85 	 eps:  0.381
Episode Count:  964 	 Cumulative Reward:  22.71 	 eps:  0.381
Episode C

Episode Count:  1009 	 Cumulative Reward:  11.77 	 eps:  0.364
Episode Count:  1010 	 Cumulative Reward:  61.54 	 eps:  0.364
Episode Count:  1011 	 Cumulative Reward:  18.86 	 eps:  0.363
Episode Count:  1012 	 Cumulative Reward:  24.43 	 eps:  0.363
Episode Count:  1013 	 Cumulative Reward:  53.52 	 eps:  0.363
Episode Count:  1014 	 Cumulative Reward:  6.49 	 eps:  0.362
Episode Count:  1015 	 Cumulative Reward:  57.45 	 eps:  0.362
Episode Count:  1016 	 Cumulative Reward:  9.73 	 eps:  0.361
Episode Count:  1017 	 Cumulative Reward:  38.71 	 eps:  0.361
Episode Count:  1018 	 Cumulative Reward:  35.18 	 eps:  0.361
Episode Count:  1019 	 Cumulative Reward:  25.53 	 eps:  0.36
Episode Count:  1020 	 Cumulative Reward:  25.83 	 eps:  0.36
Episode Count:  1021 	 Cumulative Reward:  26.24 	 eps:  0.36
Episode Count:  1022 	 Cumulative Reward:  34.72 	 eps:  0.359
Episode Count:  1023 	 Cumulative Reward:  65.89 	 eps:  0.359
Episode Count:  1024 	 Cumulative Reward:  43.26 	 eps:  0.3

Episode Count:  1071 	 Cumulative Reward:  46.01 	 eps:  0.342
Episode Count:  1072 	 Cumulative Reward:  60.08 	 eps:  0.342
Episode Count:  1073 	 Cumulative Reward:  32.82 	 eps:  0.341
Episode Count:  1074 	 Cumulative Reward:  15.47 	 eps:  0.341
Episode Count:  1075 	 Cumulative Reward:  18.07 	 eps:  0.341
Episode Count:  1076 	 Cumulative Reward:  34.97 	 eps:  0.34
Episode Count:  1077 	 Cumulative Reward:  22.68 	 eps:  0.34
Episode Count:  1078 	 Cumulative Reward:  47.03 	 eps:  0.34
Episode Count:  1079 	 Cumulative Reward:  46.13 	 eps:  0.339
Episode Count:  1080 	 Cumulative Reward:  10.69 	 eps:  0.339
Episode Count:  1081 	 Cumulative Reward:  43.51 	 eps:  0.339
Episode Count:  1082 	 Cumulative Reward:  15.74 	 eps:  0.338
Episode Count:  1083 	 Cumulative Reward:  21.65 	 eps:  0.338
Episode Count:  1084 	 Cumulative Reward:  23.04 	 eps:  0.338
Episode Count:  1085 	 Cumulative Reward:  24.76 	 eps:  0.337
Episode Count:  1086 	 Cumulative Reward:  17.86 	 eps:  0

Episode Count:  1133 	 Cumulative Reward:  23.99 	 eps:  0.322
Episode Count:  1134 	 Cumulative Reward:  25.58 	 eps:  0.321
Episode Count:  1135 	 Cumulative Reward:  35.32 	 eps:  0.321
Episode Count:  1136 	 Cumulative Reward:  23.63 	 eps:  0.321
Episode Count:  1137 	 Cumulative Reward:  43.05 	 eps:  0.32
Episode Count:  1138 	 Cumulative Reward:  13.68 	 eps:  0.32
Episode Count:  1139 	 Cumulative Reward:  13.21 	 eps:  0.32
Episode Count:  1140 	 Cumulative Reward:  34.6 	 eps:  0.319
Episode Count:  1141 	 Cumulative Reward:  21.8 	 eps:  0.319
Episode Count:  1142 	 Cumulative Reward:  81.13 	 eps:  0.319
Episode Count:  1143 	 Cumulative Reward:  15.86 	 eps:  0.318
Episode Count:  1144 	 Cumulative Reward:  10.48 	 eps:  0.318
Episode Count:  1145 	 Cumulative Reward:  59.6 	 eps:  0.318
Episode Count:  1146 	 Cumulative Reward:  41.09 	 eps:  0.317
Episode Count:  1147 	 Cumulative Reward:  18.47 	 eps:  0.317
Episode Count:  1148 	 Cumulative Reward:  45.92 	 eps:  0.31

Episode Count:  1195 	 Cumulative Reward:  19.76 	 eps:  0.302
Episode Count:  1196 	 Cumulative Reward:  34.2 	 eps:  0.302
Episode Count:  1197 	 Cumulative Reward:  38.66 	 eps:  0.302
Episode Count:  1198 	 Cumulative Reward:  88.57 	 eps:  0.301
Episode Count:  1199 	 Cumulative Reward:  14.74 	 eps:  0.301
run 0: cumulative_reward: 56.71, ran for: 27 timesteps
run 1: cumulative_reward: 28.02, ran for: 9 timesteps
run 2: cumulative_reward: 26.92, ran for: 18 timesteps
run 3: cumulative_reward: 35.25, ran for: 8 timesteps
run 4: cumulative_reward: 19.41, ran for: 14 timesteps
average performance:  33.262
Episode Count:  1200 	 Cumulative Reward:  60.53 	 eps:  0.301
Episode Count:  1201 	 Cumulative Reward:  29.81 	 eps:  0.3
Episode Count:  1202 	 Cumulative Reward:  18.03 	 eps:  0.3
Episode Count:  1203 	 Cumulative Reward:  90.1 	 eps:  0.3
Episode Count:  1204 	 Cumulative Reward:  44.49 	 eps:  0.3
Episode Count:  1205 	 Cumulative Reward:  37.14 	 eps:  0.299
Episode Count: 

Episode Count:  1254 	 Cumulative Reward:  33.92 	 eps:  0.285
Episode Count:  1255 	 Cumulative Reward:  16.91 	 eps:  0.285
Episode Count:  1256 	 Cumulative Reward:  55.18 	 eps:  0.284
Episode Count:  1257 	 Cumulative Reward:  30.87 	 eps:  0.284
Episode Count:  1258 	 Cumulative Reward:  56.21 	 eps:  0.284
Episode Count:  1259 	 Cumulative Reward:  21.28 	 eps:  0.283
Episode Count:  1260 	 Cumulative Reward:  26.54 	 eps:  0.283
Episode Count:  1261 	 Cumulative Reward:  12.09 	 eps:  0.283
Episode Count:  1262 	 Cumulative Reward:  42.6 	 eps:  0.283
Episode Count:  1263 	 Cumulative Reward:  22.64 	 eps:  0.282
Episode Count:  1264 	 Cumulative Reward:  35.92 	 eps:  0.282
Episode Count:  1265 	 Cumulative Reward:  96.72 	 eps:  0.282
Episode Count:  1266 	 Cumulative Reward:  14.15 	 eps:  0.281
Episode Count:  1267 	 Cumulative Reward:  16.31 	 eps:  0.281
Episode Count:  1268 	 Cumulative Reward:  9.12 	 eps:  0.281
Episode Count:  1269 	 Cumulative Reward:  64.71 	 eps:  

Episode Count:  1378 	 Cumulative Reward:  45.11 	 eps:  0.252
Episode Count:  1379 	 Cumulative Reward:  21.51 	 eps:  0.251
Episode Count:  1380 	 Cumulative Reward:  45.24 	 eps:  0.251
Episode Count:  1381 	 Cumulative Reward:  25.73 	 eps:  0.251
Episode Count:  1382 	 Cumulative Reward:  46.58 	 eps:  0.251
Episode Count:  1383 	 Cumulative Reward:  62.11 	 eps:  0.25
Episode Count:  1384 	 Cumulative Reward:  36.63 	 eps:  0.25
Episode Count:  1385 	 Cumulative Reward:  28.95 	 eps:  0.25
Episode Count:  1386 	 Cumulative Reward:  5.52 	 eps:  0.25
Episode Count:  1387 	 Cumulative Reward:  11.92 	 eps:  0.249
Episode Count:  1388 	 Cumulative Reward:  47.2 	 eps:  0.249
Episode Count:  1389 	 Cumulative Reward:  15.34 	 eps:  0.249
Episode Count:  1390 	 Cumulative Reward:  27.62 	 eps:  0.249
Episode Count:  1391 	 Cumulative Reward:  34.8 	 eps:  0.248
Episode Count:  1392 	 Cumulative Reward:  22.78 	 eps:  0.248
Episode Count:  1393 	 Cumulative Reward:  42.33 	 eps:  0.248

Episode Count:  1499 	 Cumulative Reward:  -0.36 	 eps:  0.223
Episode Count:  1500 	 Cumulative Reward:  82.19 	 eps:  0.223
Episode Count:  1501 	 Cumulative Reward:  24.72 	 eps:  0.223
Episode Count:  1502 	 Cumulative Reward:  25.51 	 eps:  0.222
Episode Count:  1503 	 Cumulative Reward:  45.24 	 eps:  0.222
Episode Count:  1504 	 Cumulative Reward:  23.62 	 eps:  0.222
Episode Count:  1505 	 Cumulative Reward:  141.29 	 eps:  0.222
Episode Count:  1506 	 Cumulative Reward:  103.72 	 eps:  0.221
Episode Count:  1507 	 Cumulative Reward:  22.89 	 eps:  0.221
Episode Count:  1508 	 Cumulative Reward:  38.0 	 eps:  0.221
Episode Count:  1509 	 Cumulative Reward:  41.02 	 eps:  0.221
Episode Count:  1510 	 Cumulative Reward:  38.59 	 eps:  0.221
Episode Count:  1511 	 Cumulative Reward:  3.47 	 eps:  0.22
Episode Count:  1512 	 Cumulative Reward:  35.77 	 eps:  0.22
Episode Count:  1513 	 Cumulative Reward:  85.43 	 eps:  0.22
Episode Count:  1514 	 Cumulative Reward:  23.58 	 eps:  0

Episode Count:  1561 	 Cumulative Reward:  2.27 	 eps:  0.21
Episode Count:  1562 	 Cumulative Reward:  17.53 	 eps:  0.209
Episode Count:  1563 	 Cumulative Reward:  139.14 	 eps:  0.209
Episode Count:  1564 	 Cumulative Reward:  32.42 	 eps:  0.209
Episode Count:  1565 	 Cumulative Reward:  28.92 	 eps:  0.209
Episode Count:  1566 	 Cumulative Reward:  50.41 	 eps:  0.209
Episode Count:  1567 	 Cumulative Reward:  51.94 	 eps:  0.208
Episode Count:  1568 	 Cumulative Reward:  14.17 	 eps:  0.208
Episode Count:  1569 	 Cumulative Reward:  39.8 	 eps:  0.208
Episode Count:  1570 	 Cumulative Reward:  38.97 	 eps:  0.208
Episode Count:  1571 	 Cumulative Reward:  45.17 	 eps:  0.207
Episode Count:  1572 	 Cumulative Reward:  71.18 	 eps:  0.207
Episode Count:  1573 	 Cumulative Reward:  32.13 	 eps:  0.207
Episode Count:  1574 	 Cumulative Reward:  52.14 	 eps:  0.207
Episode Count:  1575 	 Cumulative Reward:  14.05 	 eps:  0.207
Episode Count:  1576 	 Cumulative Reward:  46.92 	 eps:  

Episode Count:  1621 	 Cumulative Reward:  56.13 	 eps:  0.197
Episode Count:  1622 	 Cumulative Reward:  68.79 	 eps:  0.197
Episode Count:  1623 	 Cumulative Reward:  19.97 	 eps:  0.197
Episode Count:  1624 	 Cumulative Reward:  21.76 	 eps:  0.197
Episode Count:  1625 	 Cumulative Reward:  33.61 	 eps:  0.197
Episode Count:  1626 	 Cumulative Reward:  20.39 	 eps:  0.196
Episode Count:  1627 	 Cumulative Reward:  19.01 	 eps:  0.196
Episode Count:  1628 	 Cumulative Reward:  10.42 	 eps:  0.196
Episode Count:  1629 	 Cumulative Reward:  42.11 	 eps:  0.196
Episode Count:  1630 	 Cumulative Reward:  106.45 	 eps:  0.196
Episode Count:  1631 	 Cumulative Reward:  38.57 	 eps:  0.195
Episode Count:  1632 	 Cumulative Reward:  33.0 	 eps:  0.195
Episode Count:  1633 	 Cumulative Reward:  14.57 	 eps:  0.195
Episode Count:  1634 	 Cumulative Reward:  44.12 	 eps:  0.195
Episode Count:  1635 	 Cumulative Reward:  33.77 	 eps:  0.195
Episode Count:  1636 	 Cumulative Reward:  112.78 	 eps

Episode Count:  1745 	 Cumulative Reward:  74.64 	 eps:  0.174
Episode Count:  1746 	 Cumulative Reward:  28.87 	 eps:  0.174
Episode Count:  1747 	 Cumulative Reward:  20.8 	 eps:  0.174
Episode Count:  1748 	 Cumulative Reward:  28.55 	 eps:  0.174
Episode Count:  1749 	 Cumulative Reward:  43.31 	 eps:  0.174
Episode Count:  1750 	 Cumulative Reward:  38.27 	 eps:  0.173
Episode Count:  1751 	 Cumulative Reward:  94.67 	 eps:  0.173
Episode Count:  1752 	 Cumulative Reward:  67.06 	 eps:  0.173
Episode Count:  1753 	 Cumulative Reward:  32.37 	 eps:  0.173
Episode Count:  1754 	 Cumulative Reward:  18.02 	 eps:  0.173
Episode Count:  1755 	 Cumulative Reward:  16.13 	 eps:  0.173
Episode Count:  1756 	 Cumulative Reward:  48.46 	 eps:  0.172
Episode Count:  1757 	 Cumulative Reward:  24.75 	 eps:  0.172
Episode Count:  1758 	 Cumulative Reward:  17.57 	 eps:  0.172
Episode Count:  1759 	 Cumulative Reward:  32.27 	 eps:  0.172
Episode Count:  1760 	 Cumulative Reward:  22.42 	 eps: 

Episode Count:  1805 	 Cumulative Reward:  24.43 	 eps:  0.164
Episode Count:  1806 	 Cumulative Reward:  25.55 	 eps:  0.164
Episode Count:  1807 	 Cumulative Reward:  59.17 	 eps:  0.164
Episode Count:  1808 	 Cumulative Reward:  23.77 	 eps:  0.164
Episode Count:  1809 	 Cumulative Reward:  32.98 	 eps:  0.164
Episode Count:  1810 	 Cumulative Reward:  76.34 	 eps:  0.163
Episode Count:  1811 	 Cumulative Reward:  22.01 	 eps:  0.163
Episode Count:  1812 	 Cumulative Reward:  51.41 	 eps:  0.163
Episode Count:  1813 	 Cumulative Reward:  29.42 	 eps:  0.163
Episode Count:  1814 	 Cumulative Reward:  24.85 	 eps:  0.163
Episode Count:  1815 	 Cumulative Reward:  58.39 	 eps:  0.163
Episode Count:  1816 	 Cumulative Reward:  101.28 	 eps:  0.162
Episode Count:  1817 	 Cumulative Reward:  21.45 	 eps:  0.162
Episode Count:  1818 	 Cumulative Reward:  23.01 	 eps:  0.162
Episode Count:  1819 	 Cumulative Reward:  31.8 	 eps:  0.162
Episode Count:  1820 	 Cumulative Reward:  27.96 	 eps:

Episode Count:  1867 	 Cumulative Reward:  32.02 	 eps:  0.154
Episode Count:  1868 	 Cumulative Reward:  20.87 	 eps:  0.154
Episode Count:  1869 	 Cumulative Reward:  16.21 	 eps:  0.154
Episode Count:  1870 	 Cumulative Reward:  41.29 	 eps:  0.154
Episode Count:  1871 	 Cumulative Reward:  20.61 	 eps:  0.154
Episode Count:  1872 	 Cumulative Reward:  127.43 	 eps:  0.154
Episode Count:  1873 	 Cumulative Reward:  82.45 	 eps:  0.153
Episode Count:  1874 	 Cumulative Reward:  39.33 	 eps:  0.153
Episode Count:  1875 	 Cumulative Reward:  10.27 	 eps:  0.153
Episode Count:  1876 	 Cumulative Reward:  73.38 	 eps:  0.153
Episode Count:  1877 	 Cumulative Reward:  80.51 	 eps:  0.153
Episode Count:  1878 	 Cumulative Reward:  178.19 	 eps:  0.153
Episode Count:  1879 	 Cumulative Reward:  51.51 	 eps:  0.152
Episode Count:  1880 	 Cumulative Reward:  50.49 	 eps:  0.152
Episode Count:  1881 	 Cumulative Reward:  15.06 	 eps:  0.152
Episode Count:  1882 	 Cumulative Reward:  209.77 	 e

Episode Count:  1929 	 Cumulative Reward:  24.48 	 eps:  0.145
Episode Count:  1930 	 Cumulative Reward:  30.59 	 eps:  0.145
Episode Count:  1931 	 Cumulative Reward:  24.01 	 eps:  0.145
Episode Count:  1932 	 Cumulative Reward:  37.97 	 eps:  0.145
Episode Count:  1933 	 Cumulative Reward:  33.77 	 eps:  0.144
Episode Count:  1934 	 Cumulative Reward:  19.29 	 eps:  0.144
Episode Count:  1935 	 Cumulative Reward:  28.17 	 eps:  0.144
Episode Count:  1936 	 Cumulative Reward:  20.69 	 eps:  0.144
Episode Count:  1937 	 Cumulative Reward:  69.1 	 eps:  0.144
Episode Count:  1938 	 Cumulative Reward:  111.46 	 eps:  0.144
Episode Count:  1939 	 Cumulative Reward:  51.81 	 eps:  0.144
Episode Count:  1940 	 Cumulative Reward:  68.01 	 eps:  0.143
Episode Count:  1941 	 Cumulative Reward:  17.22 	 eps:  0.143
Episode Count:  1942 	 Cumulative Reward:  151.2 	 eps:  0.143
Episode Count:  1943 	 Cumulative Reward:  77.4 	 eps:  0.143
Episode Count:  1944 	 Cumulative Reward:  71.7 	 eps:  

Episode Count:  1991 	 Cumulative Reward:  25.89 	 eps:  0.136
Episode Count:  1992 	 Cumulative Reward:  154.3 	 eps:  0.136
Episode Count:  1993 	 Cumulative Reward:  95.34 	 eps:  0.136
Episode Count:  1994 	 Cumulative Reward:  12.84 	 eps:  0.136
Episode Count:  1995 	 Cumulative Reward:  51.53 	 eps:  0.136
Episode Count:  1996 	 Cumulative Reward:  25.07 	 eps:  0.136
Episode Count:  1997 	 Cumulative Reward:  10.07 	 eps:  0.135
Episode Count:  1998 	 Cumulative Reward:  75.0 	 eps:  0.135
Episode Count:  1999 	 Cumulative Reward:  54.48 	 eps:  0.135
run 0: cumulative_reward: 26.33, ran for: 19 timesteps
run 1: cumulative_reward: 52.74, ran for: 34 timesteps
run 2: cumulative_reward: 33.54, ran for: 32 timesteps
run 3: cumulative_reward: 85.22, ran for: 36 timesteps
run 4: cumulative_reward: 12.14, ran for: 17 timesteps
average performance:  41.99399999999999
Episode Count:  2000 	 Cumulative Reward:  71.07 	 eps:  0.135
Episode Count:  2001 	 Cumulative Reward:  126.1 	 eps: 

Episode Count:  2051 	 Cumulative Reward:  49.21 	 eps:  0.128
Episode Count:  2052 	 Cumulative Reward:  24.07 	 eps:  0.128
Episode Count:  2053 	 Cumulative Reward:  47.36 	 eps:  0.128
Episode Count:  2054 	 Cumulative Reward:  54.17 	 eps:  0.128
Episode Count:  2055 	 Cumulative Reward:  14.8 	 eps:  0.128
Episode Count:  2056 	 Cumulative Reward:  146.69 	 eps:  0.128
Episode Count:  2057 	 Cumulative Reward:  78.22 	 eps:  0.128
Episode Count:  2058 	 Cumulative Reward:  43.12 	 eps:  0.127
Episode Count:  2059 	 Cumulative Reward:  34.7 	 eps:  0.127
Episode Count:  2060 	 Cumulative Reward:  7.82 	 eps:  0.127
Episode Count:  2061 	 Cumulative Reward:  39.83 	 eps:  0.127
Episode Count:  2062 	 Cumulative Reward:  87.15 	 eps:  0.127
Episode Count:  2063 	 Cumulative Reward:  77.5 	 eps:  0.127
Episode Count:  2064 	 Cumulative Reward:  96.91 	 eps:  0.127
Episode Count:  2065 	 Cumulative Reward:  46.71 	 eps:  0.127
Episode Count:  2066 	 Cumulative Reward:  37.97 	 eps:  0

Episode Count:  2175 	 Cumulative Reward:  70.48 	 eps:  0.113
Episode Count:  2176 	 Cumulative Reward:  86.59 	 eps:  0.113
Episode Count:  2177 	 Cumulative Reward:  16.36 	 eps:  0.113
Episode Count:  2178 	 Cumulative Reward:  41.85 	 eps:  0.113
Episode Count:  2179 	 Cumulative Reward:  41.13 	 eps:  0.113
Episode Count:  2180 	 Cumulative Reward:  17.52 	 eps:  0.113
Episode Count:  2181 	 Cumulative Reward:  27.36 	 eps:  0.113
Episode Count:  2182 	 Cumulative Reward:  16.49 	 eps:  0.113
Episode Count:  2183 	 Cumulative Reward:  44.84 	 eps:  0.112
Episode Count:  2184 	 Cumulative Reward:  0.29 	 eps:  0.112
Episode Count:  2185 	 Cumulative Reward:  6.33 	 eps:  0.112
Episode Count:  2186 	 Cumulative Reward:  46.89 	 eps:  0.112
Episode Count:  2187 	 Cumulative Reward:  21.16 	 eps:  0.112
Episode Count:  2188 	 Cumulative Reward:  35.76 	 eps:  0.112
Episode Count:  2189 	 Cumulative Reward:  31.04 	 eps:  0.112
Episode Count:  2190 	 Cumulative Reward:  39.93 	 eps:  

Episode Count:  2235 	 Cumulative Reward:  23.42 	 eps:  0.107
Episode Count:  2236 	 Cumulative Reward:  18.67 	 eps:  0.107
Episode Count:  2237 	 Cumulative Reward:  106.86 	 eps:  0.107
Episode Count:  2238 	 Cumulative Reward:  30.95 	 eps:  0.106
Episode Count:  2239 	 Cumulative Reward:  13.26 	 eps:  0.106
Episode Count:  2240 	 Cumulative Reward:  39.76 	 eps:  0.106
Episode Count:  2241 	 Cumulative Reward:  36.64 	 eps:  0.106
Episode Count:  2242 	 Cumulative Reward:  68.01 	 eps:  0.106
Episode Count:  2243 	 Cumulative Reward:  21.18 	 eps:  0.106
Episode Count:  2244 	 Cumulative Reward:  51.66 	 eps:  0.106
Episode Count:  2245 	 Cumulative Reward:  13.32 	 eps:  0.106
Episode Count:  2246 	 Cumulative Reward:  53.48 	 eps:  0.106
Episode Count:  2247 	 Cumulative Reward:  31.51 	 eps:  0.105
Episode Count:  2248 	 Cumulative Reward:  46.29 	 eps:  0.105
Episode Count:  2249 	 Cumulative Reward:  70.49 	 eps:  0.105
Episode Count:  2250 	 Cumulative Reward:  18.88 	 eps

Episode Count:  2360 	 Cumulative Reward:  23.88 	 eps:  0.1
Episode Count:  2361 	 Cumulative Reward:  41.59 	 eps:  0.1
Episode Count:  2362 	 Cumulative Reward:  106.83 	 eps:  0.1
Episode Count:  2363 	 Cumulative Reward:  31.87 	 eps:  0.1
Episode Count:  2364 	 Cumulative Reward:  47.6 	 eps:  0.1
Episode Count:  2365 	 Cumulative Reward:  82.1 	 eps:  0.1
Episode Count:  2366 	 Cumulative Reward:  35.76 	 eps:  0.1
Episode Count:  2367 	 Cumulative Reward:  40.3 	 eps:  0.1
Episode Count:  2368 	 Cumulative Reward:  30.43 	 eps:  0.1
Episode Count:  2369 	 Cumulative Reward:  24.94 	 eps:  0.1
Episode Count:  2370 	 Cumulative Reward:  15.05 	 eps:  0.1
Episode Count:  2371 	 Cumulative Reward:  32.14 	 eps:  0.1
Episode Count:  2372 	 Cumulative Reward:  20.14 	 eps:  0.1
Episode Count:  2373 	 Cumulative Reward:  25.08 	 eps:  0.1
Episode Count:  2374 	 Cumulative Reward:  55.07 	 eps:  0.1
Episode Count:  2375 	 Cumulative Reward:  9.86 	 eps:  0.1
Episode Count:  2376 	 Cumu

Episode Count:  2483 	 Cumulative Reward:  19.14 	 eps:  0.1
Episode Count:  2484 	 Cumulative Reward:  21.71 	 eps:  0.1
Episode Count:  2485 	 Cumulative Reward:  20.15 	 eps:  0.1
Episode Count:  2486 	 Cumulative Reward:  23.55 	 eps:  0.1
Episode Count:  2487 	 Cumulative Reward:  17.12 	 eps:  0.1
Episode Count:  2488 	 Cumulative Reward:  7.65 	 eps:  0.1
Episode Count:  2489 	 Cumulative Reward:  24.5 	 eps:  0.1
Episode Count:  2490 	 Cumulative Reward:  18.25 	 eps:  0.1
Episode Count:  2491 	 Cumulative Reward:  37.16 	 eps:  0.1
Episode Count:  2492 	 Cumulative Reward:  31.92 	 eps:  0.1
Episode Count:  2493 	 Cumulative Reward:  24.69 	 eps:  0.1
Episode Count:  2494 	 Cumulative Reward:  30.59 	 eps:  0.1
Episode Count:  2495 	 Cumulative Reward:  16.86 	 eps:  0.1
Episode Count:  2496 	 Cumulative Reward:  26.18 	 eps:  0.1
Episode Count:  2497 	 Cumulative Reward:  20.26 	 eps:  0.1
Episode Count:  2498 	 Cumulative Reward:  41.04 	 eps:  0.1
Episode Count:  2499 	 Cum

In [11]:
# evaluate_agent(runs = 10, model_name = 'DuelingDdqn.h5', env = None)