# Deep Recurrent Q-Network for VizDoom


In [1]:
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy.misc
import os
import csv
import itertools
import tensorflow.contrib.slim as slim
%matplotlib inline

from helper2 import *

### Load the game environment

In [2]:
from vizdoom import *
a_size = 7  # Number of discrete actions; each action is a one-hot button vector (see actions_list)
image_size = 84  # Frames are processed down to image_size x image_size before entering the network

# Input selection: either the auxiliary buffers (2 channels) or the screen buffer,
# which is RGB (3 channels) or grayscale (1 channel).
use_other_buffers = False
use_RGB = False
if use_other_buffers:
    image_chls = 2
elif use_RGB:
    image_chls = 3
else:
    image_chls = 1

# ---- Doom environment set-up ----
game = DoomGame()
game.set_doom_scenario_path("deadly_corridor.wad")  # Scenario the agent is trained on
game.load_config("deadly_corridor.cfg")
game.set_doom_map("map01")
game.set_screen_resolution(ScreenResolution.RES_160X120)
game.set_screen_format(ScreenFormat.RGB8 if use_RGB else ScreenFormat.GRAY8)

# Rendering options: only the weapon stays visible.
for _setter, _enabled in (
        (game.set_render_hud, False),
        (game.set_render_crosshair, False),
        (game.set_render_weapon, True),
        (game.set_render_decals, False),
        (game.set_render_particles, False)):
    _setter(_enabled)

# Enable the other buffers as well (useful for test purposes).
for _setter in (
        game.set_depth_buffer_enabled,
        game.set_automap_buffer_enabled,
        game.set_labels_buffer_enabled):
    _setter(True)

# NOTE(review): no buttons are registered here; presumably deadly_corridor.cfg
# declares the a_size available buttons -- confirm against the config file.
actions_list = np.identity(a_size, dtype=bool).tolist()
print(actions_list)

for _variable in (GameVariable.AMMO2, GameVariable.POSITION_X, GameVariable.POSITION_Y):
    game.add_available_game_variable(_variable)

game.set_episode_timeout(300)
game.set_episode_start_time(0)
game.set_window_visible(False)
game.set_sound_enabled(False)
game.set_living_reward(-1)
game.set_mode(Mode.PLAYER)
game.init()
# ---- End of Doom set-up ----

env = game

[[True, False, False, False, False, False, False], [False, True, False, False, False, False, False], [False, False, True, False, False, False, False], [False, False, False, True, False, False, False], [False, False, False, False, True, False, False], [False, False, False, False, False, True, False], [False, False, False, False, False, False, True]]


### Implementing the network itself

In [3]:
class Qnetwork():
    """Dueling recurrent Q-network: four convolutions, one recurrent layer, Value/Advantage heads.

    The graph is built from the module-level ``image_size``, ``image_chls`` and ``a_size``.

    Args:
        h_size: Number of filters of the last convolution, which is also the size of the
            recurrent output. It is split in two halves, so it must be even.
        rnn_cell: Recurrent cell passed to ``tf.nn.dynamic_rnn``.
        myScope: Prefix used for the variable scopes of the convolutions and the RNN.
        learning_rate: Learning rate of the Adam optimizer.
    """
    def __init__(self,h_size,rnn_cell,myScope,learning_rate):
        #The network receives a frame from the game, flattened into an array.
        #It then resizes it and processes it through four convolutional layers.
        self.scalarInput =  tf.placeholder(shape=[None,image_size * image_size * image_chls],dtype=tf.float32)
        self.imageIn = tf.reshape(self.scalarInput,shape=[-1,image_size,image_size,image_chls])
        self.conv1 = slim.convolution2d( \
            inputs=self.imageIn,num_outputs=32,\
            kernel_size=[8,8],stride=[4,4],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv1')
        self.conv2 = slim.convolution2d( \
            inputs=self.conv1,num_outputs=64,\
            kernel_size=[4,4],stride=[2,2],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv2')
        self.conv3 = slim.convolution2d( \
            inputs=self.conv2,num_outputs=64,\
            kernel_size=[3,3],stride=[1,1],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv3')
        #With 84x84 inputs the spatial size shrinks 84 -> 20 -> 9 -> 7 -> 1, so the
        #flattened output of conv4 holds exactly h_size values per frame.
        self.conv4 = slim.convolution2d( \
            inputs=self.conv3,num_outputs=h_size,\
            kernel_size=[7,7],stride=[1,1],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv4')

        self.trainLength = tf.placeholder(dtype=tf.int32)
        #We take the output from the final convolutional layer and send it to a recurrent layer.
        #The input must be reshaped into [batch x trace x units] for rnn processing,
        #and then returned to [batch x units] when sent through the upper levels.
        self.batch_size = tf.placeholder(dtype=tf.int32,shape=[])
        self.convFlat = tf.reshape(slim.flatten(self.conv4),[self.batch_size,self.trainLength,h_size])
        self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
        self.rnn,self.rnn_state = tf.nn.dynamic_rnn(\
                inputs=self.convFlat,cell=rnn_cell,dtype=tf.float32,initial_state=self.state_in,scope=myScope+'_rnn')
        self.rnn = tf.reshape(self.rnn,shape=[-1,h_size])
        #The output from the recurrent layer is then split into separate Advantage and Value streams
        self.streamA,self.streamV = tf.split(self.rnn,2,1)
        self.AW = tf.Variable(tf.random_normal([h_size//2,a_size]))
        self.VW = tf.Variable(tf.random_normal([h_size//2,1]))
        self.Advantage = tf.matmul(self.streamA,self.AW)
        self.Value = tf.matmul(self.streamV,self.VW)

        #Gradient of the advantages with respect to the input image
        self.salience = tf.gradients(self.Advantage,self.imageIn)
        #Then combine them together to get our final Q-values.
        self.Qout = self.Value + tf.subtract(self.Advantage,tf.reduce_mean(self.Advantage,axis=1,keep_dims=True))
        self.predict = tf.argmax(self.Qout,1)

        #Below we obtain the loss by taking the squared difference between the target and prediction Q values.
        self.targetQ = tf.placeholder(shape=[None],dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None],dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions,a_size,dtype=tf.float32)

        #Q-value of the action that was actually taken
        self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)

        self.td_error = tf.square(self.targetQ - self.Q)

        #In order to only propagate accurate gradients through the network, we will mask the first
        #half of the losses for each trace as per Lample & Chaplot 2016.
        #The second half is sized as the remainder so that both halves always add up to
        #trainLength, including when trainLength is odd.
        half_trace = self.trainLength//2
        self.maskA = tf.zeros([self.batch_size,half_trace])
        self.maskB = tf.ones([self.batch_size,self.trainLength - half_trace])
        self.mask = tf.concat([self.maskA,self.maskB],1)
        self.mask = tf.reshape(self.mask,[-1])
        self.loss = tf.reduce_mean(self.td_error * self.mask)

        self.trainer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.updateModel = self.trainer.minimize(self.loss)

### Experience Replay

These classes allow us to store experiences and sample them randomly to train the network.
The episode buffer stores the experiences of each individual episode.
The experience buffer stores entire episodes of experience, and sample() allows us to get the training batches needed by the network.

In [41]:
class experience_buffer():
    """Replay memory of whole episodes with uniform and prioritized trace sampling.

    Every stored episode is an array of shape [len(episode), 6] whose columns are
    (s, a, r, s1, d, td_error). ``exp_prio_tuples`` holds one tuple
    (episode_index, experience_index, priority, len(episode) - trace_length) for every
    experience that can end a trace of ``trace_length`` steps.

    Invariant relied upon by ``add``, ``PRsample`` and ``load``: ``self.buffer[k]`` is the
    episode numbered ``self.episode_index - len(self.buffer) + 1 + k``.

    Args:
        trace_length: Number of consecutive experiences in a sampled trace.
        buffer_size: Maximum number of episodes kept in memory.
    """
    def __init__(self, trace_length = 8, buffer_size = 1000):
        self.buffer = []
        self.buffer_size = buffer_size
        self.trace_length = trace_length

        #Initialize counters and buffers for prioritized replay
        self.episode_index = -1
        self.alpha0 = 0.5 #Start-value of alpha, the prioritized replay probability exponent. Annealing is linear to 0.
        self.alpha = self.alpha0
        self.exp_prio_tuples = []

    def _append_priorities(self, episode_index, episode, priority):
        """Register one priority tuple per experience of `episode` that can end a full trace."""
        for experience_index in range(self.trace_length-1, len(episode)):
            tup = (episode_index, experience_index, float(priority[experience_index]), len(episode)-self.trace_length)
            self.exp_prio_tuples.append(tup)

    def add(self,episode):
        """Store an episode, evicting the oldest one (and its priority tuples) when full.

        Args:
            episode: Sequence of experiences that reshapes to [len(episode), 6]; column 5
                holds the TD error used as (proportional) sampling priority.
        """
        episode = np.reshape(np.array(episode),[len(episode),6])
        self.td_error = episode[:, 5]
        self.priority = np.absolute(self.td_error) + 1e-18 #proportional priority

        #Evict the oldest episode so that neither the buffer nor the tuple list grows without bound
        if self.buffer and len(self.buffer)+1 > self.buffer_size:
            #Tuples are appended in episode order, so the ones to drop sit at the front.
            #They are matched by episode number: an evicted episode shorter than
            #trace_length owns no tuple, and nothing must be removed in that case.
            oldest_index = self.episode_index - len(self.buffer) + 1
            cntr = 0
            while cntr < len(self.exp_prio_tuples) and self.exp_prio_tuples[cntr][0] <= oldest_index:
                cntr += 1
            del self.exp_prio_tuples[:cntr]
            print('length of poped element = ' + str(len(self.buffer[0]))+ ' , cntr = ' + str(cntr)+' , diff = ' +str(len(self.buffer[0])-cntr))
            self.buffer.pop(0)

        #Append episode to the priority replay tuple list
        self.episode_index += 1
        self._append_priorities(self.episode_index, episode, self.priority)

        self.buffer.append(episode)

    def sample(self,batch_size):
        """Uniformly sample `batch_size` distinct episodes and one random trace from each.

        Episodes shorter than `trace_length` cannot provide a trace and are skipped.

        Returns:
            Array of shape [batch_size*trace_length, 6].

        Raises:
            ValueError: If fewer than `batch_size` episodes are long enough.
        """
        eligible = [episode for episode in self.buffer if len(episode) >= self.trace_length]
        sampled_episodes = random.sample(eligible,batch_size)
        #Within the selected episodes, randomly select an experience trace of length trace_length
        sampledTraces = []
        for episode in sampled_episodes:
            point = np.random.randint(0,len(episode)+1-self.trace_length)
            sampledTraces.append(episode[point:point+self.trace_length])
        sampledTraces = np.array(sampledTraces)

        return np.reshape(sampledTraces,[batch_size*self.trace_length,6])

    def PRsample(self,batch_size,total_episodes=None):
        """Sample `batch_size` traces with probability proportional to priority**alpha.

        Each sampled experience is the LAST step of the returned trace.

        Args:
            batch_size: Number of traces to draw (with replacement).
            total_episodes: Number of episodes over which alpha is annealed linearly from
                alpha0 to 0. Defaults to the module-level ``num_episodes``.

        Returns:
            Array of shape [batch_size*trace_length, 6].

        Raises:
            ValueError: If no stored episode is long enough to provide a trace.
        """
        if not self.exp_prio_tuples:
            raise ValueError('no stored episode is long enough to sample a trace from')
        if total_episodes is None:
            total_episodes = num_episodes
        #alpha annealing (never below 0, which corresponds to uniform sampling)
        self.alpha = max(0.0, self.alpha0 - (self.episode_index * self.alpha0/total_episodes))

        #Compute the sampling probability distribution
        priorities_poweralpha = np.power([tup[2] for tup in self.exp_prio_tuples],self.alpha)
        sum_priorities_poweralpha = np.sum(priorities_poweralpha)
        sampling_probabilities = np.divide(priorities_poweralpha, sum_priorities_poweralpha)
        #Sample experiences using the computed distribution
        sampled_indexes = np.random.choice(len(self.exp_prio_tuples), batch_size, p = sampling_probabilities)
        sampled_tuples = [self.exp_prio_tuples[idx] for idx in sampled_indexes]
        ep_idx = [tup[0] for tup in sampled_tuples]
        print('ep_idx = ' + str(ep_idx))
        exp_idx = [tup[1] for tup in sampled_tuples]
        print('exp_idx = ' + str(exp_idx))
        sampledTraces = []
        #Number of the episode stored at self.buffer[0] (0 until the first eviction)
        idx_offset = self.episode_index - len(self.buffer) + 1
        print('idx_offset = ' + str(idx_offset) + ', self.episode_index = ' + str(self.episode_index) + ', len(self.buffer) = ' + str(len(self.buffer)))
        for i in range(0,batch_size):
            print('ep_idx[i] = ' + str(ep_idx[i]))
            sampled_ep = self.buffer[ep_idx[i] - idx_offset]
            sampled_ep = np.reshape(np.array(sampled_ep),[len(sampled_ep),6])
            sampled_trace = sampled_ep[exp_idx[i]+1-self.trace_length:exp_idx[i]+1]
            print('buffer_index = ' + str(ep_idx[i] - idx_offset) + ' , len(sampled_ep) = ' + str(len(sampled_ep)) + ', trace = from ' + str(exp_idx[i]+1-self.trace_length) + ' to ' + str(exp_idx[i]+1))
            sampledTraces.append(sampled_trace)
        sampledTraces = np.array(sampledTraces)
        print('sampledTraces.shape = ' + str(sampledTraces.shape))
        return np.reshape(sampledTraces,[batch_size*self.trace_length,6])

    def save(self, path2mdl):
        """Write the 40 most recent episodes to `path2mdl`/experienceBuffer.npy."""
        #Save only the last 40 episodes in the buffer, otherwise the file gets ridiculously large
        recent = self.buffer[-40:]
        #Episodes have different lengths: fill an object array one element at a time
        #instead of relying on implicit ragged-array creation.
        packed = np.empty(len(recent), dtype=object)
        for position, episode in enumerate(recent):
            packed[position] = episode
        np.save(path2mdl + '/experienceBuffer.npy', packed)

    def load(self, path2mdl):
        """Replace the buffer with the episodes saved by `save` and rebuild the priorities.

        Episodes are renumbered from 0, so `episode_index` becomes len(buffer) - 1.
        """
        #NOTE: allow_pickle unpickles arbitrary objects; only load files written by save().
        loaded = np.load(path2mdl + '/experienceBuffer.npy', allow_pickle=True)
        episodes = [np.reshape(np.array(episode),[len(episode),6]) for episode in loaded]
        if len(episodes) > self.buffer_size:
            episodes = episodes[len(episodes)-self.buffer_size:]
        self.buffer = episodes
        #Without these, prioritized sampling would have nothing to draw from and the
        #episode numbers of later add() calls would not line up with buffer positions.
        self.exp_prio_tuples = []
        self.episode_index = len(episodes) - 1
        for index, episode in enumerate(episodes):
            self._append_priorities(index, episode, np.absolute(episode[:, 5]) + 1e-18)

### Training the network

In [44]:
# ---- Training hyper-parameters ----

# Replay / optimisation
batch_size = 32  # Number of experience traces used for each training step
trace_length = 8  # Number of consecutive experiences in each training trace
update_freq = 5  # A training step is performed every `update_freq` environment steps
y = .99  # Discount factor applied to the target Q-values
learning_rate = 0.0001
tau = 0.001  # Rate at which the target network is updated towards the main network
h_size = 512  # Size of the final convolutional layer, before the Advantage/Value split
prioritized_replay = True  # Prioritized sampling (True) or uniform sampling (False)

# Exploration (chance of taking a random action)
startE = 1  # Initial value
endE = 0.1  # Final value

# Checkpointing
load_model = False  # Whether to resume from a saved model
# When resuming, this has to be updated to the episode of the last checkpoint.
last_saved_ep = 3000 if load_model else 0
path2mdl = "../DeepRL-Agents-Results/drqn"  # Where the model is saved
path2center = "../DeepRL-Agents-Results/Center"  # Where the Center information is saved

# Schedule
buffer_size = 300  # Capacity of the experience buffer, in episodes
max_epLength = 300  # Maximum allowed length of an episode
anneling_steps = max_epLength*2000  # Training steps over which startE is reduced to endE
num_episodes = 10000  # Number of episodes of the game environment used for training
pre_train_steps = max_epLength*100  # Steps of random actions before training begins; must be a multiple of max_epLength
time_per_step = 0.025  # Length of each step used in gif creation
summaryLength = 100  # Number of episodes between two periodic summaries

In [None]:
#


# Start from an empty graph, then build the primary and the target Q-networks,
# each one around its own LSTM cell.
tf.reset_default_graph()

cell, cellT = [tf.contrib.rnn.BasicLSTMCell(num_units=h_size, state_is_tuple=True)
               for _ in range(2)]
mainQN = Qnetwork(h_size, cell, 'main', learning_rate)
targetQN = Qnetwork(h_size, cellT, 'target', learning_rate)

trainables = tf.trainable_variables()
init = tf.global_variables_initializer()
targetOps = updateTargetGraph(trainables, tau)
saver = tf.train.Saver(max_to_keep=5)

# Per-episode history: number of steps (jList) and total reward (rList).
jList, rList = [], []

# Chance of a random action, and how much it drops on every training-phase step.
e = startE
stepDrop = (startE - endE) / anneling_steps

# Make sure the folder that receives the model checkpoints exists.
if not os.path.exists(path2mdl):
    os.makedirs(path2mdl)

# Main training session: play episodes, store every transition (including the terminal
# one) together with its TD error, and train the main network on sampled traces.
with tf.Session() as sess:
    if load_model:
        print ('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(path2mdl)
        saver.restore(sess,ckpt.model_checkpoint_path)
        #Rough (over)estimate of the total number of steps since the beginning of training.
        #Integer division keeps total_steps an int, as in the fresh-start branch.
        total_steps = last_saved_ep*300//update_freq
        myBuffer = experience_buffer(trace_length, buffer_size)
        myBuffer.load(path2mdl)
        #NOTE(review): e is not restored and restarts from startE -- confirm this is intended.
    else:
        #INITIALIZE VARIABLES AND MODEL
        myBuffer = experience_buffer(trace_length,buffer_size)

        total_steps = 0

        sess.run(init)
        #The log folder is not created anywhere else before log.csv is opened
        if not os.path.exists(path2center):
            os.makedirs(path2center)
        #Write the first line of the master log-file for the Control Center
        with open(path2center + '/log.csv', 'w') as myfile:
            wr = csv.writer(myfile, quoting=csv.QUOTE_ALL, lineterminator = '\n')
            wr.writerow(['Episode','Length','Reward','IMG','LOG','SAL'])
        #Set the target network to be equal to the primary network.
        updateTarget(targetOps,sess)

    for i in range(last_saved_ep, num_episodes):
        episodeBuffer = []

        #Reset environment and get first new observation
        env.new_episode()
        if use_other_buffers:
            st = game.get_state()
            dP = st.depth_buffer
            lP = st.labels_buffer
            sP = st.screen_buffer
            s = processBuffers(image_size, dP, lP, sP)
        else:
            sP = env.get_state().screen_buffer
            s = processImage(sP, image_size)
        d = False
        rAll = 0
        j = 0
        #Reset the recurrent layer's hidden state every episode
        state = (np.zeros([1,h_size]),np.zeros([1,h_size]))
        #The Q-Network
        while j < max_epLength:

            #With two channels, the trailing image_size*image_size values of the
            #observation are not fed to the network.
            if image_chls == 2:
                s_in = s[0:-image_size*image_size]
            else:
                s_in = s
            j+=1
            #Choose an action greedily (with e chance of random action) from the Q-network
            if np.random.rand(1) < e or total_steps < pre_train_steps:
                #Only update the state of the RNN layer
                state1 = sess.run(mainQN.rnn_state,
                                  feed_dict={mainQN.scalarInput:[s_in/255.0],
                                             mainQN.trainLength:1,
                                             mainQN.state_in:state,
                                             mainQN.batch_size:1})
                #Choose an action randomly
                a = np.random.randint(0,a_size)

            else:
                #Update the state of the RNN layer AND choose the best action
                a, state1 = sess.run([mainQN.predict,mainQN.rnn_state],
                                     feed_dict={mainQN.scalarInput:[s_in/255.0],
                                                mainQN.trainLength:1,
                                                mainQN.state_in:state,
                                                mainQN.batch_size:1})
                a = a[0]

            r = env.make_action(actions_list[a])
            d = env.is_episode_finished()
            if not d:
                if use_other_buffers:
                    st1 = game.get_state()
                    d1P = st1.depth_buffer
                    l1P = st1.labels_buffer
                    s1P = st1.screen_buffer
                    s1 = processBuffers(image_size, d1P, l1P, s1P)
                else:
                    s1P = env.get_state().screen_buffer
                    s1 = processImage(s1P, image_size)
            else:
                #No new observation is read once the episode is finished. The terminal
                #transition must still be stored (it carries the final reward), so the
                #last observation is reused as a placeholder for s1; its Q-value is
                #cancelled by end_multiplier below.
                s1 = s

            total_steps += 1

            #Compute the td error to use for prioritized replay
            if image_chls == 2:
                s1_in = s1[0:-image_size*image_size]
            else:
                s1_in = s1

            #Double-DQN: the main network picks the next action, the target network rates it
            Q1 = sess.run(mainQN.predict,
                          feed_dict={mainQN.scalarInput:[s1_in/255.0],
                                     mainQN.trainLength:1,
                                     mainQN.state_in:state1,
                                     mainQN.batch_size:1})

            Q2 = sess.run(targetQN.Qout,
                          feed_dict={targetQN.scalarInput:[s1_in/255.0],
                                     targetQN.trainLength:1,
                                     targetQN.state_in:state1,
                                     targetQN.batch_size:1})

            #0 for a terminal transition, 1 otherwise
            end_multiplier = -(d - 1)
            doubleQ = Q2[0, Q1]
            targetQ = r + (y*doubleQ * end_multiplier)
            currentaction = np.array(a, ndmin=1)

            td = sess.run(mainQN.td_error,
                     feed_dict={mainQN.scalarInput:[s_in/255.0],
                                mainQN.targetQ:targetQ,
                                mainQN.actions:currentaction,
                                mainQN.trainLength:1,
                                mainQN.state_in:state,
                                mainQN.batch_size:1})

            episodeBuffer.append(np.reshape(np.array([s,a,r,s1,d,td]),[1,6]))

            if total_steps > pre_train_steps:
                if e > endE:
                    e -= stepDrop
                    print('epsilon is = ' + str(e))
                #Update the networks at a certain frequency (every n experiences)
                if total_steps % (update_freq) == 0:
                    updateTarget(targetOps,sess)
                    #Reset the recurrent layer's hidden state
                    state_train = (np.zeros([batch_size,h_size]),np.zeros([batch_size,h_size]))
                    #Get a batch of experience traces.
                    if prioritized_replay:
                        trainBatch = myBuffer.PRsample(batch_size)
                    else:
                        trainBatch = myBuffer.sample(batch_size)

                    #Stack the per-experience observations into one 2-D array each
                    train_s = list(zip(trainBatch[:, 0]))
                    train_s1 = list(zip(trainBatch[:, 3]))
                    train_s = np.vstack(train_s)
                    train_s1 = np.vstack(train_s1)

                    if image_chls == 2:
                        train_s = train_s[:,0:-image_size*image_size]
                        train_s1 = train_s1[:,0:-image_size*image_size]

                    #Below we perform the Double-DQN update to the target Q-values
                    Q1 = sess.run(mainQN.predict,
                                  feed_dict={mainQN.scalarInput:np.vstack(train_s1/255.0),
                                             mainQN.trainLength:trace_length,
                                             mainQN.state_in:state_train,
                                             mainQN.batch_size:batch_size})

                    Q2 = sess.run(targetQN.Qout,
                                  feed_dict={targetQN.scalarInput:np.vstack(train_s1/255.0),
                                             targetQN.trainLength:trace_length,
                                             targetQN.state_in:state_train,
                                             targetQN.batch_size:batch_size})

                    end_multiplier = -(trainBatch[:,4] - 1)
                    doubleQ = Q2[range(batch_size*trace_length),Q1]
                    targetQ = trainBatch[:,2] + (y*doubleQ * end_multiplier)

                    #Update the network with our target values.
                    sess.run(mainQN.updateModel,
                             feed_dict={mainQN.scalarInput:np.vstack(train_s/255.0),
                                        mainQN.targetQ:targetQ,
                                        mainQN.actions:trainBatch[:,1],
                                        mainQN.trainLength:trace_length,
                                        mainQN.state_in:state_train,
                                        mainQN.batch_size:batch_size})
            rAll += r

            #The terminal transition has been stored and counted: the episode is over
            if d:
                break

            s = s1
            state = state1

            if use_other_buffers:
                lP = l1P
                dP = d1P
                sP = s1P
            else:
                sP = s1P

        #Add the episode to the experience buffer
        bufferArray = np.array(episodeBuffer)
        episodeBuffer = list(zip(bufferArray))
        myBuffer.add(episodeBuffer)
        jList.append(j)
        rList.append(rAll)

        #Periodically save the model.
        if i % 1000 == 0 and i != last_saved_ep:
            saver.save(sess, path2mdl + '/model-'+str(i)+'.cptk', global_step = i)
            myBuffer.save(path2mdl)
            print ("Saved Model")
        if len(rList) % summaryLength == 0 and len(rList) != 0:
            #NOTE(review): image_chls is passed twice; check against saveToCenter's signature.
            saveToCenter(i,rList,jList,
                         np.reshape(np.array(episodeBuffer),[len(episodeBuffer),6]),
                         summaryLength,
                         h_size,sess,mainQN,time_per_step,
                         image_size, image_chls, image_chls,
                         path2center)
    saver.save(sess,path2mdl + '/model-'+str(i)+'.cptk')

Target Set Success


 99%|████████████████████████████████████████████████████████████████████████████████ | 84/85 [00:00<00:00, 397.84it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 117/117 [00:00<00:00, 395.01it/s]
 99%|██████████████████████████████████████████████████████████████████████████████▏| 101/102 [00:00<00:00, 375.21it/s]


length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 78 , cntr = 71 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 299 , cntr = 292 , diff = 7
length of poped element = 210 , cntr = 203 , diff = 7
length of poped element = 166 , cntr = 159 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 224 , cntr = 217 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 299 , cntr = 292 , diff = 7
length of poped element = 74 , cntr = 67 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
length of poped element = 81 , cntr = 74 , diff = 7
le

buffer_index = 154 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 288
buffer_index = 268 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 149
buffer_index = 129 , len(sampled_ep) = 82, trace = from 54 to 62
ep_idx[i] = 45
buffer_index = 25 , len(sampled_ep) = 100, trace = from 68 to 76
ep_idx[i] = 163
buffer_index = 143 , len(sampled_ep) = 134, trace = from 101 to 109
ep_idx[i] = 62
buffer_index = 42 , len(sampled_ep) = 77, trace = from 38 to 46
ep_idx[i] = 128
buffer_index = 108 , len(sampled_ep) = 74, trace = from 53 to 61
ep_idx[i] = 317
buffer_index = 297 , len(sampled_ep) = 84, trace = from 12 to 20
ep_idx[i] = 62
buffer_index = 42 , len(sampled_ep) = 77, trace = from 5 to 13
ep_idx[i] = 166
buffer_index = 146 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 253
buffer_index = 233 , len(sampled_ep) = 76, trace = from 34 to 42
ep_idx[i] = 69
buffer_index = 49 , len(sampled_ep) = 44, trace = from 3 to 11
ep_idx[i] = 182
buffer_index = 162 , len(sampled_ep)

buffer_index = 56 , len(sampled_ep) = 198, trace = from 162 to 170
ep_idx[i] = 192
buffer_index = 172 , len(sampled_ep) = 78, trace = from 61 to 69
ep_idx[i] = 171
buffer_index = 151 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 35
buffer_index = 15 , len(sampled_ep) = 82, trace = from 20 to 28
ep_idx[i] = 47
buffer_index = 27 , len(sampled_ep) = 83, trace = from 25 to 33
ep_idx[i] = 50
buffer_index = 30 , len(sampled_ep) = 81, trace = from 38 to 46
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9999610000000003
epsilon is = 0.9999595000000003
epsilon is = 0.9999580000000003
epsilon is = 0.9999565000000004
epsilon is = 0.9999550000000004
Target Set Success
ep_idx = [196, 87, 158, 318, 87, 170, 250, 145, 217, 70, 164, 123, 286, 312, 318, 115, 80, 44, 65, 193, 119, 240, 76, 129, 298, 82, 296, 27, 223, 165, 300, 160]
exp_idx = [12, 32, 64, 72, 46, 18, 32, 93, 75, 145, 27, 148, 13, 24, 44, 15, 54, 60, 13, 104, 150, 54, 144, 16, 74, 17, 13, 31, 102, 80, 76, 60]
idx_offset = 20,

buffer_index = 203 , len(sampled_ep) = 112, trace = from 68 to 76
ep_idx[i] = 287
buffer_index = 267 , len(sampled_ep) = 194, trace = from 107 to 115
ep_idx[i] = 30
buffer_index = 10 , len(sampled_ep) = 75, trace = from 26 to 34
ep_idx[i] = 285
buffer_index = 265 , len(sampled_ep) = 166, trace = from 135 to 143
ep_idx[i] = 23
buffer_index = 3 , len(sampled_ep) = 109, trace = from 23 to 31
ep_idx[i] = 53
buffer_index = 33 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 127
buffer_index = 107 , len(sampled_ep) = 50, trace = from 5 to 13
ep_idx[i] = 77
buffer_index = 57 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 168
buffer_index = 148 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 222
buffer_index = 202 , len(sampled_ep) = 224, trace = from 112 to 120
ep_idx[i] = 307
buffer_index = 287 , len(sampled_ep) = 48, trace = from 29 to 37
ep_idx[i] = 44
buffer_index = 24 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 115
buffer_index = 95 , len(sampl

buffer_index = 102 , len(sampled_ep) = 198, trace = from 91 to 99
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9999235000000006
epsilon is = 0.9999220000000006
epsilon is = 0.9999205000000007
epsilon is = 0.9999190000000007
epsilon is = 0.9999175000000007
Target Set Success
ep_idx = [221, 68, 139, 76, 76, 174, 208, 287, 104, 92, 203, 317, 119, 207, 68, 248, 306, 234, 71, 253, 102, 74, 124, 254, 265, 259, 217, 71, 200, 249, 254, 103]
exp_idx = [36, 74, 41, 179, 53, 66, 75, 72, 18, 226, 22, 75, 171, 20, 76, 20, 33, 85, 95, 11, 28, 17, 74, 115, 76, 69, 52, 180, 47, 34, 97, 46]
idx_offset = 21, self.episode_index = 320, len(self.buffer) = 300
ep_idx[i] = 221
buffer_index = 200 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 68
buffer_index = 47 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 139
buffer_index = 118 , len(sampled_ep) = 133, trace = from 34 to 42
ep_idx[i] = 76
buffer_index = 55 , len(sampled_ep) = 198, trace = from 172 to 180
ep_idx[i] = 76
buffer_inde

buffer_index = 49 , len(sampled_ep) = 198, trace = from 136 to 144
ep_idx[i] = 41
buffer_index = 20 , len(sampled_ep) = 85, trace = from 70 to 78
ep_idx[i] = 96
buffer_index = 75 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 82
buffer_index = 61 , len(sampled_ep) = 73, trace = from 36 to 44
ep_idx[i] = 219
buffer_index = 198 , len(sampled_ep) = 110, trace = from 40 to 48
ep_idx[i] = 145
buffer_index = 124 , len(sampled_ep) = 117, trace = from 93 to 101
ep_idx[i] = 179
buffer_index = 158 , len(sampled_ep) = 77, trace = from 2 to 10
ep_idx[i] = 251
buffer_index = 230 , len(sampled_ep) = 133, trace = from 31 to 39
ep_idx[i] = 228
buffer_index = 207 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 215
buffer_index = 194 , len(sampled_ep) = 117, trace = from 24 to 32
ep_idx[i] = 251
buffer_index = 230 , len(sampled_ep) = 133, trace = from 78 to 86
ep_idx[i] = 28
buffer_index = 7 , len(sampled_ep) = 78, trace = from 65 to 73
ep_idx[i] = 274
buffer_index = 253 , len(sampled

exp_idx = [68, 22, 14, 11, 130, 80, 53, 52, 32, 75, 22, 8, 219, 20, 15, 65, 14, 28, 150, 170, 45, 71, 46, 94, 20, 60, 40, 16, 48, 47, 67, 13]
idx_offset = 21, self.episode_index = 320, len(self.buffer) = 300
ep_idx[i] = 284
buffer_index = 263 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 165
buffer_index = 144 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 180
buffer_index = 159 , len(sampled_ep) = 141, trace = from 7 to 15
ep_idx[i] = 183
buffer_index = 162 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 139
buffer_index = 118 , len(sampled_ep) = 133, trace = from 123 to 131
ep_idx[i] = 252
buffer_index = 231 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 227
buffer_index = 206 , len(sampled_ep) = 112, trace = from 46 to 54
ep_idx[i] = 306
buffer_index = 285 , len(sampled_ep) = 117, trace = from 45 to 53
ep_idx[i] = 256
buffer_index = 235 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 48
buffer_index = 27 , len(sampled_ep) = 81, tr

buffer_index = 167 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 184
buffer_index = 163 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 192
buffer_index = 171 , len(sampled_ep) = 78, trace = from 33 to 41
ep_idx[i] = 210
buffer_index = 189 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 98
buffer_index = 77 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 147
buffer_index = 126 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 118
buffer_index = 97 , len(sampled_ep) = 111, trace = from 26 to 34
ep_idx[i] = 160
buffer_index = 139 , len(sampled_ep) = 117, trace = from 80 to 88
ep_idx[i] = 101
buffer_index = 80 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 142
buffer_index = 121 , len(sampled_ep) = 108, trace = from 66 to 74
ep_idx[i] = 60
buffer_index = 39 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 295
buffer_index = 274 , len(sampled_ep) = 81, trace = from 17 to 25
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9

buffer_index = 231 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 232
buffer_index = 211 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 23
buffer_index = 2 , len(sampled_ep) = 109, trace = from 22 to 30
ep_idx[i] = 100
buffer_index = 79 , len(sampled_ep) = 102, trace = from 89 to 97
ep_idx[i] = 222
buffer_index = 201 , len(sampled_ep) = 224, trace = from 161 to 169
ep_idx[i] = 152
buffer_index = 131 , len(sampled_ep) = 228, trace = from 149 to 157
ep_idx[i] = 119
buffer_index = 98 , len(sampled_ep) = 183, trace = from 115 to 123
ep_idx[i] = 89
buffer_index = 68 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 314
buffer_index = 293 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 205
buffer_index = 184 , len(sampled_ep) = 78, trace = from 7 to 15
ep_idx[i] = 269
buffer_index = 248 , len(sampled_ep) = 45, trace = from 12 to 20
ep_idx[i] = 312
buffer_index = 291 , len(sampled_ep) = 108, trace = from 30 to 38
ep_idx[i] = 115
buffer_index = 94 , len(

ep_idx[i] = 188
buffer_index = 167 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 312
buffer_index = 291 , len(sampled_ep) = 108, trace = from 71 to 79
ep_idx[i] = 128
buffer_index = 107 , len(sampled_ep) = 74, trace = from 53 to 61
ep_idx[i] = 253
buffer_index = 232 , len(sampled_ep) = 76, trace = from 4 to 12
ep_idx[i] = 89
buffer_index = 68 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 24
buffer_index = 3 , len(sampled_ep) = 112, trace = from 71 to 79
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9998260000000014
epsilon is = 0.9998245000000014
epsilon is = 0.9998230000000015
epsilon is = 0.9998215000000015
epsilon is = 0.9998200000000015
Target Set Success
ep_idx = [21, 146, 260, 198, 300, 137, 84, 191, 45, 257, 51, 109, 219, 148, 176, 226, 232, 144, 224, 186, 35, 268, 136, 135, 58, 79, 287, 23, 129, 128, 62, 201]
exp_idx = [41, 71, 46, 11, 34, 79, 31, 127, 9, 64, 47, 41, 7, 67, 21, 13, 71, 17, 19, 66, 17, 87, 88, 50, 13, 21, 59, 72, 20, 39, 32, 23]
idx_off

buffer_index = 54 , len(sampled_ep) = 198, trace = from 63 to 71
ep_idx[i] = 258
buffer_index = 236 , len(sampled_ep) = 76, trace = from 45 to 53
ep_idx[i] = 164
buffer_index = 142 , len(sampled_ep) = 118, trace = from 64 to 72
ep_idx[i] = 275
buffer_index = 253 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 40
buffer_index = 18 , len(sampled_ep) = 116, trace = from 77 to 85
ep_idx[i] = 187
buffer_index = 165 , len(sampled_ep) = 84, trace = from 11 to 19
ep_idx[i] = 237
buffer_index = 215 , len(sampled_ep) = 116, trace = from 74 to 82
ep_idx[i] = 96
buffer_index = 74 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 234
buffer_index = 212 , len(sampled_ep) = 167, trace = from 12 to 20
ep_idx[i] = 129
buffer_index = 107 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 26
buffer_index = 4 , len(sampled_ep) = 75, trace = from 45 to 53
ep_idx[i] = 274
buffer_index = 252 , len(sampled_ep) = 114, trace = from 30 to 38
ep_idx[i] = 235
buffer_index = 213 , len(sample

sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9997885000000017
epsilon is = 0.9997870000000018
epsilon is = 0.9997855000000018
epsilon is = 0.9997840000000018
epsilon is = 0.9997825000000018
Target Set Success
ep_idx = [92, 197, 92, 32, 85, 177, 88, 217, 212, 292, 213, 189, 74, 125, 44, 247, 42, 37, 79, 181, 76, 256, 134, 208, 120, 71, 70, 45, 116, 171, 152, 39]
exp_idx = [263, 23, 277, 26, 77, 18, 19, 74, 7, 62, 85, 16, 22, 55, 17, 53, 12, 22, 25, 35, 101, 25, 32, 59, 16, 19, 186, 57, 69, 30, 88, 69]
idx_offset = 22, self.episode_index = 321, len(self.buffer) = 300
ep_idx[i] = 92
buffer_index = 70 , len(sampled_ep) = 281, trace = from 256 to 264
ep_idx[i] = 197
buffer_index = 175 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 92
buffer_index = 70 , len(sampled_ep) = 281, trace = from 270 to 278
ep_idx[i] = 32
buffer_index = 10 , len(sampled_ep) = 84, trace = from 19 to 27
ep_idx[i] = 85
buffer_index = 63 , len(sampled_ep) = 108, trace = from 70 to 78
ep_idx[i] = 177
buffe

buffer_index = 93 , len(sampled_ep) = 299, trace = from 117 to 125
ep_idx[i] = 276
buffer_index = 254 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 232
buffer_index = 210 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 45
buffer_index = 23 , len(sampled_ep) = 100, trace = from 55 to 63
ep_idx[i] = 186
buffer_index = 164 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 160
buffer_index = 138 , len(sampled_ep) = 117, trace = from 16 to 24
ep_idx[i] = 64
buffer_index = 42 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 301
buffer_index = 279 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 163
buffer_index = 141 , len(sampled_ep) = 134, trace = from 15 to 23
ep_idx[i] = 281
buffer_index = 259 , len(sampled_ep) = 178, trace = from 155 to 163
ep_idx[i] = 306
buffer_index = 284 , len(sampled_ep) = 117, trace = from 80 to 88
ep_idx[i] = 215
buffer_index = 193 , len(sampled_ep) = 117, trace = from 26 to 34
ep_idx[i] = 316
buffer_index = 294 , l

exp_idx = [49, 34, 78, 93, 14, 29, 133, 70, 98, 10, 56, 63, 15, 30, 123, 75, 33, 18, 77, 27, 75, 17, 45, 82, 30, 110, 16, 21, 14, 15, 55, 51]
idx_offset = 22, self.episode_index = 321, len(self.buffer) = 300
ep_idx[i] = 55
buffer_index = 33 , len(sampled_ep) = 76, trace = from 42 to 50
ep_idx[i] = 232
buffer_index = 210 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 185
buffer_index = 163 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 102
buffer_index = 80 , len(sampled_ep) = 103, trace = from 86 to 94
ep_idx[i] = 51
buffer_index = 29 , len(sampled_ep) = 145, trace = from 7 to 15
ep_idx[i] = 267
buffer_index = 245 , len(sampled_ep) = 83, trace = from 22 to 30
ep_idx[i] = 92
buffer_index = 70 , len(sampled_ep) = 281, trace = from 126 to 134
ep_idx[i] = 242
buffer_index = 220 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 274
buffer_index = 252 , len(sampled_ep) = 114, trace = from 91 to 99
ep_idx[i] = 124
buffer_index = 102 , len(sampled_ep) = 81, trace 

buffer_index = 197 , len(sampled_ep) = 110, trace = from 67 to 75
ep_idx[i] = 102
buffer_index = 80 , len(sampled_ep) = 103, trace = from 1 to 9
ep_idx[i] = 302
buffer_index = 280 , len(sampled_ep) = 79, trace = from 45 to 53
ep_idx[i] = 185
buffer_index = 163 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 149
buffer_index = 127 , len(sampled_ep) = 82, trace = from 22 to 30
ep_idx[i] = 264
buffer_index = 242 , len(sampled_ep) = 109, trace = from 49 to 57
ep_idx[i] = 190
buffer_index = 168 , len(sampled_ep) = 42, trace = from 27 to 35
ep_idx[i] = 34
buffer_index = 12 , len(sampled_ep) = 82, trace = from 11 to 19
ep_idx[i] = 25
buffer_index = 3 , len(sampled_ep) = 76, trace = from 25 to 33
ep_idx[i] = 134
buffer_index = 112 , len(sampled_ep) = 208, trace = from 179 to 187
ep_idx[i] = 113
buffer_index = 91 , len(sampled_ep) = 105, trace = from 48 to 56
ep_idx[i] = 234
buffer_index = 212 , len(sampled_ep) = 167, trace = from 112 to 120
sampledTraces.shape = (32, 8, 6)
epsilon is

buffer_index = 176 , len(sampled_ep) = 42, trace = from 33 to 41
ep_idx[i] = 222
buffer_index = 200 , len(sampled_ep) = 224, trace = from 99 to 107
ep_idx[i] = 113
buffer_index = 91 , len(sampled_ep) = 105, trace = from 52 to 60
ep_idx[i] = 201
buffer_index = 179 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 187
buffer_index = 165 , len(sampled_ep) = 84, trace = from 23 to 31
ep_idx[i] = 134
buffer_index = 112 , len(sampled_ep) = 208, trace = from 172 to 180
ep_idx[i] = 140
buffer_index = 118 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 287
buffer_index = 265 , len(sampled_ep) = 194, trace = from 41 to 49
ep_idx[i] = 50
buffer_index = 28 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 134
buffer_index = 112 , len(sampled_ep) = 208, trace = from 174 to 182
ep_idx[i] = 296
buffer_index = 274 , len(sampled_ep) = 108, trace = from 70 to 78
ep_idx[i] = 180
buffer_index = 158 , len(sampled_ep) = 141, trace = from 4 to 12
ep_idx[i] = 270
buffer_index = 248 , 

ep_idx[i] = 151
buffer_index = 129 , len(sampled_ep) = 82, trace = from 10 to 18
ep_idx[i] = 199
buffer_index = 177 , len(sampled_ep) = 116, trace = from 77 to 85
ep_idx[i] = 212
buffer_index = 190 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 103
buffer_index = 81 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 300
buffer_index = 278 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 92
buffer_index = 70 , len(sampled_ep) = 281, trace = from 215 to 223
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9996910000000025
epsilon is = 0.9996895000000026
epsilon is = 0.9996880000000026
epsilon is = 0.9996865000000026
epsilon is = 0.9996850000000026
Target Set Success
ep_idx = [203, 258, 254, 312, 201, 199, 81, 150, 51, 175, 36, 225, 260, 154, 154, 94, 116, 184, 38, 57, 92, 195, 287, 92, 116, 114, 157, 25, 211, 62, 279, 227]
exp_idx = [12, 22, 26, 79, 69, 30, 26, 54, 96, 14, 114, 16, 46, 96, 56, 16, 56, 31, 73, 8, 31, 45, 16, 90, 86, 60, 15, 24, 68, 22, 13, 52]
idx_

buffer_index = 78 , len(sampled_ep) = 102, trace = from 59 to 67
ep_idx[i] = 284
buffer_index = 262 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 287
buffer_index = 265 , len(sampled_ep) = 194, trace = from 153 to 161
ep_idx[i] = 152
buffer_index = 130 , len(sampled_ep) = 228, trace = from 191 to 199
ep_idx[i] = 290
buffer_index = 268 , len(sampled_ep) = 152, trace = from 45 to 53
ep_idx[i] = 158
buffer_index = 136 , len(sampled_ep) = 83, trace = from 23 to 31
ep_idx[i] = 33
buffer_index = 11 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 293
buffer_index = 271 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 285
buffer_index = 263 , len(sampled_ep) = 166, trace = from 98 to 106
ep_idx[i] = 88
buffer_index = 66 , len(sampled_ep) = 114, trace = from 55 to 63
ep_idx[i] = 44
buffer_index = 22 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 257
buffer_index = 235 , len(sampled_ep) = 82, trace = from 41 to 49
ep_idx[i] = 181
buffer_index = 159 , len

sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9996535000000029
epsilon is = 0.9996520000000029
epsilon is = 0.9996505000000029
length of poped element = 74 , cntr = 67 , diff = 7
epsilon is = 0.9996490000000029
epsilon is = 0.9996475000000029
Target Set Success
ep_idx = [309, 202, 114, 281, 30, 87, 211, 115, 256, 182, 222, 154, 171, 152, 185, 130, 71, 287, 300, 182, 249, 286, 251, 221, 85, 118, 239, 107, 134, 92, 47, 168]
exp_idx = [72, 140, 72, 46, 67, 44, 14, 246, 33, 22, 45, 33, 54, 8, 16, 17, 149, 77, 38, 66, 28, 56, 14, 53, 19, 35, 44, 25, 163, 38, 80, 12]
idx_offset = 23, self.episode_index = 322, len(self.buffer) = 300
ep_idx[i] = 309
buffer_index = 286 , len(sampled_ep) = 115, trace = from 65 to 73
ep_idx[i] = 202
buffer_index = 179 , len(sampled_ep) = 143, trace = from 133 to 141
ep_idx[i] = 114
buffer_index = 91 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 281
buffer_index = 258 , len(sampled_ep) = 178, trace = from 39 to 47
ep_idx[i] = 30
buffer_index = 7 , le

ep_idx[i] = 238
buffer_index = 215 , len(sampled_ep) = 79, trace = from 31 to 39
ep_idx[i] = 145
buffer_index = 122 , len(sampled_ep) = 117, trace = from 2 to 10
ep_idx[i] = 148
buffer_index = 125 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 55
buffer_index = 32 , len(sampled_ep) = 76, trace = from 4 to 12
ep_idx[i] = 200
buffer_index = 177 , len(sampled_ep) = 82, trace = from 7 to 15
ep_idx[i] = 26
buffer_index = 3 , len(sampled_ep) = 75, trace = from 2 to 10
ep_idx[i] = 227
buffer_index = 204 , len(sampled_ep) = 112, trace = from 10 to 18
ep_idx[i] = 196
buffer_index = 173 , len(sampled_ep) = 135, trace = from 21 to 29
ep_idx[i] = 122
buffer_index = 99 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 101
buffer_index = 78 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 209
buffer_index = 186 , len(sampled_ep) = 71, trace = from 20 to 28
ep_idx[i] = 232
buffer_index = 209 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 141
buffer_index = 118 ,

exp_idx = [15, 14, 8, 109, 21, 33, 49, 52, 16, 98, 75, 7, 49, 15, 72, 18, 22, 12, 18, 54, 7, 36, 37, 28, 117, 16, 39, 74, 37, 19, 61, 58]
idx_offset = 23, self.episode_index = 322, len(self.buffer) = 300
ep_idx[i] = 145
buffer_index = 122 , len(sampled_ep) = 117, trace = from 8 to 16
ep_idx[i] = 35
buffer_index = 12 , len(sampled_ep) = 82, trace = from 7 to 15
ep_idx[i] = 181
buffer_index = 158 , len(sampled_ep) = 43, trace = from 1 to 9
ep_idx[i] = 199
buffer_index = 176 , len(sampled_ep) = 116, trace = from 102 to 110
ep_idx[i] = 283
buffer_index = 260 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 132
buffer_index = 109 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 122
buffer_index = 99 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 268
buffer_index = 245 , len(sampled_ep) = 120, trace = from 45 to 53
ep_idx[i] = 224
buffer_index = 201 , len(sampled_ep) = 46, trace = from 9 to 17
ep_idx[i] = 39
buffer_index = 16 , len(sampled_ep) = 133, trace = from

buffer_index = 97 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 225
buffer_index = 202 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 222
buffer_index = 199 , len(sampled_ep) = 224, trace = from 79 to 87
ep_idx[i] = 92
buffer_index = 69 , len(sampled_ep) = 281, trace = from 220 to 228
ep_idx[i] = 281
buffer_index = 258 , len(sampled_ep) = 178, trace = from 9 to 17
ep_idx[i] = 187
buffer_index = 164 , len(sampled_ep) = 84, trace = from 14 to 22
ep_idx[i] = 268
buffer_index = 245 , len(sampled_ep) = 120, trace = from 39 to 47
ep_idx[i] = 309
buffer_index = 286 , len(sampled_ep) = 115, trace = from 89 to 97
ep_idx[i] = 100
buffer_index = 77 , len(sampled_ep) = 102, trace = from 86 to 94
ep_idx[i] = 279
buffer_index = 256 , len(sampled_ep) = 48, trace = from 9 to 17
ep_idx[i] = 207
buffer_index = 184 , len(sampled_ep) = 75, trace = from 67 to 75
ep_idx[i] = 128
buffer_index = 105 , len(sampled_ep) = 74, trace = from 22 to 30
sampledTraces.shape = (32, 8, 6)
epsilon i

buffer_index = 128 , len(sampled_ep) = 228, trace = from 214 to 222
ep_idx[i] = 198
buffer_index = 174 , len(sampled_ep) = 42, trace = from 5 to 13
ep_idx[i] = 54
buffer_index = 30 , len(sampled_ep) = 47, trace = from 8 to 16
ep_idx[i] = 141
buffer_index = 117 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 294
buffer_index = 270 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 163
buffer_index = 139 , len(sampled_ep) = 134, trace = from 88 to 96
ep_idx[i] = 89
buffer_index = 65 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 232
buffer_index = 208 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 299
buffer_index = 275 , len(sampled_ep) = 100, trace = from 66 to 74
ep_idx[i] = 79
buffer_index = 55 , len(sampled_ep) = 75, trace = from 7 to 15
ep_idx[i] = 247
buffer_index = 223 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 76
buffer_index = 52 , len(sampled_ep) = 198, trace = from 95 to 103
ep_idx[i] = 154
buffer_index = 130 , len(sampled_

buffer_index = 43 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 178
buffer_index = 154 , len(sampled_ep) = 80, trace = from 30 to 38
ep_idx[i] = 247
buffer_index = 223 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 195
buffer_index = 171 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 222
buffer_index = 198 , len(sampled_ep) = 224, trace = from 64 to 72
ep_idx[i] = 220
buffer_index = 196 , len(sampled_ep) = 118, trace = from 45 to 53
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9995560000000037
epsilon is = 0.9995545000000037
epsilon is = 0.9995530000000037
epsilon is = 0.9995515000000037
epsilon is = 0.9995500000000037
Target Set Success
ep_idx = [118, 56, 112, 235, 227, 320, 154, 257, 106, 38, 138, 225, 106, 147, 66, 183, 134, 157, 263, 214, 273, 226, 158, 105, 139, 285, 223, 124, 136, 202, 34, 292]
exp_idx = [86, 54, 34, 43, 48, 62, 95, 29, 63, 16, 22, 19, 60, 42, 48, 25, 78, 46, 24, 12, 67, 49, 8, 16, 70, 54, 49, 43, 10, 43, 79, 16]
idx_offset = 2

buffer_index = 10 , len(sampled_ep) = 82, trace = from 1 to 9
ep_idx[i] = 144
buffer_index = 120 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 258
buffer_index = 234 , len(sampled_ep) = 76, trace = from 38 to 46
ep_idx[i] = 116
buffer_index = 92 , len(sampled_ep) = 162, trace = from 35 to 43
ep_idx[i] = 226
buffer_index = 202 , len(sampled_ep) = 120, trace = from 108 to 116
ep_idx[i] = 285
buffer_index = 261 , len(sampled_ep) = 166, trace = from 127 to 135
ep_idx[i] = 266
buffer_index = 242 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 197
buffer_index = 173 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 31
buffer_index = 7 , len(sampled_ep) = 51, trace = from 23 to 31
ep_idx[i] = 122
buffer_index = 98 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 174
buffer_index = 150 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 135
buffer_index = 111 , len(sampled_ep) = 211, trace = from 39 to 47
ep_idx[i] = 44
buffer_index = 20 , len(sampl

sampledTraces.shape = (32, 8, 6)
epsilon is = 0.999518500000004
epsilon is = 0.999517000000004
epsilon is = 0.999515500000004
epsilon is = 0.999514000000004
epsilon is = 0.999512500000004
Target Set Success
ep_idx = [43, 107, 68, 213, 320, 287, 216, 44, 290, 320, 134, 69, 284, 310, 44, 92, 227, 115, 227, 115, 157, 322, 188, 257, 310, 96, 68, 293, 31, 294, 318, 315]
exp_idx = [38, 92, 76, 82, 65, 146, 75, 42, 139, 63, 110, 35, 67, 121, 16, 88, 30, 175, 56, 80, 47, 104, 68, 52, 126, 31, 17, 78, 19, 16, 64, 60]
idx_offset = 24, self.episode_index = 323, len(self.buffer) = 300
ep_idx[i] = 43
buffer_index = 19 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 107
buffer_index = 83 , len(sampled_ep) = 109, trace = from 85 to 93
ep_idx[i] = 68
buffer_index = 44 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 213
buffer_index = 189 , len(sampled_ep) = 148, trace = from 75 to 83
ep_idx[i] = 320
buffer_index = 296 , len(sampled_ep) = 71, trace = from 58 to 66
ep_idx[i] = 287
bu

buffer_index = 94 , len(sampled_ep) = 111, trace = from 28 to 36
ep_idx[i] = 161
buffer_index = 137 , len(sampled_ep) = 114, trace = from 42 to 50
ep_idx[i] = 203
buffer_index = 179 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 35
buffer_index = 11 , len(sampled_ep) = 82, trace = from 59 to 67
ep_idx[i] = 272
buffer_index = 248 , len(sampled_ep) = 70, trace = from 60 to 68
ep_idx[i] = 305
buffer_index = 281 , len(sampled_ep) = 71, trace = from 22 to 30
ep_idx[i] = 38
buffer_index = 14 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 122
buffer_index = 98 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 287
buffer_index = 263 , len(sampled_ep) = 194, trace = from 121 to 129
ep_idx[i] = 161
buffer_index = 137 , len(sampled_ep) = 114, trace = from 47 to 55
ep_idx[i] = 134
buffer_index = 110 , len(sampled_ep) = 208, trace = from 85 to 93
ep_idx[i] = 106
buffer_index = 82 , len(sampled_ep) = 103, trace = from 37 to 45
ep_idx[i] = 142
buffer_index = 118 , len(sam

exp_idx = [75, 45, 35, 51, 17, 63, 73, 86, 39, 49, 128, 45, 25, 27, 76, 132, 17, 16, 10, 99, 59, 11, 22, 35, 8, 32, 21, 69, 52, 79, 118, 51]
idx_offset = 24, self.episode_index = 323, len(self.buffer) = 300
ep_idx[i] = 48
buffer_index = 24 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 62
buffer_index = 38 , len(sampled_ep) = 77, trace = from 38 to 46
ep_idx[i] = 294
buffer_index = 270 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 322
buffer_index = 298 , len(sampled_ep) = 111, trace = from 44 to 52
ep_idx[i] = 67
buffer_index = 43 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 98
buffer_index = 74 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 248
buffer_index = 224 , len(sampled_ep) = 77, trace = from 66 to 74
ep_idx[i] = 24
buffer_index = 0 , len(sampled_ep) = 112, trace = from 79 to 87
ep_idx[i] = 180
buffer_index = 156 , len(sampled_ep) = 141, trace = from 32 to 40
ep_idx[i] = 262
buffer_index = 238 , len(sampled_ep) = 81, trace = from 

ep_idx[i] = 144
buffer_index = 119 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 315
buffer_index = 290 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 161
buffer_index = 136 , len(sampled_ep) = 114, trace = from 78 to 86
ep_idx[i] = 212
buffer_index = 187 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 246
buffer_index = 221 , len(sampled_ep) = 47, trace = from 8 to 16
ep_idx[i] = 223
buffer_index = 198 , len(sampled_ep) = 112, trace = from 54 to 62
ep_idx[i] = 302
buffer_index = 277 , len(sampled_ep) = 79, trace = from 67 to 75
ep_idx[i] = 306
buffer_index = 281 , len(sampled_ep) = 117, trace = from 109 to 117
ep_idx[i] = 193
buffer_index = 168 , len(sampled_ep) = 108, trace = from 40 to 48
ep_idx[i] = 39
buffer_index = 14 , len(sampled_ep) = 133, trace = from 58 to 66
ep_idx[i] = 233
buffer_index = 208 , len(sampled_ep) = 45, trace = from 31 to 39
ep_idx[i] = 280
buffer_index = 255 , len(sampled_ep) = 81, trace = from 5 to 13
sampledTraces.shape = (32

buffer_index = 187 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 196
buffer_index = 171 , len(sampled_ep) = 135, trace = from 9 to 17
ep_idx[i] = 255
buffer_index = 230 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 124
buffer_index = 99 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 162
buffer_index = 137 , len(sampled_ep) = 112, trace = from 61 to 69
ep_idx[i] = 66
buffer_index = 41 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 86
buffer_index = 61 , len(sampled_ep) = 77, trace = from 54 to 62
ep_idx[i] = 214
buffer_index = 189 , len(sampled_ep) = 75, trace = from 67 to 75
ep_idx[i] = 153
buffer_index = 128 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 196
buffer_index = 171 , len(sampled_ep) = 135, trace = from 127 to 135
ep_idx[i] = 297
buffer_index = 272 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 241
buffer_index = 216 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 210
buffer_index = 185 , len(samp

buffer_index = 175 , len(sampled_ep) = 82, trace = from 44 to 52
ep_idx[i] = 92
buffer_index = 67 , len(sampled_ep) = 281, trace = from 184 to 192
ep_idx[i] = 54
buffer_index = 29 , len(sampled_ep) = 47, trace = from 5 to 13
ep_idx[i] = 119
buffer_index = 94 , len(sampled_ep) = 183, trace = from 160 to 168
ep_idx[i] = 136
buffer_index = 111 , len(sampled_ep) = 108, trace = from 89 to 97
ep_idx[i] = 199
buffer_index = 174 , len(sampled_ep) = 116, trace = from 97 to 105
ep_idx[i] = 317
buffer_index = 292 , len(sampled_ep) = 84, trace = from 16 to 24
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9994210000000048
epsilon is = 0.9994195000000048
epsilon is = 0.9994180000000048
epsilon is = 0.9994165000000048
epsilon is = 0.9994150000000048
Target Set Success
ep_idx = [184, 287, 145, 220, 208, 40, 86, 95, 125, 135, 191, 267, 134, 116, 316, 45, 182, 297, 206, 290, 122, 291, 181, 35, 58, 142, 204, 226, 154, 174, 291, 248]
exp_idx = [41, 146, 71, 107, 57, 34, 54, 50, 72, 151, 17, 79, 83, 19, 

buffer_index = 145 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 115
buffer_index = 90 , len(sampled_ep) = 299, trace = from 125 to 133
ep_idx[i] = 290
buffer_index = 265 , len(sampled_ep) = 152, trace = from 139 to 147
ep_idx[i] = 72
buffer_index = 47 , len(sampled_ep) = 82, trace = from 10 to 18
ep_idx[i] = 267
buffer_index = 242 , len(sampled_ep) = 83, trace = from 23 to 31
ep_idx[i] = 260
buffer_index = 235 , len(sampled_ep) = 108, trace = from 16 to 24
ep_idx[i] = 82
buffer_index = 57 , len(sampled_ep) = 73, trace = from 5 to 13
ep_idx[i] = 79
buffer_index = 54 , len(sampled_ep) = 75, trace = from 10 to 18
ep_idx[i] = 309
buffer_index = 284 , len(sampled_ep) = 115, trace = from 100 to 108
ep_idx[i] = 191
buffer_index = 166 , len(sampled_ep) = 131, trace = from 42 to 50
ep_idx[i] = 226
buffer_index = 201 , len(sampled_ep) = 120, trace = from 88 to 96
ep_idx[i] = 296
buffer_index = 271 , len(sampled_ep) = 108, trace = from 93 to 101
ep_idx[i] = 170
buffer_index = 145 , le

buffer_index = 273 , len(sampled_ep) = 115, trace = from 11 to 19
ep_idx[i] = 133
buffer_index = 108 , len(sampled_ep) = 81, trace = from 7 to 15
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9993835000000051
epsilon is = 0.9993820000000051
epsilon is = 0.9993805000000051
epsilon is = 0.9993790000000051
epsilon is = 0.9993775000000051
Target Set Success
ep_idx = [140, 251, 152, 57, 202, 117, 234, 135, 65, 100, 146, 76, 274, 209, 115, 231, 149, 213, 256, 220, 183, 139, 39, 313, 229, 191, 220, 304, 66, 211, 157, 252]
exp_idx = [16, 132, 65, 12, 129, 46, 13, 80, 13, 76, 30, 10, 31, 12, 253, 45, 43, 7, 45, 23, 20, 81, 132, 43, 20, 69, 67, 64, 79, 75, 26, 53]
idx_offset = 25, self.episode_index = 324, len(self.buffer) = 300
ep_idx[i] = 140
buffer_index = 115 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 251
buffer_index = 226 , len(sampled_ep) = 133, trace = from 125 to 133
ep_idx[i] = 152
buffer_index = 127 , len(sampled_ep) = 228, trace = from 58 to 66
ep_idx[i] = 57
buffer_i

buffer_index = 110 , len(sampled_ep) = 211, trace = from 34 to 42
ep_idx[i] = 70
buffer_index = 45 , len(sampled_ep) = 198, trace = from 6 to 14
ep_idx[i] = 286
buffer_index = 261 , len(sampled_ep) = 147, trace = from 40 to 48
ep_idx[i] = 119
buffer_index = 94 , len(sampled_ep) = 183, trace = from 139 to 147
ep_idx[i] = 206
buffer_index = 181 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 268
buffer_index = 243 , len(sampled_ep) = 120, trace = from 20 to 28
ep_idx[i] = 162
buffer_index = 137 , len(sampled_ep) = 112, trace = from 66 to 74
ep_idx[i] = 137
buffer_index = 112 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 309
buffer_index = 284 , len(sampled_ep) = 115, trace = from 69 to 77
ep_idx[i] = 165
buffer_index = 140 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 89
buffer_index = 64 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 190
buffer_index = 165 , len(sampled_ep) = 42, trace = from 26 to 34
ep_idx[i] = 163
buffer_index = 138 , len(s

ep_idx = [139, 169, 277, 71, 302, 36, 230, 257, 247, 51, 169, 188, 47, 285, 145, 70, 56, 56, 252, 142, 231, 135, 236, 119, 135, 201, 316, 65, 92, 253, 131, 205]
exp_idx = [18, 76, 24, 23, 34, 24, 70, 49, 12, 85, 79, 16, 16, 23, 116, 57, 185, 140, 54, 76, 35, 63, 19, 52, 182, 16, 16, 13, 235, 12, 57, 47]
idx_offset = 26, self.episode_index = 325, len(self.buffer) = 300
ep_idx[i] = 139
buffer_index = 113 , len(sampled_ep) = 133, trace = from 11 to 19
ep_idx[i] = 169
buffer_index = 143 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 277
buffer_index = 251 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 71
buffer_index = 45 , len(sampled_ep) = 299, trace = from 16 to 24
ep_idx[i] = 302
buffer_index = 276 , len(sampled_ep) = 79, trace = from 27 to 35
ep_idx[i] = 36
buffer_index = 10 , len(sampled_ep) = 134, trace = from 17 to 25
ep_idx[i] = 230
buffer_index = 204 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 257
buffer_index = 231 , len(sampled_ep) = 82, trac

buffer_index = 294 , len(sampled_ep) = 71, trace = from 39 to 47
ep_idx[i] = 117
buffer_index = 91 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 300
buffer_index = 274 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 54
buffer_index = 28 , len(sampled_ep) = 47, trace = from 9 to 17
ep_idx[i] = 192
buffer_index = 166 , len(sampled_ep) = 78, trace = from 30 to 38
ep_idx[i] = 92
buffer_index = 66 , len(sampled_ep) = 281, trace = from 14 to 22
ep_idx[i] = 160
buffer_index = 134 , len(sampled_ep) = 117, trace = from 49 to 57
ep_idx[i] = 281
buffer_index = 255 , len(sampled_ep) = 178, trace = from 14 to 22
ep_idx[i] = 215
buffer_index = 189 , len(sampled_ep) = 117, trace = from 67 to 75
ep_idx[i] = 88
buffer_index = 62 , len(sampled_ep) = 114, trace = from 46 to 54
ep_idx[i] = 284
buffer_index = 258 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 159
buffer_index = 133 , len(sampled_ep) = 79, trace = from 11 to 19
ep_idx[i] = 294
buffer_index = 268 , len(sampled_

ep_idx[i] = 64
buffer_index = 38 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 88
buffer_index = 62 , len(sampled_ep) = 114, trace = from 27 to 35
ep_idx[i] = 209
buffer_index = 183 , len(sampled_ep) = 71, trace = from 24 to 32
ep_idx[i] = 43
buffer_index = 17 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 306
buffer_index = 280 , len(sampled_ep) = 117, trace = from 26 to 34
ep_idx[i] = 179
buffer_index = 153 , len(sampled_ep) = 77, trace = from 22 to 30
ep_idx[i] = 222
buffer_index = 196 , len(sampled_ep) = 224, trace = from 128 to 136
ep_idx[i] = 108
buffer_index = 82 , len(sampled_ep) = 45, trace = from 6 to 14
ep_idx[i] = 72
buffer_index = 46 , len(sampled_ep) = 82, trace = from 10 to 18
ep_idx[i] = 113
buffer_index = 87 , len(sampled_ep) = 105, trace = from 32 to 40
ep_idx[i] = 188
buffer_index = 162 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 305
buffer_index = 279 , len(sampled_ep) = 71, trace = from 60 to 68
ep_idx[i] = 49
buffer_index = 23 

ep_idx[i] = 286
buffer_index = 260 , len(sampled_ep) = 147, trace = from 74 to 82
ep_idx[i] = 218
buffer_index = 192 , len(sampled_ep) = 80, trace = from 59 to 67
ep_idx[i] = 203
buffer_index = 177 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 192
buffer_index = 166 , len(sampled_ep) = 78, trace = from 64 to 72
ep_idx[i] = 175
buffer_index = 149 , len(sampled_ep) = 102, trace = from 41 to 49
ep_idx[i] = 324
buffer_index = 298 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 216
buffer_index = 190 , len(sampled_ep) = 81, trace = from 40 to 48
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9992860000000059
epsilon is = 0.9992845000000059
epsilon is = 0.9992830000000059
epsilon is = 0.9992815000000059
epsilon is = 0.9992800000000059
Target Set Success
ep_idx = [260, 241, 267, 108, 109, 208, 26, 290, 188, 315, 76, 162, 56, 301, 267, 285, 203, 170, 29, 97, 147, 144, 300, 287, 284, 123, 145, 175, 116, 175, 31, 156]
exp_idx = [13, 25, 34, 15, 46, 79, 52, 14, 16, 77, 52, 

buffer_index = 87 , len(sampled_ep) = 105, trace = from 32 to 40
ep_idx[i] = 44
buffer_index = 18 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 208
buffer_index = 182 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 222
buffer_index = 196 , len(sampled_ep) = 224, trace = from 169 to 177
ep_idx[i] = 138
buffer_index = 112 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 321
buffer_index = 295 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 172
buffer_index = 146 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 82
buffer_index = 56 , len(sampled_ep) = 73, trace = from 60 to 68
ep_idx[i] = 284
buffer_index = 258 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 243
buffer_index = 217 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 193
buffer_index = 167 , len(sampled_ep) = 108, trace = from 1 to 9
ep_idx[i] = 215
buffer_index = 189 , len(sampled_ep) = 117, trace = from 49 to 57
ep_idx[i] = 26
buffer_index = 0 , len(sampled_

buffer_index = 222 , len(sampled_ep) = 77, trace = from 4 to 12
ep_idx[i] = 245
buffer_index = 219 , len(sampled_ep) = 75, trace = from 39 to 47
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9992485000000062
epsilon is = 0.9992470000000062
epsilon is = 0.9992455000000062
epsilon is = 0.9992440000000062
epsilon is = 0.9992425000000063
Target Set Success
ep_idx = [84, 39, 70, 38, 121, 60, 179, 242, 40, 168, 71, 166, 225, 93, 267, 269, 116, 314, 109, 252, 142, 186, 187, 46, 51, 90, 50, 124, 135, 45, 257, 70]
exp_idx = [54, 66, 135, 10, 16, 63, 59, 79, 63, 78, 10, 32, 15, 56, 59, 11, 123, 20, 13, 7, 30, 16, 15, 79, 92, 62, 14, 9, 49, 43, 51, 87]
idx_offset = 26, self.episode_index = 325, len(self.buffer) = 300
ep_idx[i] = 84
buffer_index = 58 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 39
buffer_index = 13 , len(sampled_ep) = 133, trace = from 59 to 67
ep_idx[i] = 70
buffer_index = 44 , len(sampled_ep) = 198, trace = from 128 to 136
ep_idx[i] = 38
buffer_index = 12 , len(sa

buffer_index = 194 , len(sampled_ep) = 118, trace = from 3 to 11
ep_idx[i] = 107
buffer_index = 81 , len(sampled_ep) = 109, trace = from 18 to 26
ep_idx[i] = 142
buffer_index = 116 , len(sampled_ep) = 108, trace = from 63 to 71
ep_idx[i] = 87
buffer_index = 61 , len(sampled_ep) = 76, trace = from 42 to 50
ep_idx[i] = 72
buffer_index = 46 , len(sampled_ep) = 82, trace = from 24 to 32
ep_idx[i] = 282
buffer_index = 256 , len(sampled_ep) = 43, trace = from 4 to 12
ep_idx[i] = 289
buffer_index = 263 , len(sampled_ep) = 83, trace = from 32 to 40
ep_idx[i] = 191
buffer_index = 165 , len(sampled_ep) = 131, trace = from 67 to 75
ep_idx[i] = 199
buffer_index = 173 , len(sampled_ep) = 116, trace = from 90 to 98
ep_idx[i] = 168
buffer_index = 142 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 202
buffer_index = 176 , len(sampled_ep) = 143, trace = from 40 to 48
ep_idx[i] = 237
buffer_index = 211 , len(sampled_ep) = 116, trace = from 67 to 75
ep_idx[i] = 90
buffer_index = 64 , len(sampl

ep_idx = [194, 304, 312, 325, 52, 231, 134, 257, 118, 251, 110, 119, 119, 113, 37, 316, 116, 109, 297, 251, 284, 260, 95, 71, 214, 163, 82, 197, 281, 204, 319, 193]
exp_idx = [33, 45, 90, 79, 50, 74, 128, 50, 20, 106, 125, 162, 21, 52, 26, 33, 68, 12, 17, 62, 17, 43, 22, 254, 56, 93, 11, 77, 47, 21, 16, 64]
idx_offset = 27, self.episode_index = 326, len(self.buffer) = 300
ep_idx[i] = 194
buffer_index = 167 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 304
buffer_index = 277 , len(sampled_ep) = 72, trace = from 38 to 46
ep_idx[i] = 312
buffer_index = 285 , len(sampled_ep) = 108, trace = from 83 to 91
ep_idx[i] = 325
buffer_index = 298 , len(sampled_ep) = 82, trace = from 72 to 80
ep_idx[i] = 52
buffer_index = 25 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 231
buffer_index = 204 , len(sampled_ep) = 77, trace = from 67 to 75
ep_idx[i] = 134
buffer_index = 107 , len(sampled_ep) = 208, trace = from 121 to 129
ep_idx[i] = 257
buffer_index = 230 , len(sampled_ep) = 8

buffer_index = 184 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 52
buffer_index = 25 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 80
buffer_index = 53 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 114
buffer_index = 87 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 54
buffer_index = 27 , len(sampled_ep) = 47, trace = from 20 to 28
ep_idx[i] = 134
buffer_index = 107 , len(sampled_ep) = 208, trace = from 175 to 183
ep_idx[i] = 112
buffer_index = 85 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 118
buffer_index = 91 , len(sampled_ep) = 111, trace = from 42 to 50
ep_idx[i] = 259
buffer_index = 232 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 253
buffer_index = 226 , len(sampled_ep) = 76, trace = from 48 to 56
ep_idx[i] = 156
buffer_index = 129 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 176
buffer_index = 149 , len(sampled_ep) = 80, trace = from 55 to 63
ep_idx[i] = 281
buffer_index = 254 , len(sampled_ep)

buffer_index = 164 , len(sampled_ep) = 131, trace = from 5 to 13
ep_idx[i] = 258
buffer_index = 231 , len(sampled_ep) = 76, trace = from 38 to 46
ep_idx[i] = 272
buffer_index = 245 , len(sampled_ep) = 70, trace = from 26 to 34
ep_idx[i] = 229
buffer_index = 202 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 281
buffer_index = 254 , len(sampled_ep) = 178, trace = from 27 to 35
ep_idx[i] = 152
buffer_index = 125 , len(sampled_ep) = 228, trace = from 129 to 137
ep_idx[i] = 92
buffer_index = 65 , len(sampled_ep) = 281, trace = from 221 to 229
ep_idx[i] = 71
buffer_index = 44 , len(sampled_ep) = 299, trace = from 165 to 173
ep_idx[i] = 141
buffer_index = 114 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 206
buffer_index = 179 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 192
buffer_index = 165 , len(sampled_ep) = 78, trace = from 15 to 23
ep_idx[i] = 298
buffer_index = 271 , len(sampled_ep) = 115, trace = from 12 to 20
ep_idx[i] = 170
buffer_index = 143 , 

buffer_index = 244 , len(sampled_ep) = 46, trace = from 16 to 24
ep_idx[i] = 208
buffer_index = 181 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 107
buffer_index = 80 , len(sampled_ep) = 109, trace = from 16 to 24
ep_idx[i] = 56
buffer_index = 29 , len(sampled_ep) = 198, trace = from 100 to 108
ep_idx[i] = 186
buffer_index = 159 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 160
buffer_index = 133 , len(sampled_ep) = 117, trace = from 72 to 80
ep_idx[i] = 191
buffer_index = 164 , len(sampled_ep) = 131, trace = from 82 to 90
ep_idx[i] = 145
buffer_index = 118 , len(sampled_ep) = 117, trace = from 100 to 108
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.999151000000007
epsilon is = 0.999149500000007
epsilon is = 0.999148000000007
epsilon is = 0.999146500000007
epsilon is = 0.999145000000007
Target Set Success
ep_idx = [38, 298, 38, 280, 33, 171, 227, 207, 299, 202, 76, 115, 275, 101, 71, 223, 252, 206, 295, 134, 109, 163, 62, 214, 191, 254, 296, 212, 28, 230, 68, 

buffer_index = 148 , len(sampled_ep) = 102, trace = from 30 to 38
ep_idx[i] = 288
buffer_index = 261 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 196
buffer_index = 169 , len(sampled_ep) = 135, trace = from 94 to 102
ep_idx[i] = 150
buffer_index = 123 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 293
buffer_index = 266 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 290
buffer_index = 263 , len(sampled_ep) = 152, trace = from 94 to 102
ep_idx[i] = 184
buffer_index = 157 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 150
buffer_index = 123 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 141
buffer_index = 114 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 199
buffer_index = 172 , len(sampled_ep) = 116, trace = from 88 to 96
ep_idx[i] = 266
buffer_index = 239 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 199
buffer_index = 172 , len(sampled_ep) = 116, trace = from 45 to 53
ep_idx[i] = 325
buffer_index = 298 , len

ep_idx[i] = 301
buffer_index = 274 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 156
buffer_index = 129 , len(sampled_ep) = 81, trace = from 26 to 34
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9991135000000073
epsilon is = 0.9991120000000073
epsilon is = 0.9991105000000073
epsilon is = 0.9991090000000074
epsilon is = 0.9991075000000074
Target Set Success
ep_idx = [84, 151, 247, 123, 129, 196, 194, 242, 161, 286, 45, 324, 138, 34, 215, 141, 254, 313, 71, 56, 132, 257, 281, 76, 104, 99, 147, 325, 257, 199, 40, 160]
exp_idx = [76, 39, 38, 11, 22, 109, 16, 51, 102, 135, 13, 48, 49, 27, 49, 7, 48, 43, 98, 36, 79, 78, 99, 174, 24, 62, 75, 22, 52, 17, 21, 20]
idx_offset = 27, self.episode_index = 326, len(self.buffer) = 300
ep_idx[i] = 84
buffer_index = 57 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 151
buffer_index = 124 , len(sampled_ep) = 82, trace = from 32 to 40
ep_idx[i] = 247
buffer_index = 220 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 123

ep_idx[i] = 107
buffer_index = 79 , len(sampled_ep) = 109, trace = from 4 to 12
ep_idx[i] = 256
buffer_index = 228 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 133
buffer_index = 105 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 119
buffer_index = 91 , len(sampled_ep) = 183, trace = from 81 to 89
ep_idx[i] = 241
buffer_index = 213 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 152
buffer_index = 124 , len(sampled_ep) = 228, trace = from 88 to 96
ep_idx[i] = 44
buffer_index = 16 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 52
buffer_index = 24 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 294
buffer_index = 266 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 109
buffer_index = 81 , len(sampled_ep) = 48, trace = from 15 to 23
ep_idx[i] = 85
buffer_index = 57 , len(sampled_ep) = 108, trace = from 43 to 51
ep_idx[i] = 170
buffer_index = 142 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 56
buffer_index = 28 , l

ep_idx = [204, 293, 48, 265, 299, 100, 179, 39, 162, 202, 223, 135, 80, 57, 172, 300, 42, 147, 179, 50, 213, 211, 268, 158, 304, 147, 115, 286, 301, 137, 75, 167]
exp_idx = [57, 41, 73, 59, 19, 40, 36, 12, 45, 67, 105, 67, 35, 88, 19, 7, 12, 16, 8, 31, 41, 15, 49, 66, 12, 20, 173, 82, 21, 16, 23, 18]
idx_offset = 28, self.episode_index = 327, len(self.buffer) = 300
ep_idx[i] = 204
buffer_index = 176 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 293
buffer_index = 265 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 48
buffer_index = 20 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 265
buffer_index = 237 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 299
buffer_index = 271 , len(sampled_ep) = 100, trace = from 12 to 20
ep_idx[i] = 100
buffer_index = 72 , len(sampled_ep) = 102, trace = from 33 to 41
ep_idx[i] = 179
buffer_index = 151 , len(sampled_ep) = 77, trace = from 29 to 37
ep_idx[i] = 39
buffer_index = 11 , len(sampled_ep) = 133, trace = 

buffer_index = 295 , len(sampled_ep) = 43, trace = from 33 to 41
ep_idx[i] = 293
buffer_index = 265 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 74
buffer_index = 46 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 163
buffer_index = 135 , len(sampled_ep) = 134, trace = from 79 to 87
ep_idx[i] = 323
buffer_index = 295 , len(sampled_ep) = 43, trace = from 13 to 21
ep_idx[i] = 159
buffer_index = 131 , len(sampled_ep) = 79, trace = from 12 to 20
ep_idx[i] = 116
buffer_index = 88 , len(sampled_ep) = 162, trace = from 35 to 43
ep_idx[i] = 217
buffer_index = 189 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 255
buffer_index = 227 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 213
buffer_index = 185 , len(sampled_ep) = 148, trace = from 10 to 18
ep_idx[i] = 248
buffer_index = 220 , len(sampled_ep) = 77, trace = from 0 to 8
ep_idx[i] = 253
buffer_index = 225 , len(sampled_ep) = 76, trace = from 29 to 37
ep_idx[i] = 120
buffer_index = 92 , len(sampled_

buffer_index = 232 , len(sampled_ep) = 108, trace = from 63 to 71
ep_idx[i] = 292
buffer_index = 264 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 127
buffer_index = 99 , len(sampled_ep) = 50, trace = from 2 to 10
ep_idx[i] = 251
buffer_index = 223 , len(sampled_ep) = 133, trace = from 48 to 56
ep_idx[i] = 180
buffer_index = 152 , len(sampled_ep) = 141, trace = from 29 to 37
ep_idx[i] = 259
buffer_index = 231 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 71
buffer_index = 43 , len(sampled_ep) = 299, trace = from 202 to 210
ep_idx[i] = 186
buffer_index = 158 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 317
buffer_index = 289 , len(sampled_ep) = 84, trace = from 71 to 79
ep_idx[i] = 212
buffer_index = 184 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 211
buffer_index = 183 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 47
buffer_index = 19 , len(sampled_ep) = 83, trace = from 2 to 10
ep_idx[i] = 173
buffer_index = 145 , len(sampl

ep_idx[i] = 157
buffer_index = 129 , len(sampled_ep) = 103, trace = from 7 to 15
ep_idx[i] = 154
buffer_index = 126 , len(sampled_ep) = 153, trace = from 41 to 49
ep_idx[i] = 215
buffer_index = 187 , len(sampled_ep) = 117, trace = from 73 to 81
ep_idx[i] = 88
buffer_index = 60 , len(sampled_ep) = 114, trace = from 105 to 113
ep_idx[i] = 295
buffer_index = 267 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 76
buffer_index = 48 , len(sampled_ep) = 198, trace = from 130 to 138
ep_idx[i] = 35
buffer_index = 7 , len(sampled_ep) = 82, trace = from 59 to 67
ep_idx[i] = 316
buffer_index = 288 , len(sampled_ep) = 81, trace = from 32 to 40
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9990160000000081
epsilon is = 0.9990145000000081
epsilon is = 0.9990130000000081
epsilon is = 0.9990115000000082
epsilon is = 0.9990100000000082
Target Set Success
ep_idx = [215, 208, 147, 201, 76, 52, 143, 289, 31, 119, 198, 216, 92, 138, 172, 138, 114, 115, 215, 75, 123, 230, 310, 303, 123, 77, 197, 

buffer_index = 24 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 169
buffer_index = 141 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 29
buffer_index = 1 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 222
buffer_index = 194 , len(sampled_ep) = 224, trace = from 172 to 180
ep_idx[i] = 195
buffer_index = 167 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 281
buffer_index = 253 , len(sampled_ep) = 178, trace = from 38 to 46
ep_idx[i] = 110
buffer_index = 82 , len(sampled_ep) = 137, trace = from 17 to 25
ep_idx[i] = 191
buffer_index = 163 , len(sampled_ep) = 131, trace = from 76 to 84
ep_idx[i] = 322
buffer_index = 294 , len(sampled_ep) = 111, trace = from 20 to 28
ep_idx[i] = 32
buffer_index = 4 , len(sampled_ep) = 84, trace = from 28 to 36
ep_idx[i] = 65
buffer_index = 37 , len(sampled_ep) = 45, trace = from 4 to 12
ep_idx[i] = 102
buffer_index = 74 , len(sampled_ep) = 103, trace = from 40 to 48
ep_idx[i] = 131
buffer_index = 103 , len(sampled_e

buffer_index = 20 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 56
buffer_index = 27 , len(sampled_ep) = 198, trace = from 49 to 57
ep_idx[i] = 149
buffer_index = 120 , len(sampled_ep) = 82, trace = from 6 to 14
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9989785000000084
epsilon is = 0.9989770000000084
epsilon is = 0.9989755000000085
epsilon is = 0.9989740000000085
epsilon is = 0.9989725000000085
Target Set Success
ep_idx = [141, 98, 189, 306, 128, 279, 104, 82, 262, 134, 226, 215, 199, 156, 257, 101, 232, 260, 192, 203, 71, 313, 118, 114, 71, 134, 175, 229, 161, 100, 316, 271]
exp_idx = [8, 39, 45, 15, 59, 16, 48, 58, 16, 182, 19, 55, 78, 16, 73, 32, 18, 98, 73, 77, 228, 59, 16, 42, 47, 137, 51, 54, 15, 91, 16, 15]
idx_offset = 29, self.episode_index = 328, len(self.buffer) = 300
ep_idx[i] = 141
buffer_index = 112 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 98
buffer_index = 69 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 189
buffer_index = 160

buffer_index = 246 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 184
buffer_index = 155 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 260
buffer_index = 231 , len(sampled_ep) = 108, trace = from 2 to 10
ep_idx[i] = 223
buffer_index = 194 , len(sampled_ep) = 112, trace = from 39 to 47
ep_idx[i] = 304
buffer_index = 275 , len(sampled_ep) = 72, trace = from 42 to 50
ep_idx[i] = 319
buffer_index = 290 , len(sampled_ep) = 70, trace = from 49 to 57
ep_idx[i] = 45
buffer_index = 16 , len(sampled_ep) = 100, trace = from 72 to 80
ep_idx[i] = 158
buffer_index = 129 , len(sampled_ep) = 83, trace = from 9 to 17
ep_idx[i] = 115
buffer_index = 86 , len(sampled_ep) = 299, trace = from 200 to 208
ep_idx[i] = 193
buffer_index = 164 , len(sampled_ep) = 108, trace = from 36 to 44
ep_idx[i] = 305
buffer_index = 276 , len(sampled_ep) = 71, trace = from 55 to 63
ep_idx[i] = 227
buffer_index = 198 , len(sampled_ep) = 112, trace = from 91 to 99
ep_idx[i] = 213
buffer_index = 184 , len(

ep_idx = [164, 177, 260, 120, 35, 227, 110, 258, 257, 72, 281, 240, 95, 152, 246, 134, 191, 72, 84, 165, 263, 287, 177, 53, 71, 70, 265, 251, 107, 95, 292, 242]
exp_idx = [10, 13, 53, 43, 12, 60, 111, 67, 27, 35, 114, 39, 18, 92, 44, 137, 12, 19, 16, 22, 21, 80, 15, 17, 261, 66, 65, 123, 27, 44, 19, 8]
idx_offset = 29, self.episode_index = 328, len(self.buffer) = 300
ep_idx[i] = 164
buffer_index = 135 , len(sampled_ep) = 118, trace = from 3 to 11
ep_idx[i] = 177
buffer_index = 148 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 260
buffer_index = 231 , len(sampled_ep) = 108, trace = from 46 to 54
ep_idx[i] = 120
buffer_index = 91 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 35
buffer_index = 6 , len(sampled_ep) = 82, trace = from 5 to 13
ep_idx[i] = 227
buffer_index = 198 , len(sampled_ep) = 112, trace = from 53 to 61
ep_idx[i] = 110
buffer_index = 81 , len(sampled_ep) = 137, trace = from 104 to 112
ep_idx[i] = 258
buffer_index = 229 , len(sampled_ep) = 76, trace 

ep_idx[i] = 180
buffer_index = 151 , len(sampled_ep) = 141, trace = from 32 to 40
ep_idx[i] = 142
buffer_index = 113 , len(sampled_ep) = 108, trace = from 48 to 56
ep_idx[i] = 164
buffer_index = 135 , len(sampled_ep) = 118, trace = from 10 to 18
ep_idx[i] = 234
buffer_index = 205 , len(sampled_ep) = 167, trace = from 145 to 153
ep_idx[i] = 140
buffer_index = 111 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 206
buffer_index = 177 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 251
buffer_index = 222 , len(sampled_ep) = 133, trace = from 53 to 61
ep_idx[i] = 234
buffer_index = 205 , len(sampled_ep) = 167, trace = from 110 to 118
ep_idx[i] = 71
buffer_index = 42 , len(sampled_ep) = 299, trace = from 250 to 258
ep_idx[i] = 53
buffer_index = 24 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 221
buffer_index = 192 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 286
buffer_index = 257 , len(sampled_ep) = 147, trace = from 19 to 27
ep_idx[i] = 290
buff

buffer_index = 41 , len(sampled_ep) = 198, trace = from 166 to 174
ep_idx[i] = 281
buffer_index = 252 , len(sampled_ep) = 178, trace = from 64 to 72
ep_idx[i] = 137
buffer_index = 108 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 70
buffer_index = 41 , len(sampled_ep) = 198, trace = from 66 to 74
ep_idx[i] = 70
buffer_index = 41 , len(sampled_ep) = 198, trace = from 169 to 177
ep_idx[i] = 156
buffer_index = 127 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 173
buffer_index = 144 , len(sampled_ep) = 71, trace = from 46 to 54
ep_idx[i] = 56
buffer_index = 27 , len(sampled_ep) = 198, trace = from 137 to 145
ep_idx[i] = 135
buffer_index = 106 , len(sampled_ep) = 211, trace = from 94 to 102
ep_idx[i] = 152
buffer_index = 123 , len(sampled_ep) = 228, trace = from 49 to 57
ep_idx[i] = 39
buffer_index = 10 , len(sampled_ep) = 133, trace = from 42 to 50
ep_idx[i] = 50
buffer_index = 21 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 249
buffer_index = 220 , len

buffer_index = 248 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 90
buffer_index = 61 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 278
buffer_index = 249 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 142
buffer_index = 113 , len(sampled_ep) = 108, trace = from 45 to 53
ep_idx[i] = 150
buffer_index = 121 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 188
buffer_index = 159 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 202
buffer_index = 173 , len(sampled_ep) = 143, trace = from 24 to 32
ep_idx[i] = 264
buffer_index = 235 , len(sampled_ep) = 109, trace = from 16 to 24
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9988810000000092
epsilon is = 0.9988795000000092
epsilon is = 0.9988780000000093
epsilon is = 0.9988765000000093
epsilon is = 0.9988750000000093
Target Set Success
ep_idx = [204, 167, 100, 51, 204, 46, 258, 247, 167, 183, 152, 180, 201, 122, 71, 250, 52, 157, 274, 49, 259, 254, 264, 304, 265, 327, 47, 36, 150, 76, 114,

buffer_index = 283 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 90
buffer_index = 60 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 188
buffer_index = 158 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 285
buffer_index = 255 , len(sampled_ep) = 166, trace = from 7 to 15
ep_idx[i] = 300
buffer_index = 270 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 213
buffer_index = 183 , len(sampled_ep) = 148, trace = from 2 to 10
ep_idx[i] = 81
buffer_index = 51 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 307
buffer_index = 277 , len(sampled_ep) = 48, trace = from 4 to 12
ep_idx[i] = 63
buffer_index = 33 , len(sampled_ep) = 47, trace = from 27 to 35
ep_idx[i] = 69
buffer_index = 39 , len(sampled_ep) = 44, trace = from 11 to 19
ep_idx[i] = 52
buffer_index = 22 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 160
buffer_index = 130 , len(sampled_ep) = 117, trace = from 15 to 23
ep_idx[i] = 249
buffer_index = 219 , len(sampled_ep) = 75

buffer_index = 297 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 71
buffer_index = 41 , len(sampled_ep) = 299, trace = from 12 to 20
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9988435000000095
epsilon is = 0.9988420000000096
epsilon is = 0.9988405000000096
epsilon is = 0.9988390000000096
epsilon is = 0.9988375000000096
Target Set Success
ep_idx = [213, 223, 184, 184, 57, 68, 255, 228, 172, 131, 284, 125, 274, 200, 157, 226, 324, 57, 102, 211, 45, 83, 281, 248, 179, 209, 71, 193, 169, 135, 118, 133]
exp_idx = [108, 108, 68, 10, 11, 76, 58, 74, 21, 19, 18, 32, 84, 48, 82, 83, 8, 109, 78, 62, 86, 67, 169, 13, 12, 55, 295, 84, 60, 151, 21, 12]
idx_offset = 30, self.episode_index = 329, len(self.buffer) = 300
ep_idx[i] = 213
buffer_index = 183 , len(sampled_ep) = 148, trace = from 101 to 109
ep_idx[i] = 223
buffer_index = 193 , len(sampled_ep) = 112, trace = from 101 to 109
ep_idx[i] = 184
buffer_index = 154 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 184
buff

buffer_index = 93 , len(sampled_ep) = 198, trace = from 16 to 24
ep_idx[i] = 188
buffer_index = 158 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 130
buffer_index = 100 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 165
buffer_index = 135 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 222
buffer_index = 192 , len(sampled_ep) = 224, trace = from 100 to 108
ep_idx[i] = 137
buffer_index = 107 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 60
buffer_index = 30 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 302
buffer_index = 272 , len(sampled_ep) = 79, trace = from 13 to 21
ep_idx[i] = 49
buffer_index = 19 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 278
buffer_index = 248 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 44
buffer_index = 14 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 39
buffer_index = 9 , len(sampled_ep) = 133, trace = from 81 to 89
ep_idx[i] = 123
buffer_index = 93 , len(sampled_ep) 

ep_idx = [51, 225, 88, 92, 294, 144, 133, 229, 126, 302, 111, 36, 100, 204, 212, 286, 57, 125, 289, 215, 298, 321, 64, 71, 81, 220, 135, 214, 193, 199, 74, 179]
exp_idx = [132, 19, 36, 20, 18, 11, 76, 41, 14, 47, 48, 29, 89, 55, 19, 13, 88, 17, 56, 49, 50, 17, 65, 14, 16, 22, 181, 58, 53, 89, 23, 14]
idx_offset = 30, self.episode_index = 329, len(self.buffer) = 300
ep_idx[i] = 51
buffer_index = 21 , len(sampled_ep) = 145, trace = from 125 to 133
ep_idx[i] = 225
buffer_index = 195 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 88
buffer_index = 58 , len(sampled_ep) = 114, trace = from 29 to 37
ep_idx[i] = 92
buffer_index = 62 , len(sampled_ep) = 281, trace = from 13 to 21
ep_idx[i] = 294
buffer_index = 264 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 144
buffer_index = 114 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 133
buffer_index = 103 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 229
buffer_index = 199 , len(sampled_ep) = 81, trace = 

buffer_index = 221 , len(sampled_ep) = 133, trace = from 122 to 130
ep_idx[i] = 230
buffer_index = 200 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 71
buffer_index = 41 , len(sampled_ep) = 299, trace = from 9 to 17
ep_idx[i] = 31
buffer_index = 1 , len(sampled_ep) = 51, trace = from 35 to 43
ep_idx[i] = 302
buffer_index = 272 , len(sampled_ep) = 79, trace = from 68 to 76
ep_idx[i] = 99
buffer_index = 69 , len(sampled_ep) = 83, trace = from 34 to 42
ep_idx[i] = 71
buffer_index = 41 , len(sampled_ep) = 299, trace = from 227 to 235
ep_idx[i] = 316
buffer_index = 286 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 134
buffer_index = 104 , len(sampled_ep) = 208, trace = from 69 to 77
ep_idx[i] = 267
buffer_index = 237 , len(sampled_ep) = 83, trace = from 44 to 52
ep_idx[i] = 141
buffer_index = 111 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 232
buffer_index = 202 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 234
buffer_index = 204 , len(sampl

buffer_index = 262 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 192
buffer_index = 162 , len(sampled_ep) = 78, trace = from 8 to 16
ep_idx[i] = 123
buffer_index = 93 , len(sampled_ep) = 198, trace = from 132 to 140
ep_idx[i] = 329
buffer_index = 299 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 50
buffer_index = 20 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 59
buffer_index = 29 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 189
buffer_index = 159 , len(sampled_ep) = 84, trace = from 76 to 84
ep_idx[i] = 221
buffer_index = 191 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 313
buffer_index = 283 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 97
buffer_index = 67 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 160
buffer_index = 130 , len(sampled_ep) = 117, trace = from 17 to 25
ep_idx[i] = 325
buffer_index = 295 , len(sampled_ep) = 82, trace = from 9 to 17
ep_idx[i] = 36
buffer_index = 6 , len(sampled_ep) =

buffer_index = 21 , len(sampled_ep) = 145, trace = from 9 to 17
ep_idx[i] = 306
buffer_index = 276 , len(sampled_ep) = 117, trace = from 94 to 102
ep_idx[i] = 306
buffer_index = 276 , len(sampled_ep) = 117, trace = from 101 to 109
ep_idx[i] = 49
buffer_index = 19 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 107
buffer_index = 77 , len(sampled_ep) = 109, trace = from 15 to 23
ep_idx[i] = 275
buffer_index = 245 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 144
buffer_index = 114 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 170
buffer_index = 140 , len(sampled_ep) = 81, trace = from 61 to 69
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9987460000000103
epsilon is = 0.9987445000000104
epsilon is = 0.9987430000000104
epsilon is = 0.9987415000000104
epsilon is = 0.9987400000000104
Target Set Success
ep_idx = [187, 76, 256, 110, 95, 259, 32, 122, 202, 252, 247, 39, 202, 102, 119, 245, 70, 205, 170, 107, 76, 322, 105, 88, 52, 253, 293, 86, 241, 320, 157

ep_idx[i] = 232
buffer_index = 202 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 320
buffer_index = 290 , len(sampled_ep) = 71, trace = from 7 to 15
ep_idx[i] = 207
buffer_index = 177 , len(sampled_ep) = 75, trace = from 2 to 10
ep_idx[i] = 277
buffer_index = 247 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 250
buffer_index = 220 , len(sampled_ep) = 49, trace = from 39 to 47
ep_idx[i] = 185
buffer_index = 155 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 234
buffer_index = 204 , len(sampled_ep) = 167, trace = from 136 to 144
ep_idx[i] = 122
buffer_index = 92 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 76
buffer_index = 46 , len(sampled_ep) = 198, trace = from 137 to 145
ep_idx[i] = 88
buffer_index = 58 , len(sampled_ep) = 114, trace = from 48 to 56
ep_idx[i] = 314
buffer_index = 284 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 115
buffer_index = 85 , len(sampled_ep) = 299, trace = from 59 to 67
ep_idx[i] = 88
buffer_index =

buffer_index = 117 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 62
buffer_index = 32 , len(sampled_ep) = 77, trace = from 45 to 53
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9987085000000107
epsilon is = 0.9987070000000107
epsilon is = 0.9987055000000107
epsilon is = 0.9987040000000107
epsilon is = 0.9987025000000107
Target Set Success
ep_idx = [254, 214, 239, 182, 296, 53, 115, 284, 86, 309, 48, 101, 200, 131, 179, 263, 196, 284, 237, 198, 254, 86, 291, 209, 281, 56, 290, 205, 167, 267, 289, 290]
exp_idx = [61, 35, 80, 72, 52, 79, 91, 17, 27, 75, 69, 72, 47, 38, 19, 14, 13, 71, 48, 12, 103, 61, 55, 64, 70, 107, 116, 46, 16, 35, 9, 73]
idx_offset = 30, self.episode_index = 329, len(self.buffer) = 300
ep_idx[i] = 254
buffer_index = 224 , len(sampled_ep) = 116, trace = from 54 to 62
ep_idx[i] = 214
buffer_index = 184 , len(sampled_ep) = 75, trace = from 28 to 36
ep_idx[i] = 239
buffer_index = 209 , len(sampled_ep) = 101, trace = from 73 to 81
ep_idx[i] = 182
buffer_inde

buffer_index = 261 , len(sampled_ep) = 103, trace = from 92 to 100
ep_idx[i] = 160
buffer_index = 130 , len(sampled_ep) = 117, trace = from 44 to 52
ep_idx[i] = 40
buffer_index = 10 , len(sampled_ep) = 116, trace = from 9 to 17
ep_idx[i] = 165
buffer_index = 135 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 270
buffer_index = 240 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 294
buffer_index = 264 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 149
buffer_index = 119 , len(sampled_ep) = 82, trace = from 12 to 20
ep_idx[i] = 32
buffer_index = 2 , len(sampled_ep) = 84, trace = from 9 to 17
ep_idx[i] = 253
buffer_index = 223 , len(sampled_ep) = 76, trace = from 32 to 40
ep_idx[i] = 90
buffer_index = 60 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 102
buffer_index = 72 , len(sampled_ep) = 103, trace = from 5 to 13
ep_idx[i] = 51
buffer_index = 21 , len(sampled_ep) = 145, trace = from 5 to 13
ep_idx[i] = 177
buffer_index = 147 , len(sampled_ep) 

ep_idx = [92, 90, 327, 200, 135, 64, 211, 208, 329, 92, 305, 152, 209, 111, 310, 304, 47, 228, 141, 36, 131, 152, 66, 87, 169, 180, 305, 56, 85, 135, 214, 327]
exp_idx = [17, 24, 18, 77, 152, 49, 40, 59, 64, 199, 27, 55, 26, 67, 138, 64, 17, 50, 17, 36, 49, 17, 10, 57, 31, 99, 14, 87, 75, 60, 23, 35]
idx_offset = 30, self.episode_index = 329, len(self.buffer) = 300
ep_idx[i] = 92
buffer_index = 62 , len(sampled_ep) = 281, trace = from 10 to 18
ep_idx[i] = 90
buffer_index = 60 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 327
buffer_index = 297 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 200
buffer_index = 170 , len(sampled_ep) = 82, trace = from 70 to 78
ep_idx[i] = 135
buffer_index = 105 , len(sampled_ep) = 211, trace = from 145 to 153
ep_idx[i] = 64
buffer_index = 34 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 211
buffer_index = 181 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 208
buffer_index = 178 , len(sampled_ep) = 81, trace = 

buffer_index = 89 , len(sampled_ep) = 183, trace = from 81 to 89
ep_idx[i] = 93
buffer_index = 63 , len(sampled_ep) = 77, trace = from 40 to 48
ep_idx[i] = 163
buffer_index = 133 , len(sampled_ep) = 134, trace = from 105 to 113
ep_idx[i] = 168
buffer_index = 138 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 274
buffer_index = 244 , len(sampled_ep) = 114, trace = from 19 to 27
ep_idx[i] = 219
buffer_index = 189 , len(sampled_ep) = 110, trace = from 71 to 79
ep_idx[i] = 71
buffer_index = 41 , len(sampled_ep) = 299, trace = from 105 to 113
ep_idx[i] = 62
buffer_index = 32 , len(sampled_ep) = 77, trace = from 25 to 33
ep_idx[i] = 218
buffer_index = 188 , len(sampled_ep) = 80, trace = from 5 to 13
ep_idx[i] = 106
buffer_index = 76 , len(sampled_ep) = 103, trace = from 19 to 27
ep_idx[i] = 160
buffer_index = 130 , len(sampled_ep) = 117, trace = from 78 to 86
ep_idx[i] = 252
buffer_index = 222 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 226
buffer_index = 196 , len(sa

buffer_index = 162 , len(sampled_ep) = 78, trace = from 24 to 32
ep_idx[i] = 193
buffer_index = 163 , len(sampled_ep) = 108, trace = from 57 to 65
ep_idx[i] = 197
buffer_index = 167 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 80
buffer_index = 50 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 134
buffer_index = 104 , len(sampled_ep) = 208, trace = from 108 to 116
ep_idx[i] = 230
buffer_index = 200 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 222
buffer_index = 192 , len(sampled_ep) = 224, trace = from 122 to 130
ep_idx[i] = 265
buffer_index = 235 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 247
buffer_index = 217 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 288
buffer_index = 258 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 273
buffer_index = 243 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 223
buffer_index = 193 , len(sampled_ep) = 112, trace = from 77 to 85
ep_idx[i] = 284
buffer_index = 254 , le

buffer_index = 153 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 247
buffer_index = 217 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 321
buffer_index = 291 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 318
buffer_index = 288 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 323
buffer_index = 293 , len(sampled_ep) = 43, trace = from 9 to 17
ep_idx[i] = 192
buffer_index = 162 , len(sampled_ep) = 78, trace = from 51 to 59
ep_idx[i] = 71
buffer_index = 41 , len(sampled_ep) = 299, trace = from 14 to 22
ep_idx[i] = 206
buffer_index = 176 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 222
buffer_index = 192 , len(sampled_ep) = 224, trace = from 12 to 20
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9986110000000115
epsilon is = 0.9986095000000115
epsilon is = 0.9986080000000115
epsilon is = 0.9986065000000115
epsilon is = 0.9986050000000115
Target Set Success
ep_idx = [90, 116, 280, 329, 325, 304, 133, 122, 230, 248, 81, 88, 121, 135,

buffer_index = 130 , len(sampled_ep) = 114, trace = from 1 to 9
ep_idx[i] = 191
buffer_index = 160 , len(sampled_ep) = 131, trace = from 39 to 47
ep_idx[i] = 161
buffer_index = 130 , len(sampled_ep) = 114, trace = from 23 to 31
ep_idx[i] = 108
buffer_index = 77 , len(sampled_ep) = 45, trace = from 23 to 31
ep_idx[i] = 43
buffer_index = 12 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 160
buffer_index = 129 , len(sampled_ep) = 117, trace = from 16 to 24
ep_idx[i] = 99
buffer_index = 68 , len(sampled_ep) = 83, trace = from 51 to 59
ep_idx[i] = 137
buffer_index = 106 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 325
buffer_index = 294 , len(sampled_ep) = 82, trace = from 12 to 20
ep_idx[i] = 118
buffer_index = 87 , len(sampled_ep) = 111, trace = from 9 to 17
ep_idx[i] = 306
buffer_index = 275 , len(sampled_ep) = 117, trace = from 13 to 21
ep_idx[i] = 328
buffer_index = 297 , len(sampled_ep) = 75, trace = from 23 to 31
ep_idx[i] = 310
buffer_index = 279 , len(sample

buffer_index = 198 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 110
buffer_index = 79 , len(sampled_ep) = 137, trace = from 61 to 69
ep_idx[i] = 53
buffer_index = 22 , len(sampled_ep) = 81, trace = from 68 to 76
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9985735000000118
epsilon is = 0.9985720000000118
epsilon is = 0.9985705000000118
epsilon is = 0.9985690000000118
epsilon is = 0.9985675000000118
Target Set Success
ep_idx = [257, 292, 134, 81, 184, 297, 228, 115, 285, 216, 71, 194, 281, 51, 150, 135, 51, 99, 120, 259, 123, 123, 72, 123, 275, 43, 152, 237, 170, 163, 76, 315]
exp_idx = [77, 67, 159, 74, 15, 71, 77, 16, 68, 31, 103, 10, 170, 15, 27, 153, 139, 47, 78, 71, 120, 26, 22, 101, 16, 56, 183, 114, 18, 15, 114, 59]
idx_offset = 31, self.episode_index = 330, len(self.buffer) = 300
ep_idx[i] = 257
buffer_index = 226 , len(sampled_ep) = 82, trace = from 70 to 78
ep_idx[i] = 292
buffer_index = 261 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 134
buffer_i

buffer_index = 181 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 286
buffer_index = 255 , len(sampled_ep) = 147, trace = from 118 to 126
ep_idx[i] = 301
buffer_index = 270 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 107
buffer_index = 76 , len(sampled_ep) = 109, trace = from 14 to 22
ep_idx[i] = 48
buffer_index = 17 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 52
buffer_index = 21 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 139
buffer_index = 108 , len(sampled_ep) = 133, trace = from 56 to 64
ep_idx[i] = 71
buffer_index = 40 , len(sampled_ep) = 299, trace = from 272 to 280
ep_idx[i] = 191
buffer_index = 160 , len(sampled_ep) = 131, trace = from 120 to 128
ep_idx[i] = 107
buffer_index = 76 , len(sampled_ep) = 109, trace = from 82 to 90
ep_idx[i] = 53
buffer_index = 22 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 262
buffer_index = 231 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 39
buffer_index = 8 , len(sampl

ep_idx = [206, 218, 189, 75, 123, 308, 328, 274, 136, 40, 31, 247, 283, 189, 74, 115, 179, 239, 285, 101, 272, 311, 310, 243, 292, 49, 317, 93, 240, 110, 136, 188]
exp_idx = [47, 19, 41, 61, 124, 17, 55, 18, 76, 63, 23, 18, 38, 68, 80, 261, 17, 38, 89, 16, 48, 76, 116, 30, 69, 43, 81, 58, 73, 124, 50, 61]
idx_offset = 31, self.episode_index = 330, len(self.buffer) = 300
ep_idx[i] = 206
buffer_index = 175 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 218
buffer_index = 187 , len(sampled_ep) = 80, trace = from 12 to 20
ep_idx[i] = 189
buffer_index = 158 , len(sampled_ep) = 84, trace = from 34 to 42
ep_idx[i] = 75
buffer_index = 44 , len(sampled_ep) = 75, trace = from 54 to 62
ep_idx[i] = 123
buffer_index = 92 , len(sampled_ep) = 198, trace = from 117 to 125
ep_idx[i] = 308
buffer_index = 277 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 328
buffer_index = 297 , len(sampled_ep) = 75, trace = from 48 to 56
ep_idx[i] = 274
buffer_index = 243 , len(sampled_ep) = 114, 

buffer_index = 203 , len(sampled_ep) = 167, trace = from 71 to 79
ep_idx[i] = 132
buffer_index = 101 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 172
buffer_index = 141 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 260
buffer_index = 229 , len(sampled_ep) = 108, trace = from 70 to 78
ep_idx[i] = 35
buffer_index = 4 , len(sampled_ep) = 82, trace = from 17 to 25
ep_idx[i] = 217
buffer_index = 186 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 195
buffer_index = 164 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 251
buffer_index = 220 , len(sampled_ep) = 133, trace = from 123 to 131
ep_idx[i] = 127
buffer_index = 96 , len(sampled_ep) = 50, trace = from 36 to 44
ep_idx[i] = 59
buffer_index = 28 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 148
buffer_index = 117 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 107
buffer_index = 76 , len(sampled_ep) = 109, trace = from 25 to 33
ep_idx[i] = 41
buffer_index = 10 , len(sample

buffer_index = 81 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 213
buffer_index = 182 , len(sampled_ep) = 148, trace = from 132 to 140
ep_idx[i] = 309
buffer_index = 278 , len(sampled_ep) = 115, trace = from 18 to 26
ep_idx[i] = 97
buffer_index = 66 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 201
buffer_index = 170 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 188
buffer_index = 157 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 294
buffer_index = 263 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 174
buffer_index = 143 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 290
buffer_index = 259 , len(sampled_ep) = 152, trace = from 120 to 128
ep_idx[i] = 193
buffer_index = 162 , len(sampled_ep) = 108, trace = from 8 to 16
ep_idx[i] = 276
buffer_index = 245 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 277
buffer_index = 246 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 213
buffer_index = 182 , len(

buffer_index = 293 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 291
buffer_index = 260 , len(sampled_ep) = 103, trace = from 12 to 20
ep_idx[i] = 175
buffer_index = 144 , len(sampled_ep) = 102, trace = from 58 to 66
ep_idx[i] = 151
buffer_index = 120 , len(sampled_ep) = 82, trace = from 32 to 40
ep_idx[i] = 160
buffer_index = 129 , len(sampled_ep) = 117, trace = from 75 to 83
ep_idx[i] = 286
buffer_index = 255 , len(sampled_ep) = 147, trace = from 40 to 48
ep_idx[i] = 281
buffer_index = 250 , len(sampled_ep) = 178, trace = from 145 to 153
ep_idx[i] = 289
buffer_index = 258 , len(sampled_ep) = 83, trace = from 0 to 8
ep_idx[i] = 72
buffer_index = 41 , len(sampled_ep) = 82, trace = from 64 to 72
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9984760000000126
length of poped element = 51 , cntr = 44 , diff = 7
epsilon is = 0.9984745000000126
epsilon is = 0.9984730000000126
epsilon is = 0.9984715000000126
epsilon is = 0.9984700000000126
Target Set Success
ep_idx = [263, 53, 2

buffer_index = 0 , len(sampled_ep) = 84, trace = from 7 to 15
ep_idx[i] = 291
buffer_index = 259 , len(sampled_ep) = 103, trace = from 16 to 24
ep_idx[i] = 134
buffer_index = 102 , len(sampled_ep) = 208, trace = from 114 to 122
ep_idx[i] = 266
buffer_index = 234 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 36
buffer_index = 4 , len(sampled_ep) = 134, trace = from 104 to 112
ep_idx[i] = 116
buffer_index = 84 , len(sampled_ep) = 162, trace = from 122 to 130
ep_idx[i] = 190
buffer_index = 158 , len(sampled_ep) = 42, trace = from 29 to 37
ep_idx[i] = 236
buffer_index = 204 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 225
buffer_index = 193 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 55
buffer_index = 23 , len(sampled_ep) = 76, trace = from 5 to 13
ep_idx[i] = 213
buffer_index = 181 , len(sampled_ep) = 148, trace = from 21 to 29
ep_idx[i] = 43
buffer_index = 11 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 216
buffer_index = 184 , len(samp

ep_idx[i] = 256
buffer_index = 224 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 208
buffer_index = 176 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 222
buffer_index = 190 , len(sampled_ep) = 224, trace = from 89 to 97
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9984385000000129
epsilon is = 0.9984370000000129
epsilon is = 0.9984355000000129
epsilon is = 0.9984340000000129
epsilon is = 0.9984325000000129
Target Set Success
ep_idx = [45, 107, 181, 266, 199, 229, 118, 330, 133, 288, 222, 119, 86, 206, 123, 284, 179, 232, 142, 140, 70, 234, 292, 70, 259, 272, 56, 136, 102, 106, 155, 41]
exp_idx = [68, 17, 11, 29, 51, 17, 79, 122, 18, 64, 45, 168, 12, 14, 147, 46, 62, 32, 71, 76, 33, 141, 70, 117, 48, 44, 109, 92, 79, 15, 9, 7]
idx_offset = 32, self.episode_index = 331, len(self.buffer) = 300
ep_idx[i] = 45
buffer_index = 13 , len(sampled_ep) = 100, trace = from 61 to 69
ep_idx[i] = 107
buffer_index = 75 , len(sampled_ep) = 109, trace = from 10 to 18
ep_idx[i] 

buffer_index = 91 , len(sampled_ep) = 198, trace = from 12 to 20
ep_idx[i] = 107
buffer_index = 75 , len(sampled_ep) = 109, trace = from 97 to 105
ep_idx[i] = 277
buffer_index = 245 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 254
buffer_index = 222 , len(sampled_ep) = 116, trace = from 50 to 58
ep_idx[i] = 329
buffer_index = 297 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 149
buffer_index = 117 , len(sampled_ep) = 82, trace = from 54 to 62
ep_idx[i] = 292
buffer_index = 260 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 184
buffer_index = 152 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 179
buffer_index = 147 , len(sampled_ep) = 77, trace = from 41 to 49
ep_idx[i] = 275
buffer_index = 243 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 41
buffer_index = 9 , len(sampled_ep) = 85, trace = from 26 to 34
ep_idx[i] = 313
buffer_index = 281 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 48
buffer_index = 16 , len(sampled_e

ep_idx = [290, 94, 234, 310, 254, 101, 308, 147, 93, 330, 233, 134, 301, 102, 197, 331, 79, 79, 267, 212, 251, 252, 200, 228, 260, 71, 262, 135, 310, 113, 196, 295]
exp_idx = [105, 14, 33, 147, 114, 24, 17, 40, 74, 86, 35, 151, 23, 77, 22, 20, 46, 43, 27, 58, 88, 16, 69, 44, 70, 15, 17, 119, 161, 17, 101, 29]
idx_offset = 32, self.episode_index = 331, len(self.buffer) = 300
ep_idx[i] = 290
buffer_index = 258 , len(sampled_ep) = 152, trace = from 98 to 106
ep_idx[i] = 94
buffer_index = 62 , len(sampled_ep) = 45, trace = from 7 to 15
ep_idx[i] = 234
buffer_index = 202 , len(sampled_ep) = 167, trace = from 26 to 34
ep_idx[i] = 310
buffer_index = 278 , len(sampled_ep) = 164, trace = from 140 to 148
ep_idx[i] = 254
buffer_index = 222 , len(sampled_ep) = 116, trace = from 107 to 115
ep_idx[i] = 101
buffer_index = 69 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 308
buffer_index = 276 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 147
buffer_index = 115 , len(sampled_ep

ep_idx[i] = 298
buffer_index = 266 , len(sampled_ep) = 115, trace = from 61 to 69
ep_idx[i] = 39
buffer_index = 7 , len(sampled_ep) = 133, trace = from 6 to 14
ep_idx[i] = 87
buffer_index = 55 , len(sampled_ep) = 76, trace = from 36 to 44
ep_idx[i] = 284
buffer_index = 252 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 286
buffer_index = 254 , len(sampled_ep) = 147, trace = from 39 to 47
ep_idx[i] = 330
buffer_index = 298 , len(sampled_ep) = 175, trace = from 83 to 91
ep_idx[i] = 301
buffer_index = 269 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 98
buffer_index = 66 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 290
buffer_index = 258 , len(sampled_ep) = 152, trace = from 109 to 117
ep_idx[i] = 286
buffer_index = 254 , len(sampled_ep) = 147, trace = from 123 to 131
ep_idx[i] = 105
buffer_index = 73 , len(sampled_ep) = 74, trace = from 5 to 13
ep_idx[i] = 112
buffer_index = 80 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 312
buffer_index 

buffer_index = 17 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 151
buffer_index = 119 , len(sampled_ep) = 82, trace = from 9 to 17
ep_idx[i] = 135
buffer_index = 103 , len(sampled_ep) = 211, trace = from 122 to 130
ep_idx[i] = 134
buffer_index = 102 , len(sampled_ep) = 208, trace = from 27 to 35
ep_idx[i] = 158
buffer_index = 126 , len(sampled_ep) = 83, trace = from 54 to 62
ep_idx[i] = 266
buffer_index = 234 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 223
buffer_index = 191 , len(sampled_ep) = 112, trace = from 30 to 38
ep_idx[i] = 287
buffer_index = 255 , len(sampled_ep) = 194, trace = from 12 to 20
ep_idx[i] = 199
buffer_index = 167 , len(sampled_ep) = 116, trace = from 93 to 101
ep_idx[i] = 42
buffer_index = 10 , len(sampled_ep) = 17, trace = from 6 to 14
ep_idx[i] = 264
buffer_index = 232 , len(sampled_ep) = 109, trace = from 4 to 12
ep_idx[i] = 316
buffer_index = 284 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 123
buffer_index = 91 , len(sa

buffer_index = 97 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 330
buffer_index = 297 , len(sampled_ep) = 175, trace = from 127 to 135
ep_idx[i] = 211
buffer_index = 178 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 69
buffer_index = 36 , len(sampled_ep) = 44, trace = from 11 to 19
ep_idx[i] = 58
buffer_index = 25 , len(sampled_ep) = 49, trace = from 9 to 17
ep_idx[i] = 138
buffer_index = 105 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 184
buffer_index = 151 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 247
buffer_index = 214 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 147
buffer_index = 114 , len(sampled_ep) = 81, trace = from 13 to 21
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9983410000000137
epsilon is = 0.9983395000000137
epsilon is = 0.9983380000000137
epsilon is = 0.9983365000000137
epsilon is = 0.9983350000000137
Target Set Success
ep_idx = [285, 214, 193, 294, 302, 77, 188, 332, 201, 102, 260, 140, 154, 119,

buffer_index = 299 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 299
buffer_index = 266 , len(sampled_ep) = 100, trace = from 5 to 13
ep_idx[i] = 287
buffer_index = 254 , len(sampled_ep) = 194, trace = from 52 to 60
ep_idx[i] = 129
buffer_index = 96 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 268
buffer_index = 235 , len(sampled_ep) = 120, trace = from 75 to 83
ep_idx[i] = 134
buffer_index = 101 , len(sampled_ep) = 208, trace = from 167 to 175
ep_idx[i] = 220
buffer_index = 187 , len(sampled_ep) = 118, trace = from 86 to 94
ep_idx[i] = 88
buffer_index = 55 , len(sampled_ep) = 114, trace = from 76 to 84
ep_idx[i] = 36
buffer_index = 3 , len(sampled_ep) = 134, trace = from 105 to 113
ep_idx[i] = 144
buffer_index = 111 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 167
buffer_index = 134 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 249
buffer_index = 216 , len(sampled_ep) = 75, trace = from 4 to 12
ep_idx[i] = 279
buffer_index = 246 , len(s

ep_idx[i] = 75
buffer_index = 42 , len(sampled_ep) = 75, trace = from 4 to 12
ep_idx[i] = 117
buffer_index = 84 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 102
buffer_index = 69 , len(sampled_ep) = 103, trace = from 72 to 80
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.998303500000014
epsilon is = 0.998302000000014
epsilon is = 0.998300500000014
epsilon is = 0.998299000000014
epsilon is = 0.998297500000014
Target Set Success
ep_idx = [197, 108, 105, 91, 205, 148, 208, 301, 139, 293, 223, 315, 209, 76, 106, 323, 115, 165, 222, 57, 71, 210, 329, 291, 94, 273, 252, 36, 109, 56, 186, 275]
exp_idx = [74, 43, 49, 43, 11, 63, 21, 9, 30, 43, 29, 61, 21, 143, 81, 18, 17, 7, 105, 82, 128, 44, 62, 92, 27, 16, 17, 34, 14, 98, 18, 16]
idx_offset = 33, self.episode_index = 332, len(self.buffer) = 300
ep_idx[i] = 197
buffer_index = 164 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 108
buffer_index = 75 , len(sampled_ep) = 45, trace = from 36 to 44
ep_idx[i] = 105
buffer_i

ep_idx[i] = 326
buffer_index = 293 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 66
buffer_index = 33 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 306
buffer_index = 273 , len(sampled_ep) = 117, trace = from 47 to 55
ep_idx[i] = 161
buffer_index = 128 , len(sampled_ep) = 114, trace = from 9 to 17
ep_idx[i] = 116
buffer_index = 83 , len(sampled_ep) = 162, trace = from 153 to 161
ep_idx[i] = 309
buffer_index = 276 , len(sampled_ep) = 115, trace = from 5 to 13
ep_idx[i] = 37
buffer_index = 4 , len(sampled_ep) = 44, trace = from 23 to 31
ep_idx[i] = 332
buffer_index = 299 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 254
buffer_index = 221 , len(sampled_ep) = 116, trace = from 102 to 110
ep_idx[i] = 261
buffer_index = 228 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 223
buffer_index = 190 , len(sampled_ep) = 112, trace = from 101 to 109
ep_idx[i] = 302
buffer_index = 269 , len(sampled_ep) = 79, trace = from 65 to 73
ep_idx[i] = 199
buffer_ind

ep_idx = [293, 306, 54, 261, 88, 122, 103, 157, 236, 322, 291, 45, 128, 84, 80, 237, 306, 145, 113, 39, 158, 40, 330, 161, 320, 119, 326, 71, 225, 53, 182, 81]
exp_idx = [7, 56, 9, 16, 7, 59, 18, 49, 9, 104, 88, 32, 8, 69, 53, 53, 89, 13, 77, 63, 56, 113, 36, 112, 33, 123, 24, 16, 38, 65, 23, 41]
idx_offset = 33, self.episode_index = 332, len(self.buffer) = 300
ep_idx[i] = 293
buffer_index = 260 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 306
buffer_index = 273 , len(sampled_ep) = 117, trace = from 49 to 57
ep_idx[i] = 54
buffer_index = 21 , len(sampled_ep) = 47, trace = from 2 to 10
ep_idx[i] = 261
buffer_index = 228 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 88
buffer_index = 55 , len(sampled_ep) = 114, trace = from 0 to 8
ep_idx[i] = 122
buffer_index = 89 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 103
buffer_index = 70 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 157
buffer_index = 124 , len(sampled_ep) = 103, trace = from 42 to 

buffer_index = 273 , len(sampled_ep) = 117, trace = from 86 to 94
ep_idx[i] = 82
buffer_index = 49 , len(sampled_ep) = 73, trace = from 33 to 41
ep_idx[i] = 207
buffer_index = 174 , len(sampled_ep) = 75, trace = from 14 to 22
ep_idx[i] = 156
buffer_index = 123 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 202
buffer_index = 169 , len(sampled_ep) = 143, trace = from 48 to 56
ep_idx[i] = 62
buffer_index = 29 , len(sampled_ep) = 77, trace = from 47 to 55
ep_idx[i] = 199
buffer_index = 166 , len(sampled_ep) = 116, trace = from 79 to 87
ep_idx[i] = 265
buffer_index = 232 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 123
buffer_index = 90 , len(sampled_ep) = 198, trace = from 116 to 124
ep_idx[i] = 73
buffer_index = 40 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 310
buffer_index = 277 , len(sampled_ep) = 164, trace = from 83 to 91
ep_idx[i] = 55
buffer_index = 22 , len(sampled_ep) = 76, trace = from 61 to 69
ep_idx[i] = 327
buffer_index = 294 , len(sample

ep_idx[i] = 259
buffer_index = 225 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 72
buffer_index = 38 , len(sampled_ep) = 82, trace = from 6 to 14
ep_idx[i] = 110
buffer_index = 76 , len(sampled_ep) = 137, trace = from 16 to 24
ep_idx[i] = 89
buffer_index = 55 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 111
buffer_index = 77 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 294
buffer_index = 260 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 286
buffer_index = 252 , len(sampled_ep) = 147, trace = from 6 to 14
ep_idx[i] = 145
buffer_index = 111 , len(sampled_ep) = 117, trace = from 5 to 13
ep_idx[i] = 71
buffer_index = 37 , len(sampled_ep) = 299, trace = from 148 to 156
ep_idx[i] = 225
buffer_index = 191 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 219
buffer_index = 185 , len(sampled_ep) = 110, trace = from 48 to 56
ep_idx[i] = 158
buffer_index = 124 , len(sampled_ep) = 83, trace = from 73 to 81
ep_idx[i] = 309
buffer_index = 27

buffer_index = 285 , len(sampled_ep) = 70, trace = from 45 to 53
ep_idx[i] = 275
buffer_index = 241 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 86
buffer_index = 52 , len(sampled_ep) = 77, trace = from 18 to 26
ep_idx[i] = 83
buffer_index = 49 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 162
buffer_index = 128 , len(sampled_ep) = 112, trace = from 11 to 19
ep_idx[i] = 241
buffer_index = 207 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 84
buffer_index = 50 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 39
buffer_index = 5 , len(sampled_ep) = 133, trace = from 64 to 72
ep_idx[i] = 51
buffer_index = 17 , len(sampled_ep) = 145, trace = from 25 to 33
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9982060000000148
epsilon is = 0.9982045000000148
epsilon is = 0.9982030000000148
epsilon is = 0.9982015000000148
epsilon is = 0.9982000000000149
Target Set Success
ep_idx = [147, 229, 57, 257, 318, 102, 305, 235, 328, 330, 95, 85, 165, 83, 330, 239

ep_idx[i] = 70
buffer_index = 36 , len(sampled_ep) = 198, trace = from 120 to 128
ep_idx[i] = 238
buffer_index = 204 , len(sampled_ep) = 79, trace = from 59 to 67
ep_idx[i] = 286
buffer_index = 252 , len(sampled_ep) = 147, trace = from 96 to 104
ep_idx[i] = 128
buffer_index = 94 , len(sampled_ep) = 74, trace = from 32 to 40
ep_idx[i] = 91
buffer_index = 57 , len(sampled_ep) = 46, trace = from 8 to 16
ep_idx[i] = 81
buffer_index = 47 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 255
buffer_index = 221 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 119
buffer_index = 85 , len(sampled_ep) = 183, trace = from 101 to 109
ep_idx[i] = 40
buffer_index = 6 , len(sampled_ep) = 116, trace = from 10 to 18
ep_idx[i] = 228
buffer_index = 194 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 306
buffer_index = 272 , len(sampled_ep) = 117, trace = from 79 to 87
ep_idx[i] = 134
buffer_index = 100 , len(sampled_ep) = 208, trace = from 154 to 162
ep_idx[i] = 324
buffer_index

ep_idx[i] = 260
buffer_index = 226 , len(sampled_ep) = 108, trace = from 69 to 77
ep_idx[i] = 162
buffer_index = 128 , len(sampled_ep) = 112, trace = from 78 to 86
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9981685000000151
epsilon is = 0.9981670000000151
epsilon is = 0.9981655000000151
epsilon is = 0.9981640000000152
epsilon is = 0.9981625000000152
Target Set Success
ep_idx = [229, 198, 295, 266, 162, 320, 100, 312, 88, 328, 157, 279, 178, 135, 186, 232, 208, 152, 96, 166, 180, 316, 277, 55, 246, 66, 40, 53, 287, 333, 131, 271]
exp_idx = [18, 12, 17, 29, 34, 60, 84, 26, 21, 47, 8, 17, 13, 192, 16, 65, 52, 78, 17, 76, 134, 16, 56, 51, 32, 53, 111, 18, 138, 70, 79, 31]
idx_offset = 34, self.episode_index = 333, len(self.buffer) = 300
ep_idx[i] = 229
buffer_index = 195 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 198
buffer_index = 164 , len(sampled_ep) = 42, trace = from 5 to 13
ep_idx[i] = 295
buffer_index = 261 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] 

buffer_index = 290 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 254
buffer_index = 220 , len(sampled_ep) = 116, trace = from 94 to 102
ep_idx[i] = 92
buffer_index = 58 , len(sampled_ep) = 281, trace = from 244 to 252
ep_idx[i] = 135
buffer_index = 101 , len(sampled_ep) = 211, trace = from 34 to 42
ep_idx[i] = 54
buffer_index = 20 , len(sampled_ep) = 47, trace = from 9 to 17
ep_idx[i] = 197
buffer_index = 163 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 74
buffer_index = 40 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 144
buffer_index = 110 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 38
buffer_index = 4 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 298
buffer_index = 264 , len(sampled_ep) = 115, trace = from 94 to 102
ep_idx[i] = 55
buffer_index = 21 , len(sampled_ep) = 76, trace = from 25 to 33
ep_idx[i] = 39
buffer_index = 5 , len(sampled_ep) = 133, trace = from 49 to 57
ep_idx[i] = 66
buffer_index = 32 , len(sampled_ep) 

ep_idx = [331, 302, 102, 262, 135, 200, 317, 299, 85, 152, 113, 315, 102, 228, 223, 95, 208, 215, 115, 254, 39, 133, 66, 40, 320, 161, 329, 118, 273, 283, 56, 40]
exp_idx = [55, 22, 92, 55, 43, 67, 83, 34, 50, 116, 14, 71, 98, 36, 42, 21, 17, 59, 185, 46, 26, 22, 76, 16, 13, 8, 15, 76, 7, 17, 9, 60]
idx_offset = 34, self.episode_index = 333, len(self.buffer) = 300
ep_idx[i] = 331
buffer_index = 297 , len(sampled_ep) = 84, trace = from 48 to 56
ep_idx[i] = 302
buffer_index = 268 , len(sampled_ep) = 79, trace = from 15 to 23
ep_idx[i] = 102
buffer_index = 68 , len(sampled_ep) = 103, trace = from 85 to 93
ep_idx[i] = 262
buffer_index = 228 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 135
buffer_index = 101 , len(sampled_ep) = 211, trace = from 36 to 44
ep_idx[i] = 200
buffer_index = 166 , len(sampled_ep) = 82, trace = from 60 to 68
ep_idx[i] = 317
buffer_index = 283 , len(sampled_ep) = 84, trace = from 76 to 84
ep_idx[i] = 299
buffer_index = 265 , len(sampled_ep) = 100, trace

buffer_index = 245 , len(sampled_ep) = 48, trace = from 33 to 41
ep_idx[i] = 311
buffer_index = 277 , len(sampled_ep) = 84, trace = from 46 to 54
ep_idx[i] = 318
buffer_index = 284 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 231
buffer_index = 197 , len(sampled_ep) = 77, trace = from 13 to 21
ep_idx[i] = 123
buffer_index = 89 , len(sampled_ep) = 198, trace = from 52 to 60
ep_idx[i] = 205
buffer_index = 171 , len(sampled_ep) = 78, trace = from 36 to 44
ep_idx[i] = 201
buffer_index = 167 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 56
buffer_index = 22 , len(sampled_ep) = 198, trace = from 169 to 177
ep_idx[i] = 77
buffer_index = 43 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 252
buffer_index = 218 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 240
buffer_index = 206 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 254
buffer_index = 220 , len(sampled_ep) = 116, trace = from 11 to 19
ep_idx[i] = 281
buffer_index = 247 , len(sam

buffer_index = 184 , len(sampled_ep) = 80, trace = from 60 to 68
ep_idx[i] = 213
buffer_index = 179 , len(sampled_ep) = 148, trace = from 17 to 25
ep_idx[i] = 276
buffer_index = 242 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 34
buffer_index = 0 , len(sampled_ep) = 82, trace = from 22 to 30
ep_idx[i] = 299
buffer_index = 265 , len(sampled_ep) = 100, trace = from 4 to 12
ep_idx[i] = 140
buffer_index = 106 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 76
buffer_index = 42 , len(sampled_ep) = 198, trace = from 32 to 40
ep_idx[i] = 85
buffer_index = 51 , len(sampled_ep) = 108, trace = from 57 to 65
ep_idx[i] = 78
buffer_index = 44 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 105
buffer_index = 71 , len(sampled_ep) = 74, trace = from 16 to 24
ep_idx[i] = 147
buffer_index = 113 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 326
buffer_index = 292 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 116
buffer_index = 82 , len(sampled_ep) =

ep_idx[i] = 310
buffer_index = 276 , len(sampled_ep) = 164, trace = from 29 to 37
ep_idx[i] = 59
buffer_index = 25 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 165
buffer_index = 131 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 97
buffer_index = 63 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 110
buffer_index = 76 , len(sampled_ep) = 137, trace = from 76 to 84
ep_idx[i] = 87
buffer_index = 53 , len(sampled_ep) = 76, trace = from 64 to 72
ep_idx[i] = 279
buffer_index = 245 , len(sampled_ep) = 48, trace = from 9 to 17
ep_idx[i] = 38
buffer_index = 4 , len(sampled_ep) = 81, trace = from 73 to 81
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9980710000000159
epsilon is = 0.9980695000000159
epsilon is = 0.9980680000000159
epsilon is = 0.998066500000016
epsilon is = 0.998065000000016
Target Set Success
ep_idx = [288, 102, 241, 177, 251, 175, 131, 95, 202, 230, 276, 175, 161, 135, 165, 132, 195, 132, 290, 203, 333, 116, 131, 231, 330, 287, 259, 193, 115

ep_idx[i] = 56
buffer_index = 21 , len(sampled_ep) = 198, trace = from 55 to 63
ep_idx[i] = 175
buffer_index = 140 , len(sampled_ep) = 102, trace = from 77 to 85
ep_idx[i] = 190
buffer_index = 155 , len(sampled_ep) = 42, trace = from 30 to 38
ep_idx[i] = 193
buffer_index = 158 , len(sampled_ep) = 108, trace = from 86 to 94
ep_idx[i] = 49
buffer_index = 14 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 72
buffer_index = 37 , len(sampled_ep) = 82, trace = from 0 to 8
ep_idx[i] = 214
buffer_index = 179 , len(sampled_ep) = 75, trace = from 42 to 50
ep_idx[i] = 197
buffer_index = 162 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 196
buffer_index = 161 , len(sampled_ep) = 135, trace = from 112 to 120
ep_idx[i] = 291
buffer_index = 256 , len(sampled_ep) = 103, trace = from 48 to 56
ep_idx[i] = 177
buffer_index = 142 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 51
buffer_index = 16 , len(sampled_ep) = 145, trace = from 120 to 128
ep_idx[i] = 123
buffer_index

buffer_index = 299 , len(sampled_ep) = 113, trace = from 16 to 24
ep_idx[i] = 277
buffer_index = 242 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 177
buffer_index = 142 , len(sampled_ep) = 81, trace = from 13 to 21
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9980335000000162
epsilon is = 0.9980320000000162
epsilon is = 0.9980305000000163
epsilon is = 0.9980290000000163
epsilon is = 0.9980275000000163
Target Set Success
ep_idx = [211, 100, 305, 80, 334, 164, 140, 88, 123, 221, 172, 102, 158, 148, 222, 121, 93, 39, 91, 45, 162, 242, 239, 295, 301, 175, 196, 312, 92, 67, 163, 134]
exp_idx = [18, 70, 10, 67, 103, 19, 8, 24, 8, 62, 16, 96, 59, 28, 111, 30, 44, 30, 37, 50, 47, 80, 24, 25, 13, 76, 38, 85, 22, 17, 121, 38]
idx_offset = 35, self.episode_index = 334, len(self.buffer) = 300
ep_idx[i] = 211
buffer_index = 176 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 100
buffer_index = 65 , len(sampled_ep) = 102, trace = from 63 to 71
ep_idx[i] = 305
buffer_index = 

buffer_index = 37 , len(sampled_ep) = 82, trace = from 17 to 25
ep_idx[i] = 107
buffer_index = 72 , len(sampled_ep) = 109, trace = from 21 to 29
ep_idx[i] = 36
buffer_index = 1 , len(sampled_ep) = 134, trace = from 76 to 84
ep_idx[i] = 310
buffer_index = 275 , len(sampled_ep) = 164, trace = from 31 to 39
ep_idx[i] = 54
buffer_index = 19 , len(sampled_ep) = 47, trace = from 1 to 9
ep_idx[i] = 153
buffer_index = 118 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 324
buffer_index = 289 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 313
buffer_index = 278 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 332
buffer_index = 297 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 71
buffer_index = 36 , len(sampled_ep) = 299, trace = from 75 to 83
ep_idx[i] = 163
buffer_index = 128 , len(sampled_ep) = 134, trace = from 42 to 50
ep_idx[i] = 312
buffer_index = 277 , len(sampled_ep) = 108, trace = from 6 to 14
ep_idx[i] = 275
buffer_index = 240 , len(sampled_ep

ep_idx = [123, 55, 171, 186, 265, 92, 332, 115, 129, 78, 183, 42, 165, 174, 133, 322, 329, 197, 92, 258, 243, 268, 329, 251, 221, 110, 39, 47, 334, 152, 299, 207]
exp_idx = [58, 32, 16, 23, 24, 15, 76, 127, 18, 24, 14, 12, 49, 16, 25, 64, 60, 13, 11, 9, 62, 94, 34, 84, 20, 33, 44, 55, 106, 148, 13, 7]
idx_offset = 35, self.episode_index = 334, len(self.buffer) = 300
ep_idx[i] = 123
buffer_index = 88 , len(sampled_ep) = 198, trace = from 51 to 59
ep_idx[i] = 55
buffer_index = 20 , len(sampled_ep) = 76, trace = from 25 to 33
ep_idx[i] = 171
buffer_index = 136 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 186
buffer_index = 151 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 265
buffer_index = 230 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 92
buffer_index = 57 , len(sampled_ep) = 281, trace = from 8 to 16
ep_idx[i] = 332
buffer_index = 297 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 115
buffer_index = 80 , len(sampled_ep) = 299, trace = fr

buffer_index = 99 , len(sampled_ep) = 208, trace = from 13 to 21
ep_idx[i] = 214
buffer_index = 179 , len(sampled_ep) = 75, trace = from 37 to 45
ep_idx[i] = 287
buffer_index = 252 , len(sampled_ep) = 194, trace = from 11 to 19
ep_idx[i] = 330
buffer_index = 295 , len(sampled_ep) = 175, trace = from 18 to 26
ep_idx[i] = 139
buffer_index = 104 , len(sampled_ep) = 133, trace = from 57 to 65
ep_idx[i] = 334
buffer_index = 299 , len(sampled_ep) = 113, trace = from 83 to 91
ep_idx[i] = 274
buffer_index = 239 , len(sampled_ep) = 114, trace = from 11 to 19
ep_idx[i] = 265
buffer_index = 230 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 122
buffer_index = 87 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 286
buffer_index = 251 , len(sampled_ep) = 147, trace = from 80 to 88
ep_idx[i] = 256
buffer_index = 221 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 107
buffer_index = 72 , len(sampled_ep) = 109, trace = from 9 to 17
ep_idx[i] = 281
buffer_index = 246 , len

buffer_index = 84 , len(sampled_ep) = 183, trace = from 164 to 172
ep_idx[i] = 334
buffer_index = 299 , len(sampled_ep) = 113, trace = from 81 to 89
ep_idx[i] = 330
buffer_index = 295 , len(sampled_ep) = 175, trace = from 91 to 99
ep_idx[i] = 97
buffer_index = 62 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 242
buffer_index = 207 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 56
buffer_index = 21 , len(sampled_ep) = 198, trace = from 122 to 130
ep_idx[i] = 94
buffer_index = 59 , len(sampled_ep) = 45, trace = from 34 to 42
ep_idx[i] = 290
buffer_index = 255 , len(sampled_ep) = 152, trace = from 79 to 87
ep_idx[i] = 40
buffer_index = 5 , len(sampled_ep) = 116, trace = from 21 to 29
ep_idx[i] = 162
buffer_index = 127 , len(sampled_ep) = 112, trace = from 62 to 70
ep_idx[i] = 92
buffer_index = 57 , len(sampled_ep) = 281, trace = from 10 to 18
ep_idx[i] = 240
buffer_index = 205 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 330
buffer_index = 295 , len(sample

buffer_index = 85 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 231
buffer_index = 195 , len(sampled_ep) = 77, trace = from 48 to 56
ep_idx[i] = 221
buffer_index = 185 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 334
buffer_index = 298 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 318
buffer_index = 282 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 76
buffer_index = 40 , len(sampled_ep) = 198, trace = from 5 to 13
ep_idx[i] = 215
buffer_index = 179 , len(sampled_ep) = 117, trace = from 78 to 86
ep_idx[i] = 286
buffer_index = 250 , len(sampled_ep) = 147, trace = from 48 to 56
ep_idx[i] = 254
buffer_index = 218 , len(sampled_ep) = 116, trace = from 98 to 106
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.997936000000017
epsilon is = 0.997934500000017
epsilon is = 0.9979330000000171
epsilon is = 0.9979315000000171
epsilon is = 0.9979300000000171
Target Set Success
ep_idx = [246, 254, 172, 104, 150, 267, 146, 53, 272, 306, 119, 164, 137,

buffer_index = 73 , len(sampled_ep) = 48, trace = from 35 to 43
ep_idx[i] = 97
buffer_index = 61 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 273
buffer_index = 237 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 290
buffer_index = 254 , len(sampled_ep) = 152, trace = from 97 to 105
ep_idx[i] = 75
buffer_index = 39 , len(sampled_ep) = 75, trace = from 26 to 34
ep_idx[i] = 118
buffer_index = 82 , len(sampled_ep) = 111, trace = from 7 to 15
ep_idx[i] = 71
buffer_index = 35 , len(sampled_ep) = 299, trace = from 21 to 29
ep_idx[i] = 164
buffer_index = 128 , len(sampled_ep) = 118, trace = from 11 to 19
ep_idx[i] = 114
buffer_index = 78 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 118
buffer_index = 82 , len(sampled_ep) = 111, trace = from 20 to 28
ep_idx[i] = 43
buffer_index = 7 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 216
buffer_index = 180 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 164
buffer_index = 128 , len(sampled_ep) 

buffer_index = 151 , len(sampled_ep) = 84, trace = from 45 to 53
ep_idx[i] = 261
buffer_index = 225 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 259
buffer_index = 223 , len(sampled_ep) = 81, trace = from 14 to 22
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9978985000000173
epsilon is = 0.9978970000000174
epsilon is = 0.9978955000000174
epsilon is = 0.9978940000000174
epsilon is = 0.9978925000000174
Target Set Success
ep_idx = [183, 334, 297, 99, 207, 186, 191, 287, 43, 321, 156, 186, 53, 163, 310, 149, 81, 108, 139, 176, 95, 162, 168, 102, 174, 115, 162, 334, 325, 250, 88, 259]
exp_idx = [30, 59, 30, 57, 45, 11, 9, 109, 23, 22, 33, 69, 36, 19, 45, 19, 64, 15, 12, 68, 50, 24, 30, 26, 33, 13, 102, 90, 73, 35, 105, 74]
idx_offset = 36, self.episode_index = 335, len(self.buffer) = 300
ep_idx[i] = 183
buffer_index = 147 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 334
buffer_index = 298 , len(sampled_ep) = 113, trace = from 52 to 60
ep_idx[i] = 297
buffer_inde

buffer_index = 46 , len(sampled_ep) = 73, trace = from 0 to 8
ep_idx[i] = 135
buffer_index = 99 , len(sampled_ep) = 211, trace = from 147 to 155
ep_idx[i] = 334
buffer_index = 298 , len(sampled_ep) = 113, trace = from 104 to 112
ep_idx[i] = 127
buffer_index = 91 , len(sampled_ep) = 50, trace = from 17 to 25
ep_idx[i] = 208
buffer_index = 172 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 119
buffer_index = 83 , len(sampled_ep) = 183, trace = from 35 to 43
ep_idx[i] = 91
buffer_index = 55 , len(sampled_ep) = 46, trace = from 5 to 13
ep_idx[i] = 154
buffer_index = 118 , len(sampled_ep) = 153, trace = from 79 to 87
ep_idx[i] = 330
buffer_index = 294 , len(sampled_ep) = 175, trace = from 166 to 174
ep_idx[i] = 37
buffer_index = 1 , len(sampled_ep) = 44, trace = from 7 to 15
ep_idx[i] = 234
buffer_index = 198 , len(sampled_ep) = 167, trace = from 133 to 141
ep_idx[i] = 61
buffer_index = 25 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 229
buffer_index = 193 , len(samp

ep_idx = [306, 70, 143, 158, 196, 276, 304, 227, 37, 288, 192, 67, 76, 154, 135, 199, 329, 65, 162, 241, 110, 211, 131, 330, 113, 76, 309, 326, 38, 282, 325, 295]
exp_idx = [87, 176, 35, 55, 26, 56, 16, 57, 12, 65, 9, 16, 16, 114, 142, 18, 66, 13, 45, 44, 60, 43, 36, 49, 14, 16, 12, 78, 7, 15, 25, 67]
idx_offset = 36, self.episode_index = 335, len(self.buffer) = 300
ep_idx[i] = 306
buffer_index = 270 , len(sampled_ep) = 117, trace = from 80 to 88
ep_idx[i] = 70
buffer_index = 34 , len(sampled_ep) = 198, trace = from 169 to 177
ep_idx[i] = 143
buffer_index = 107 , len(sampled_ep) = 78, trace = from 28 to 36
ep_idx[i] = 158
buffer_index = 122 , len(sampled_ep) = 83, trace = from 48 to 56
ep_idx[i] = 196
buffer_index = 160 , len(sampled_ep) = 135, trace = from 19 to 27
ep_idx[i] = 276
buffer_index = 240 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 304
buffer_index = 268 , len(sampled_ep) = 72, trace = from 9 to 17
ep_idx[i] = 227
buffer_index = 191 , len(sampled_ep) = 112, tr

buffer_index = 82 , len(sampled_ep) = 111, trace = from 71 to 79
ep_idx[i] = 243
buffer_index = 207 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 334
buffer_index = 298 , len(sampled_ep) = 113, trace = from 95 to 103
ep_idx[i] = 84
buffer_index = 48 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 333
buffer_index = 297 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 176
buffer_index = 140 , len(sampled_ep) = 80, trace = from 42 to 50
ep_idx[i] = 295
buffer_index = 259 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 330
buffer_index = 294 , len(sampled_ep) = 175, trace = from 15 to 23
ep_idx[i] = 266
buffer_index = 230 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 51
buffer_index = 15 , len(sampled_ep) = 145, trace = from 29 to 37
ep_idx[i] = 223
buffer_index = 187 , len(sampled_ep) = 112, trace = from 13 to 21
ep_idx[i] = 334
buffer_index = 298 , len(sampled_ep) = 113, trace = from 86 to 94
ep_idx[i] = 160
buffer_index = 124 , len(sa

buffer_index = 229 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 147
buffer_index = 111 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 261
buffer_index = 225 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 261
buffer_index = 225 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 310
buffer_index = 274 , len(sampled_ep) = 164, trace = from 141 to 149
ep_idx[i] = 206
buffer_index = 170 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 225
buffer_index = 189 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 215
buffer_index = 179 , len(sampled_ep) = 117, trace = from 98 to 106
ep_idx[i] = 60
buffer_index = 24 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 223
buffer_index = 187 , len(sampled_ep) = 112, trace = from 81 to 89
ep_idx[i] = 141
buffer_index = 105 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 94
buffer_index = 58 , len(sampled_ep) = 45, trace = from 8 to 16
ep_idx[i] = 255
buffer_index = 219 , len(sample

buffer_index = 141 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 50
buffer_index = 14 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 281
buffer_index = 245 , len(sampled_ep) = 178, trace = from 109 to 117
ep_idx[i] = 189
buffer_index = 153 , len(sampled_ep) = 84, trace = from 7 to 15
ep_idx[i] = 200
buffer_index = 164 , len(sampled_ep) = 82, trace = from 7 to 15
ep_idx[i] = 134
buffer_index = 98 , len(sampled_ep) = 208, trace = from 123 to 131
ep_idx[i] = 132
buffer_index = 96 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 116
buffer_index = 80 , len(sampled_ep) = 162, trace = from 125 to 133
ep_idx[i] = 219
buffer_index = 183 , len(sampled_ep) = 110, trace = from 6 to 14
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9978010000000181
epsilon is = 0.9977995000000182
epsilon is = 0.9977980000000182
epsilon is = 0.9977965000000182
epsilon is = 0.9977950000000182
Target Set Success
ep_idx = [322, 234, 215, 334, 215, 334, 281, 334, 276, 136, 296, 187, 236,

buffer_index = 227 , len(sampled_ep) = 109, trace = from 72 to 80
ep_idx[i] = 293
buffer_index = 256 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 122
buffer_index = 85 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 290
buffer_index = 253 , len(sampled_ep) = 152, trace = from 1 to 9
ep_idx[i] = 334
buffer_index = 297 , len(sampled_ep) = 113, trace = from 93 to 101
ep_idx[i] = 153
buffer_index = 116 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 81
buffer_index = 44 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 334
buffer_index = 297 , len(sampled_ep) = 113, trace = from 93 to 101
ep_idx[i] = 295
buffer_index = 258 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 125
buffer_index = 88 , len(sampled_ep) = 75, trace = from 29 to 37
ep_idx[i] = 65
buffer_index = 28 , len(sampled_ep) = 45, trace = from 6 to 14
ep_idx[i] = 324
buffer_index = 287 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 243
buffer_index = 206 , len(sampled_

buffer_index = 297 , len(sampled_ep) = 113, trace = from 103 to 111
ep_idx[i] = 66
buffer_index = 29 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 212
buffer_index = 175 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 336
buffer_index = 299 , len(sampled_ep) = 102, trace = from 79 to 87
ep_idx[i] = 49
buffer_index = 12 , len(sampled_ep) = 81, trace = from 12 to 20
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9977635000000185
epsilon is = 0.9977620000000185
epsilon is = 0.9977605000000185
epsilon is = 0.9977590000000185
epsilon is = 0.9977575000000185
Target Set Success
ep_idx = [253, 315, 234, 201, 51, 239, 311, 334, 312, 239, 283, 286, 239, 329, 196, 107, 123, 202, 279, 138, 56, 318, 87, 334, 123, 166, 320, 167, 153, 328, 258, 333]
exp_idx = [24, 43, 162, 17, 51, 62, 82, 99, 71, 80, 17, 58, 29, 52, 123, 56, 51, 16, 14, 47, 18, 71, 66, 110, 51, 56, 27, 25, 20, 21, 21, 22]
idx_offset = 37, self.episode_index = 336, len(self.buffer) = 300
ep_idx[i] = 253
buffer_i

buffer_index = 126 , len(sampled_ep) = 134, trace = from 125 to 133
ep_idx[i] = 81
buffer_index = 44 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 223
buffer_index = 186 , len(sampled_ep) = 112, trace = from 43 to 51
ep_idx[i] = 145
buffer_index = 108 , len(sampled_ep) = 117, trace = from 3 to 11
ep_idx[i] = 61
buffer_index = 24 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 151
buffer_index = 114 , len(sampled_ep) = 82, trace = from 33 to 41
ep_idx[i] = 312
buffer_index = 275 , len(sampled_ep) = 108, trace = from 13 to 21
ep_idx[i] = 164
buffer_index = 127 , len(sampled_ep) = 118, trace = from 4 to 12
ep_idx[i] = 306
buffer_index = 269 , len(sampled_ep) = 117, trace = from 93 to 101
ep_idx[i] = 170
buffer_index = 133 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 85
buffer_index = 48 , len(sampled_ep) = 108, trace = from 69 to 77
ep_idx[i] = 203
buffer_index = 166 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 330
buffer_index = 293 , len(sa

epsilon is = 0.9977245000000188
epsilon is = 0.9977230000000188
epsilon is = 0.9977215000000188
epsilon is = 0.9977200000000188
Target Set Success
ep_idx = [145, 257, 104, 92, 295, 162, 73, 293, 198, 142, 46, 76, 154, 45, 47, 249, 51, 153, 60, 211, 68, 116, 148, 73, 85, 332, 47, 309, 126, 314, 252, 309]
exp_idx = [51, 57, 35, 270, 43, 37, 73, 18, 11, 69, 65, 167, 103, 50, 68, 20, 13, 16, 54, 44, 36, 83, 12, 57, 22, 80, 74, 11, 28, 18, 73, 106]
idx_offset = 37, self.episode_index = 336, len(self.buffer) = 300
ep_idx[i] = 145
buffer_index = 108 , len(sampled_ep) = 117, trace = from 44 to 52
ep_idx[i] = 257
buffer_index = 220 , len(sampled_ep) = 82, trace = from 50 to 58
ep_idx[i] = 104
buffer_index = 67 , len(sampled_ep) = 50, trace = from 28 to 36
ep_idx[i] = 92
buffer_index = 55 , len(sampled_ep) = 281, trace = from 263 to 271
ep_idx[i] = 295
buffer_index = 258 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 162
buffer_index = 125 , len(sampled_ep) = 112, trace = from 30 to 3

buffer_index = 121 , len(sampled_ep) = 83, trace = from 12 to 20
ep_idx[i] = 214
buffer_index = 177 , len(sampled_ep) = 75, trace = from 27 to 35
ep_idx[i] = 266
buffer_index = 229 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 334
buffer_index = 297 , len(sampled_ep) = 113, trace = from 104 to 112
ep_idx[i] = 155
buffer_index = 118 , len(sampled_ep) = 77, trace = from 6 to 14
ep_idx[i] = 235
buffer_index = 198 , len(sampled_ep) = 46, trace = from 20 to 28
ep_idx[i] = 114
buffer_index = 77 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 54
buffer_index = 17 , len(sampled_ep) = 47, trace = from 37 to 45
ep_idx[i] = 262
buffer_index = 225 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 265
buffer_index = 228 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 113
buffer_index = 76 , len(sampled_ep) = 105, trace = from 23 to 31
ep_idx[i] = 251
buffer_index = 214 , len(sampled_ep) = 133, trace = from 9 to 17
ep_idx[i] = 332
buffer_index = 295 , len(sample

idx_offset = 37, self.episode_index = 336, len(self.buffer) = 300
ep_idx[i] = 232
buffer_index = 195 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 309
buffer_index = 272 , len(sampled_ep) = 115, trace = from 8 to 16
ep_idx[i] = 178
buffer_index = 141 , len(sampled_ep) = 80, trace = from 17 to 25
ep_idx[i] = 228
buffer_index = 191 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 302
buffer_index = 265 , len(sampled_ep) = 79, trace = from 8 to 16
ep_idx[i] = 60
buffer_index = 23 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 92
buffer_index = 55 , len(sampled_ep) = 281, trace = from 14 to 22
ep_idx[i] = 213
buffer_index = 176 , len(sampled_ep) = 148, trace = from 85 to 93
ep_idx[i] = 247
buffer_index = 210 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 129
buffer_index = 92 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 312
buffer_index = 275 , len(sampled_ep) = 108, trace = from 13 to 21
ep_idx[i] = 175
buffer_index = 138 , len(sampled

buffer_index = 244 , len(sampled_ep) = 178, trace = from 31 to 39
ep_idx[i] = 329
buffer_index = 292 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 116
buffer_index = 79 , len(sampled_ep) = 162, trace = from 114 to 122
ep_idx[i] = 123
buffer_index = 86 , len(sampled_ep) = 198, trace = from 140 to 148
ep_idx[i] = 93
buffer_index = 56 , len(sampled_ep) = 77, trace = from 68 to 76
ep_idx[i] = 288
buffer_index = 251 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 84
buffer_index = 47 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 118
buffer_index = 81 , len(sampled_ep) = 111, trace = from 10 to 18
ep_idx[i] = 180
buffer_index = 143 , len(sampled_ep) = 141, trace = from 26 to 34
ep_idx[i] = 213
buffer_index = 176 , len(sampled_ep) = 148, trace = from 132 to 140
ep_idx[i] = 334
buffer_index = 297 , len(sampled_ep) = 113, trace = from 100 to 108
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9976660000000193
length of poped element = 44 , cntr = 37 , diff = 7


ep_idx[i] = 99
buffer_index = 61 , len(sampled_ep) = 83, trace = from 18 to 26
ep_idx[i] = 56
buffer_index = 18 , len(sampled_ep) = 198, trace = from 1 to 9
ep_idx[i] = 210
buffer_index = 172 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 318
buffer_index = 280 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 71
buffer_index = 33 , len(sampled_ep) = 299, trace = from 75 to 83
ep_idx[i] = 310
buffer_index = 272 , len(sampled_ep) = 164, trace = from 116 to 124
ep_idx[i] = 70
buffer_index = 32 , len(sampled_ep) = 198, trace = from 180 to 188
ep_idx[i] = 69
buffer_index = 31 , len(sampled_ep) = 44, trace = from 1 to 9
ep_idx[i] = 113
buffer_index = 75 , len(sampled_ep) = 105, trace = from 6 to 14
ep_idx[i] = 218
buffer_index = 180 , len(sampled_ep) = 80, trace = from 47 to 55
ep_idx[i] = 183
buffer_index = 145 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 314
buffer_index = 276 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 330
buffer_index = 292 , 

ep_idx[i] = 137
buffer_index = 99 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 121
buffer_index = 83 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 256
buffer_index = 218 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 331
buffer_index = 293 , len(sampled_ep) = 84, trace = from 53 to 61
ep_idx[i] = 93
buffer_index = 55 , len(sampled_ep) = 77, trace = from 12 to 20
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9976285000000196
epsilon is = 0.9976270000000196
epsilon is = 0.9976255000000196
epsilon is = 0.9976240000000196
epsilon is = 0.9976225000000196
Target Set Success
ep_idx = [334, 51, 52, 152, 281, 145, 260, 71, 136, 231, 98, 112, 139, 216, 172, 321, 275, 254, 334, 50, 40, 294, 80, 147, 122, 227, 330, 260, 287, 268, 330, 162]
exp_idx = [82, 33, 10, 149, 24, 11, 45, 207, 82, 55, 45, 57, 66, 37, 32, 80, 45, 51, 105, 73, 75, 46, 27, 66, 10, 86, 161, 13, 71, 30, 22, 56]
idx_offset = 38, self.episode_index = 337, len(self.buffer) = 300
ep_idx[i] = 334


buffer_index = 183 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 123
buffer_index = 85 , len(sampled_ep) = 198, trace = from 10 to 18
ep_idx[i] = 85
buffer_index = 47 , len(sampled_ep) = 108, trace = from 95 to 103
ep_idx[i] = 60
buffer_index = 22 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 283
buffer_index = 245 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 57
buffer_index = 19 , len(sampled_ep) = 110, trace = from 43 to 51
ep_idx[i] = 45
buffer_index = 7 , len(sampled_ep) = 100, trace = from 3 to 11
ep_idx[i] = 189
buffer_index = 151 , len(sampled_ep) = 84, trace = from 51 to 59
ep_idx[i] = 330
buffer_index = 292 , len(sampled_ep) = 175, trace = from 129 to 137
ep_idx[i] = 312
buffer_index = 274 , len(sampled_ep) = 108, trace = from 73 to 81
ep_idx[i] = 90
buffer_index = 52 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 296 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 299
buffer_index = 261 , len(sampl

epsilon is = 0.9975895000000199
epsilon is = 0.9975880000000199
epsilon is = 0.9975865000000199
epsilon is = 0.9975850000000199
Target Set Success
ep_idx = [92, 334, 303, 71, 92, 139, 160, 134, 219, 295, 245, 71, 134, 233, 171, 56, 123, 290, 159, 220, 139, 237, 162, 324, 106, 148, 150, 185, 70, 227, 118, 141]
exp_idx = [158, 77, 16, 145, 66, 38, 43, 25, 81, 29, 10, 258, 45, 15, 23, 114, 137, 75, 14, 28, 36, 21, 31, 71, 91, 74, 46, 64, 174, 80, 71, 24]
idx_offset = 38, self.episode_index = 337, len(self.buffer) = 300
ep_idx[i] = 92
buffer_index = 54 , len(sampled_ep) = 281, trace = from 151 to 159
ep_idx[i] = 334
buffer_index = 296 , len(sampled_ep) = 113, trace = from 70 to 78
ep_idx[i] = 303
buffer_index = 265 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 71
buffer_index = 33 , len(sampled_ep) = 299, trace = from 138 to 146
ep_idx[i] = 92
buffer_index = 54 , len(sampled_ep) = 281, trace = from 59 to 67
ep_idx[i] = 139
buffer_index = 101 , len(sampled_ep) = 133, trace = from

buffer_index = 71 , len(sampled_ep) = 48, trace = from 9 to 17
ep_idx[i] = 100
buffer_index = 62 , len(sampled_ep) = 102, trace = from 22 to 30
ep_idx[i] = 327
buffer_index = 289 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 320
buffer_index = 282 , len(sampled_ep) = 71, trace = from 42 to 50
ep_idx[i] = 330
buffer_index = 292 , len(sampled_ep) = 175, trace = from 148 to 156
ep_idx[i] = 107
buffer_index = 69 , len(sampled_ep) = 109, trace = from 77 to 85
ep_idx[i] = 143
buffer_index = 105 , len(sampled_ep) = 78, trace = from 46 to 54
ep_idx[i] = 127
buffer_index = 89 , len(sampled_ep) = 50, trace = from 8 to 16
ep_idx[i] = 152
buffer_index = 114 , len(sampled_ep) = 228, trace = from 48 to 56
ep_idx[i] = 265
buffer_index = 227 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 134
buffer_index = 96 , len(sampled_ep) = 208, trace = from 157 to 165
ep_idx[i] = 202
buffer_index = 164 , len(sampled_ep) = 143, trace = from 80 to 88
ep_idx[i] = 235
buffer_index = 197 , len(

idx_offset = 38, self.episode_index = 337, len(self.buffer) = 300
ep_idx[i] = 155
buffer_index = 117 , len(sampled_ep) = 77, trace = from 8 to 16
ep_idx[i] = 160
buffer_index = 122 , len(sampled_ep) = 117, trace = from 17 to 25
ep_idx[i] = 138
buffer_index = 100 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 86
buffer_index = 48 , len(sampled_ep) = 77, trace = from 1 to 9
ep_idx[i] = 334
buffer_index = 296 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 56
buffer_index = 18 , len(sampled_ep) = 198, trace = from 140 to 148
ep_idx[i] = 220
buffer_index = 182 , len(sampled_ep) = 118, trace = from 76 to 84
ep_idx[i] = 179
buffer_index = 141 , len(sampled_ep) = 77, trace = from 29 to 37
ep_idx[i] = 199
buffer_index = 161 , len(sampled_ep) = 116, trace = from 26 to 34
ep_idx[i] = 144
buffer_index = 106 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 226
buffer_index = 188 , len(sampled_ep) = 120, trace = from 51 to 59
ep_idx[i] = 334
buffer_index = 296 , len(s

buffer_index = 88 , len(sampled_ep) = 50, trace = from 31 to 39
ep_idx[i] = 70
buffer_index = 31 , len(sampled_ep) = 198, trace = from 117 to 125
ep_idx[i] = 254
buffer_index = 215 , len(sampled_ep) = 116, trace = from 71 to 79
ep_idx[i] = 297
buffer_index = 258 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 327
buffer_index = 288 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 336
buffer_index = 297 , len(sampled_ep) = 102, trace = from 19 to 27
ep_idx[i] = 137
buffer_index = 98 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 45
buffer_index = 6 , len(sampled_ep) = 100, trace = from 1 to 9
ep_idx[i] = 102
buffer_index = 63 , len(sampled_ep) = 103, trace = from 19 to 27
ep_idx[i] = 70
buffer_index = 31 , len(sampled_ep) = 198, trace = from 9 to 17
ep_idx[i] = 335
buffer_index = 296 , len(sampled_ep) = 81, trace = from 28 to 36
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9975310000000204
epsilon is = 0.9975295000000204
epsilon is = 0.9975280000000204
ep

buffer_index = 295 , len(sampled_ep) = 113, trace = from 95 to 103
ep_idx[i] = 87
buffer_index = 48 , len(sampled_ep) = 76, trace = from 13 to 21
ep_idx[i] = 84
buffer_index = 45 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 284
buffer_index = 245 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 57
buffer_index = 18 , len(sampled_ep) = 110, trace = from 5 to 13
ep_idx[i] = 152
buffer_index = 113 , len(sampled_ep) = 228, trace = from 184 to 192
ep_idx[i] = 288
buffer_index = 249 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 110
buffer_index = 71 , len(sampled_ep) = 137, trace = from 100 to 108
ep_idx[i] = 295
buffer_index = 256 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 147
buffer_index = 108 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 134
buffer_index = 95 , len(sampled_ep) = 208, trace = from 31 to 39
ep_idx[i] = 231
buffer_index = 192 , len(sampled_ep) = 77, trace = from 10 to 18
ep_idx[i] = 99
buffer_index = 60 , len(sample

ep_idx[i] = 47
buffer_index = 8 , len(sampled_ep) = 83, trace = from 11 to 19
ep_idx[i] = 278
buffer_index = 239 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 164
buffer_index = 125 , len(sampled_ep) = 118, trace = from 109 to 117
ep_idx[i] = 92
buffer_index = 53 , len(sampled_ep) = 281, trace = from 103 to 111
ep_idx[i] = 312
buffer_index = 273 , len(sampled_ep) = 108, trace = from 99 to 107
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9974935000000207
epsilon is = 0.9974920000000207
epsilon is = 0.9974905000000207
epsilon is = 0.9974890000000207
epsilon is = 0.9974875000000207
Target Set Success
ep_idx = [285, 84, 232, 291, 89, 291, 290, 115, 64, 107, 56, 262, 70, 174, 156, 234, 334, 215, 156, 334, 44, 295, 223, 48, 87, 334, 336, 93, 214, 311, 330, 84]
exp_idx = [28, 10, 17, 37, 13, 93, 109, 166, 15, 73, 170, 38, 81, 22, 16, 105, 97, 28, 31, 107, 36, 51, 37, 44, 46, 80, 18, 49, 53, 20, 160, 43]
idx_offset = 39, self.episode_index = 338, len(self.buffer) = 300
ep_idx[i]

buffer_index = 250 , len(sampled_ep) = 83, trace = from 20 to 28
ep_idx[i] = 297
buffer_index = 258 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 246
buffer_index = 207 , len(sampled_ep) = 47, trace = from 9 to 17
ep_idx[i] = 265
buffer_index = 226 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 322
buffer_index = 283 , len(sampled_ep) = 111, trace = from 46 to 54
ep_idx[i] = 206
buffer_index = 167 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 256
buffer_index = 217 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 210
buffer_index = 171 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 153
buffer_index = 114 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 90
buffer_index = 51 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 98
buffer_index = 59 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 217
buffer_index = 178 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 135
buffer_index = 96 , len(sampled_e

epsilon is = 0.997454500000021
epsilon is = 0.997453000000021
epsilon is = 0.997451500000021
epsilon is = 0.997450000000021
Target Set Success
ep_idx = [253, 319, 303, 92, 135, 236, 320, 40, 239, 47, 120, 71, 161, 112, 217, 92, 45, 245, 154, 122, 332, 227, 43, 252, 184, 281, 305, 152, 71, 309, 50, 76]
exp_idx = [46, 21, 31, 220, 145, 66, 15, 22, 55, 18, 71, 178, 56, 25, 42, 22, 8, 57, 71, 58, 77, 16, 45, 27, 16, 132, 13, 219, 76, 78, 24, 175]
idx_offset = 39, self.episode_index = 338, len(self.buffer) = 300
ep_idx[i] = 253
buffer_index = 214 , len(sampled_ep) = 76, trace = from 39 to 47
ep_idx[i] = 319
buffer_index = 280 , len(sampled_ep) = 70, trace = from 14 to 22
ep_idx[i] = 303
buffer_index = 264 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 92
buffer_index = 53 , len(sampled_ep) = 281, trace = from 213 to 221
ep_idx[i] = 135
buffer_index = 96 , len(sampled_ep) = 211, trace = from 138 to 146
ep_idx[i] = 236
buffer_index = 197 , len(sampled_ep) = 81, trace = from 59 to 6

buffer_index = 149 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 334
buffer_index = 295 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 227
buffer_index = 188 , len(sampled_ep) = 112, trace = from 69 to 77
ep_idx[i] = 287
buffer_index = 248 , len(sampled_ep) = 194, trace = from 93 to 101
ep_idx[i] = 322
buffer_index = 283 , len(sampled_ep) = 111, trace = from 32 to 40
ep_idx[i] = 261
buffer_index = 222 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 62
buffer_index = 23 , len(sampled_ep) = 77, trace = from 10 to 18
ep_idx[i] = 45
buffer_index = 6 , len(sampled_ep) = 100, trace = from 89 to 97
ep_idx[i] = 112
buffer_index = 73 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 215
buffer_index = 176 , len(sampled_ep) = 117, trace = from 21 to 29
ep_idx[i] = 214
buffer_index = 175 , len(sampled_ep) = 75, trace = from 52 to 60
ep_idx[i] = 245
buffer_index = 206 , len(sampled_ep) = 75, trace = from 21 to 29
ep_idx[i] = 239
buffer_index = 200 , len(s

exp_idx = [32, 109, 52, 109, 17, 25, 102, 71, 12, 30, 89, 36, 49, 11, 60, 10, 68, 78, 85, 65, 240, 26, 85, 109, 81, 37, 19, 52, 83, 77, 118, 16]
idx_offset = 40, self.episode_index = 339, len(self.buffer) = 300
ep_idx[i] = 130
buffer_index = 90 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 334
buffer_index = 294 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 202
buffer_index = 162 , len(sampled_ep) = 143, trace = from 45 to 53
ep_idx[i] = 222
buffer_index = 182 , len(sampled_ep) = 224, trace = from 102 to 110
ep_idx[i] = 197
buffer_index = 157 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 133
buffer_index = 93 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 123
buffer_index = 83 , len(sampled_ep) = 198, trace = from 95 to 103
ep_idx[i] = 274
buffer_index = 234 , len(sampled_ep) = 114, trace = from 64 to 72
ep_idx[i] = 86
buffer_index = 46 , len(sampled_ep) = 77, trace = from 5 to 13
ep_idx[i] = 330
buffer_index = 290 , len(sampled_ep) = 1

ep_idx[i] = 302
buffer_index = 262 , len(sampled_ep) = 79, trace = from 39 to 47
ep_idx[i] = 151
buffer_index = 111 , len(sampled_ep) = 82, trace = from 23 to 31
ep_idx[i] = 299
buffer_index = 259 , len(sampled_ep) = 100, trace = from 41 to 49
ep_idx[i] = 202
buffer_index = 162 , len(sampled_ep) = 143, trace = from 45 to 53
ep_idx[i] = 73
buffer_index = 33 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 48
buffer_index = 8 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 234
buffer_index = 194 , len(sampled_ep) = 167, trace = from 51 to 59
ep_idx[i] = 211
buffer_index = 171 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 285
buffer_index = 245 , len(sampled_ep) = 166, trace = from 101 to 109
ep_idx[i] = 144
buffer_index = 104 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 43
buffer_index = 3 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 220
buffer_index = 180 , len(sampled_ep) = 118, trace = from 15 to 23
sampledTraces.shape = (32, 8, 

buffer_index = 0 , len(sampled_ep) = 116, trace = from 79 to 87
ep_idx[i] = 124
buffer_index = 84 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 106
buffer_index = 66 , len(sampled_ep) = 103, trace = from 25 to 33
ep_idx[i] = 43
buffer_index = 3 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 265
buffer_index = 225 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 140
buffer_index = 100 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 281
buffer_index = 241 , len(sampled_ep) = 178, trace = from 49 to 57
ep_idx[i] = 43
buffer_index = 3 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 78
buffer_index = 38 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 217
buffer_index = 177 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 177
buffer_index = 137 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 71
buffer_index = 31 , len(sampled_ep) = 299, trace = from 231 to 239
ep_idx[i] = 252
buffer_index = 212 , len(sampled_ep) 

buffer_index = 112 , len(sampled_ep) = 228, trace = from 7 to 15
ep_idx[i] = 211
buffer_index = 171 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 215
buffer_index = 175 , len(sampled_ep) = 117, trace = from 1 to 9
ep_idx[i] = 132
buffer_index = 92 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 60
buffer_index = 20 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 101
buffer_index = 61 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 92
buffer_index = 52 , len(sampled_ep) = 281, trace = from 233 to 241
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9973585000000218
epsilon is = 0.9973570000000218
epsilon is = 0.9973555000000218
epsilon is = 0.9973540000000218
epsilon is = 0.9973525000000218
Target Set Success
ep_idx = [285, 288, 115, 286, 137, 257, 281, 334, 227, 123, 141, 190, 166, 332, 257, 328, 220, 310, 175, 56, 240, 162, 256, 151, 205, 92, 337, 196, 57, 196, 44, 120]
exp_idx = [128, 10, 92, 67, 31, 15, 153, 11, 54, 108, 48, 12, 16, 65, 21, 7

buffer_index = 43 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 254
buffer_index = 214 , len(sampled_ep) = 116, trace = from 38 to 46
ep_idx[i] = 102
buffer_index = 62 , len(sampled_ep) = 103, trace = from 7 to 15
ep_idx[i] = 258
buffer_index = 218 , len(sampled_ep) = 76, trace = from 25 to 33
ep_idx[i] = 332
buffer_index = 292 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 331
buffer_index = 291 , len(sampled_ep) = 84, trace = from 22 to 30
ep_idx[i] = 115
buffer_index = 75 , len(sampled_ep) = 299, trace = from 113 to 121
ep_idx[i] = 93
buffer_index = 53 , len(sampled_ep) = 77, trace = from 40 to 48
ep_idx[i] = 264
buffer_index = 224 , len(sampled_ep) = 109, trace = from 42 to 50
ep_idx[i] = 281
buffer_index = 241 , len(sampled_ep) = 178, trace = from 109 to 117
ep_idx[i] = 55
buffer_index = 15 , len(sampled_ep) = 76, trace = from 4 to 12
ep_idx[i] = 135
buffer_index = 95 , len(sampled_ep) = 211, trace = from 49 to 57
ep_idx[i] = 161
buffer_index = 121 , len(sam

buffer_index = 289 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 187
buffer_index = 147 , len(sampled_ep) = 84, trace = from 43 to 51
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9973210000000221
epsilon is = 0.9973195000000221
epsilon is = 0.9973180000000221
epsilon is = 0.9973165000000221
epsilon is = 0.9973150000000222
Target Set Success
ep_idx = [227, 44, 215, 110, 130, 337, 339, 85, 334, 219, 78, 190, 260, 43, 102, 242, 283, 239, 322, 118, 245, 60, 282, 251, 70, 123, 304, 44, 309, 180, 257, 227]
exp_idx = [56, 72, 39, 108, 37, 58, 73, 50, 107, 11, 16, 12, 55, 40, 15, 79, 63, 86, 65, 75, 10, 47, 20, 44, 136, 155, 28, 39, 104, 75, 51, 103]
idx_offset = 40, self.episode_index = 339, len(self.buffer) = 300
ep_idx[i] = 227
buffer_index = 187 , len(sampled_ep) = 112, trace = from 49 to 57
ep_idx[i] = 44
buffer_index = 4 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 215
buffer_index = 175 , len(sampled_ep) = 117, trace = from 32 to 40
ep_idx[i] = 110
buffer_ind

buffer_index = 227 , len(sampled_ep) = 83, trace = from 56 to 64
ep_idx[i] = 163
buffer_index = 123 , len(sampled_ep) = 134, trace = from 93 to 101
ep_idx[i] = 320
buffer_index = 280 , len(sampled_ep) = 71, trace = from 2 to 10
ep_idx[i] = 114
buffer_index = 74 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 296
buffer_index = 256 , len(sampled_ep) = 108, trace = from 67 to 75
ep_idx[i] = 273
buffer_index = 233 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 139
buffer_index = 99 , len(sampled_ep) = 133, trace = from 15 to 23
ep_idx[i] = 170
buffer_index = 130 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 336
buffer_index = 296 , len(sampled_ep) = 102, trace = from 14 to 22
ep_idx[i] = 332
buffer_index = 292 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 149
buffer_index = 109 , len(sampled_ep) = 82, trace = from 50 to 58
ep_idx[i] = 296
buffer_index = 256 , len(sampled_ep) = 108, trace = from 44 to 52
ep_idx[i] = 51
buffer_index = 11 , len(sam

ep_idx = [83, 334, 264, 51, 269, 89, 199, 166, 253, 50, 108, 311, 177, 115, 71, 336, 50, 286, 193, 97, 115, 160, 163, 166, 184, 123, 249, 279, 234, 122, 281, 194]
exp_idx = [19, 68, 75, 16, 7, 29, 24, 70, 57, 17, 22, 51, 17, 193, 234, 49, 63, 95, 36, 12, 15, 17, 30, 31, 41, 193, 63, 26, 154, 73, 113, 53]
idx_offset = 40, self.episode_index = 339, len(self.buffer) = 300
ep_idx[i] = 83
buffer_index = 43 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 334
buffer_index = 294 , len(sampled_ep) = 113, trace = from 61 to 69
ep_idx[i] = 264
buffer_index = 224 , len(sampled_ep) = 109, trace = from 68 to 76
ep_idx[i] = 51
buffer_index = 11 , len(sampled_ep) = 145, trace = from 9 to 17
ep_idx[i] = 269
buffer_index = 229 , len(sampled_ep) = 45, trace = from 0 to 8
ep_idx[i] = 89
buffer_index = 49 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 199
buffer_index = 159 , len(sampled_ep) = 116, trace = from 17 to 25
ep_idx[i] = 166
buffer_index = 126 , len(sampled_ep) = 81, trace =

buffer_index = 187 , len(sampled_ep) = 112, trace = from 29 to 37
ep_idx[i] = 70
buffer_index = 30 , len(sampled_ep) = 198, trace = from 49 to 57
ep_idx[i] = 178
buffer_index = 138 , len(sampled_ep) = 80, trace = from 52 to 60
ep_idx[i] = 337
buffer_index = 297 , len(sampled_ep) = 82, trace = from 73 to 81
ep_idx[i] = 213
buffer_index = 173 , len(sampled_ep) = 148, trace = from 24 to 32
ep_idx[i] = 135
buffer_index = 95 , len(sampled_ep) = 211, trace = from 55 to 63
ep_idx[i] = 81
buffer_index = 41 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 331
buffer_index = 291 , len(sampled_ep) = 84, trace = from 62 to 70
ep_idx[i] = 285
buffer_index = 245 , len(sampled_ep) = 166, trace = from 63 to 71
ep_idx[i] = 82
buffer_index = 42 , len(sampled_ep) = 73, trace = from 54 to 62
ep_idx[i] = 335
buffer_index = 295 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 283
buffer_index = 243 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 317
buffer_index = 277 , len(sampl

buffer_index = 290 , len(sampled_ep) = 175, trace = from 58 to 66
ep_idx[i] = 187
buffer_index = 147 , len(sampled_ep) = 84, trace = from 0 to 8
ep_idx[i] = 256
buffer_index = 216 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 306
buffer_index = 266 , len(sampled_ep) = 117, trace = from 85 to 93
ep_idx[i] = 260
buffer_index = 220 , len(sampled_ep) = 108, trace = from 40 to 48
ep_idx[i] = 210
buffer_index = 170 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 297
buffer_index = 257 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 123
buffer_index = 83 , len(sampled_ep) = 198, trace = from 32 to 40
ep_idx[i] = 251
buffer_index = 211 , len(sampled_ep) = 133, trace = from 71 to 79
ep_idx[i] = 84
buffer_index = 44 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 133
buffer_index = 93 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 308
buffer_index = 268 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 330
buffer_index = 290 , len(sampled

ep_idx[i] = 291
buffer_index = 251 , len(sampled_ep) = 103, trace = from 22 to 30
ep_idx[i] = 273
buffer_index = 233 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 327
buffer_index = 287 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 229
buffer_index = 189 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 110
buffer_index = 70 , len(sampled_ep) = 137, trace = from 54 to 62
ep_idx[i] = 256
buffer_index = 216 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 71
buffer_index = 31 , len(sampled_ep) = 299, trace = from 1 to 9
ep_idx[i] = 256
buffer_index = 216 , len(sampled_ep) = 81, trace = from 66 to 74
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9972235000000229
epsilon is = 0.9972220000000229
epsilon is = 0.9972205000000229
epsilon is = 0.997219000000023
epsilon is = 0.997217500000023
Target Set Success
ep_idx = [100, 193, 117, 233, 80, 157, 53, 180, 145, 153, 76, 219, 133, 116, 122, 138, 92, 154, 74, 110, 98, 287, 118, 334, 267, 336, 207, 161, 3

buffer_index = 289 , len(sampled_ep) = 175, trace = from 159 to 167
ep_idx[i] = 193
buffer_index = 152 , len(sampled_ep) = 108, trace = from 38 to 46
ep_idx[i] = 127
buffer_index = 86 , len(sampled_ep) = 50, trace = from 10 to 18
ep_idx[i] = 140
buffer_index = 99 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 116
buffer_index = 75 , len(sampled_ep) = 162, trace = from 7 to 15
ep_idx[i] = 293
buffer_index = 252 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 214
buffer_index = 173 , len(sampled_ep) = 75, trace = from 31 to 39
ep_idx[i] = 329
buffer_index = 288 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 88
buffer_index = 47 , len(sampled_ep) = 114, trace = from 93 to 101
ep_idx[i] = 329
buffer_index = 288 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 105
buffer_index = 64 , len(sampled_ep) = 74, trace = from 43 to 51
ep_idx[i] = 195
buffer_index = 154 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 81
buffer_index = 40 , len(sampled

buffer_index = 250 , len(sampled_ep) = 103, trace = from 24 to 32
ep_idx[i] = 238
buffer_index = 197 , len(sampled_ep) = 79, trace = from 57 to 65
ep_idx[i] = 141
buffer_index = 100 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 276
buffer_index = 235 , len(sampled_ep) = 81, trace = from 10 to 18
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9971860000000232
epsilon is = 0.9971845000000232
epsilon is = 0.9971830000000232
epsilon is = 0.9971815000000233
epsilon is = 0.9971800000000233
Target Set Success
ep_idx = [334, 199, 264, 64, 44, 164, 123, 334, 334, 302, 77, 322, 151, 41, 198, 233, 281, 144, 178, 141, 182, 175, 87, 177, 71, 164, 264, 332, 48, 285, 56, 104]
exp_idx = [107, 33, 106, 16, 12, 99, 11, 107, 97, 19, 65, 21, 17, 19, 12, 35, 24, 37, 61, 7, 56, 94, 44, 27, 178, 75, 85, 22, 55, 140, 69, 40]
idx_offset = 41, self.episode_index = 340, len(self.buffer) = 300
ep_idx[i] = 334
buffer_index = 293 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 199
buffer_i

buffer_index = 48 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 135
buffer_index = 94 , len(sampled_ep) = 211, trace = from 150 to 158
ep_idx[i] = 329
buffer_index = 288 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 238
buffer_index = 197 , len(sampled_ep) = 79, trace = from 37 to 45
ep_idx[i] = 250
buffer_index = 209 , len(sampled_ep) = 49, trace = from 6 to 14
ep_idx[i] = 70
buffer_index = 29 , len(sampled_ep) = 198, trace = from 12 to 20
ep_idx[i] = 197
buffer_index = 156 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 150
buffer_index = 109 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 191
buffer_index = 150 , len(sampled_ep) = 131, trace = from 41 to 49
ep_idx[i] = 334
buffer_index = 293 , len(sampled_ep) = 113, trace = from 45 to 53
ep_idx[i] = 323
buffer_index = 282 , len(sampled_ep) = 43, trace = from 5 to 13
ep_idx[i] = 331
buffer_index = 290 , len(sampled_ep) = 84, trace = from 32 to 40
ep_idx[i] = 337
buffer_index = 296 , len(samp

epsilon is = 0.9971425000000236
Target Set Success
ep_idx = [234, 262, 331, 216, 278, 136, 213, 76, 283, 330, 313, 277, 254, 164, 97, 334, 268, 282, 301, 180, 299, 136, 105, 49, 197, 170, 50, 334, 189, 155, 333, 294]
exp_idx = [139, 16, 63, 77, 29, 10, 8, 172, 33, 84, 62, 72, 24, 95, 15, 43, 26, 13, 14, 132, 13, 44, 46, 66, 16, 28, 45, 103, 27, 48, 36, 41]
idx_offset = 41, self.episode_index = 340, len(self.buffer) = 300
ep_idx[i] = 234
buffer_index = 193 , len(sampled_ep) = 167, trace = from 132 to 140
ep_idx[i] = 262
buffer_index = 221 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 331
buffer_index = 290 , len(sampled_ep) = 84, trace = from 56 to 64
ep_idx[i] = 216
buffer_index = 175 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 278
buffer_index = 237 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 136
buffer_index = 95 , len(sampled_ep) = 108, trace = from 3 to 11
ep_idx[i] = 213
buffer_index = 172 , len(sampled_ep) = 148, trace = from 1 to 9
ep_idx[i

ep_idx[i] = 225
buffer_index = 183 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 314
buffer_index = 272 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 311
buffer_index = 269 , len(sampled_ep) = 84, trace = from 3 to 11
ep_idx[i] = 313
buffer_index = 271 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 134
buffer_index = 92 , len(sampled_ep) = 208, trace = from 12 to 20
ep_idx[i] = 146
buffer_index = 104 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 285
buffer_index = 243 , len(sampled_ep) = 166, trace = from 123 to 131
ep_idx[i] = 85
buffer_index = 43 , len(sampled_ep) = 108, trace = from 67 to 75
ep_idx[i] = 263
buffer_index = 221 , len(sampled_ep) = 51, trace = from 27 to 35
ep_idx[i] = 323
buffer_index = 281 , len(sampled_ep) = 43, trace = from 14 to 22
ep_idx[i] = 193
buffer_index = 151 , len(sampled_ep) = 108, trace = from 7 to 15
ep_idx[i] = 332
buffer_index = 290 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 326
buffer_inde

buffer_index = 30 , len(sampled_ep) = 82, trace = from 51 to 59
ep_idx[i] = 332
buffer_index = 290 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 290
buffer_index = 248 , len(sampled_ep) = 152, trace = from 40 to 48
ep_idx[i] = 298
buffer_index = 256 , len(sampled_ep) = 115, trace = from 95 to 103
ep_idx[i] = 295
buffer_index = 253 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 334
buffer_index = 292 , len(sampled_ep) = 113, trace = from 33 to 41
ep_idx[i] = 105
buffer_index = 63 , len(sampled_ep) = 74, trace = from 44 to 52
ep_idx[i] = 237
buffer_index = 195 , len(sampled_ep) = 116, trace = from 43 to 51
ep_idx[i] = 337
buffer_index = 295 , len(sampled_ep) = 82, trace = from 15 to 23
ep_idx[i] = 108
buffer_index = 66 , len(sampled_ep) = 45, trace = from 2 to 10
ep_idx[i] = 334
buffer_index = 292 , len(sampled_ep) = 113, trace = from 66 to 74
ep_idx[i] = 222
buffer_index = 180 , len(sampled_ep) = 224, trace = from 9 to 17
ep_idx[i] = 140
buffer_index = 98 , len(sa

buffer_index = 160 , len(sampled_ep) = 143, trace = from 90 to 98
ep_idx[i] = 151
buffer_index = 109 , len(sampled_ep) = 82, trace = from 9 to 17
ep_idx[i] = 234
buffer_index = 192 , len(sampled_ep) = 167, trace = from 32 to 40
ep_idx[i] = 278
buffer_index = 236 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 201
buffer_index = 159 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 245
buffer_index = 203 , len(sampled_ep) = 75, trace = from 37 to 45
ep_idx[i] = 178
buffer_index = 136 , len(sampled_ep) = 80, trace = from 43 to 51
ep_idx[i] = 185
buffer_index = 143 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 170
buffer_index = 128 , len(sampled_ep) = 81, trace = from 10 to 18
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.997088500000024
epsilon is = 0.997087000000024
epsilon is = 0.997085500000024
epsilon is = 0.9970840000000241
epsilon is = 0.9970825000000241
Target Set Success
ep_idx = [86, 261, 291, 153, 57, 271, 103, 287, 290, 135, 232, 92, 205, 89, 1

buffer_index = 108 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 337
buffer_index = 295 , len(sampled_ep) = 82, trace = from 6 to 14
ep_idx[i] = 334
buffer_index = 292 , len(sampled_ep) = 113, trace = from 92 to 100
ep_idx[i] = 43
buffer_index = 1 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 248
buffer_index = 206 , len(sampled_ep) = 77, trace = from 25 to 33
ep_idx[i] = 202
buffer_index = 160 , len(sampled_ep) = 143, trace = from 104 to 112
ep_idx[i] = 45
buffer_index = 3 , len(sampled_ep) = 100, trace = from 15 to 23
ep_idx[i] = 221
buffer_index = 179 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 70
buffer_index = 28 , len(sampled_ep) = 198, trace = from 66 to 74
ep_idx[i] = 226
buffer_index = 184 , len(sampled_ep) = 120, trace = from 22 to 30
ep_idx[i] = 162
buffer_index = 120 , len(sampled_ep) = 112, trace = from 30 to 38
ep_idx[i] = 145
buffer_index = 103 , len(sampled_ep) = 117, trace = from 108 to 116
ep_idx[i] = 213
buffer_index = 171 , len(

buffer_index = 256 , len(sampled_ep) = 115, trace = from 30 to 38
ep_idx[i] = 197
buffer_index = 155 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 115
buffer_index = 73 , len(sampled_ep) = 299, trace = from 8 to 16
ep_idx[i] = 45
buffer_index = 3 , len(sampled_ep) = 100, trace = from 7 to 15
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9970510000000243
epsilon is = 0.9970495000000243
epsilon is = 0.9970480000000244
epsilon is = 0.9970465000000244
epsilon is = 0.9970450000000244
Target Set Success
ep_idx = [312, 113, 97, 192, 135, 222, 231, 284, 131, 176, 140, 260, 307, 313, 154, 334, 252, 116, 82, 251, 240, 222, 111, 109, 238, 168, 138, 222, 334, 317, 61, 285]
exp_idx = [13, 46, 16, 32, 29, 189, 32, 72, 72, 19, 52, 89, 22, 77, 22, 36, 65, 121, 17, 49, 29, 157, 16, 32, 42, 61, 42, 190, 111, 77, 29, 134]
idx_offset = 42, self.episode_index = 341, len(self.buffer) = 300
ep_idx[i] = 312
buffer_index = 270 , len(sampled_ep) = 108, trace = from 6 to 14
ep_idx[i] = 113
buffer_i

buffer_index = 269 , len(sampled_ep) = 84, trace = from 12 to 20
ep_idx[i] = 196
buffer_index = 154 , len(sampled_ep) = 135, trace = from 68 to 76
ep_idx[i] = 321
buffer_index = 279 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 133
buffer_index = 91 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 220
buffer_index = 178 , len(sampled_ep) = 118, trace = from 37 to 45
ep_idx[i] = 144
buffer_index = 102 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 193
buffer_index = 151 , len(sampled_ep) = 108, trace = from 34 to 42
ep_idx[i] = 223
buffer_index = 181 , len(sampled_ep) = 112, trace = from 88 to 96
ep_idx[i] = 287
buffer_index = 245 , len(sampled_ep) = 194, trace = from 128 to 136
ep_idx[i] = 103
buffer_index = 61 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 294
buffer_index = 252 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 125
buffer_index = 83 , len(sampled_ep) = 75, trace = from 10 to 18
ep_idx[i] = 77
buffer_index = 35 , len(sa

epsilon is = 0.9970105000000247
epsilon is = 0.9970090000000247
epsilon is = 0.9970075000000247
Target Set Success
ep_idx = [123, 336, 117, 110, 161, 273, 330, 147, 339, 330, 325, 178, 55, 202, 334, 314, 297, 338, 337, 190, 75, 116, 230, 135, 254, 250, 202, 135, 107, 45, 338, 147]
exp_idx = [139, 41, 38, 62, 13, 17, 110, 43, 36, 87, 24, 47, 59, 39, 76, 19, 58, 19, 64, 12, 20, 143, 18, 22, 86, 14, 95, 42, 83, 63, 48, 16]
idx_offset = 43, self.episode_index = 342, len(self.buffer) = 300
ep_idx[i] = 123
buffer_index = 80 , len(sampled_ep) = 198, trace = from 132 to 140
ep_idx[i] = 336
buffer_index = 293 , len(sampled_ep) = 102, trace = from 34 to 42
ep_idx[i] = 117
buffer_index = 74 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 110
buffer_index = 67 , len(sampled_ep) = 137, trace = from 55 to 63
ep_idx[i] = 161
buffer_index = 118 , len(sampled_ep) = 114, trace = from 6 to 14
ep_idx[i] = 273
buffer_index = 230 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 330
buffer

buffer_index = 152 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 72
buffer_index = 29 , len(sampled_ep) = 82, trace = from 62 to 70
ep_idx[i] = 91
buffer_index = 48 , len(sampled_ep) = 46, trace = from 7 to 15
ep_idx[i] = 295
buffer_index = 252 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 242
buffer_index = 199 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 97
buffer_index = 54 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 191
buffer_index = 148 , len(sampled_ep) = 131, trace = from 18 to 26
ep_idx[i] = 266
buffer_index = 223 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 71
buffer_index = 28 , len(sampled_ep) = 299, trace = from 75 to 83
ep_idx[i] = 337
buffer_index = 294 , len(sampled_ep) = 82, trace = from 16 to 24
ep_idx[i] = 176
buffer_index = 133 , len(sampled_ep) = 80, trace = from 55 to 63
ep_idx[i] = 252
buffer_index = 209 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 262
buffer_index = 219 , len(sampled_ep) 

idx_offset = 43, self.episode_index = 342, len(self.buffer) = 300
ep_idx[i] = 221
buffer_index = 178 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 218
buffer_index = 175 , len(sampled_ep) = 80, trace = from 14 to 22
ep_idx[i] = 211
buffer_index = 168 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 342
buffer_index = 299 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 289
buffer_index = 246 , len(sampled_ep) = 83, trace = from 16 to 24
ep_idx[i] = 249
buffer_index = 206 , len(sampled_ep) = 75, trace = from 25 to 33
ep_idx[i] = 298
buffer_index = 255 , len(sampled_ep) = 115, trace = from 35 to 43
ep_idx[i] = 95
buffer_index = 52 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 326
buffer_index = 283 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 255
buffer_index = 212 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 239
buffer_index = 196 , len(sampled_ep) = 101, trace = from 93 to 101
ep_idx[i] = 71
buffer_index = 28 , len(sam

buffer_index = 100 , len(sampled_ep) = 78, trace = from 44 to 52
ep_idx[i] = 324
buffer_index = 281 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 335
buffer_index = 292 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 311
buffer_index = 268 , len(sampled_ep) = 84, trace = from 40 to 48
ep_idx[i] = 256
buffer_index = 213 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 76
buffer_index = 33 , len(sampled_ep) = 198, trace = from 46 to 54
ep_idx[i] = 179
buffer_index = 136 , len(sampled_ep) = 77, trace = from 5 to 13
ep_idx[i] = 72
buffer_index = 29 , len(sampled_ep) = 82, trace = from 74 to 82
ep_idx[i] = 334
buffer_index = 291 , len(sampled_ep) = 113, trace = from 11 to 19
ep_idx[i] = 172
buffer_index = 129 , len(sampled_ep) = 81, trace = from 16 to 24
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9969535000000251
epsilon is = 0.9969520000000252
epsilon is = 0.9969505000000252
epsilon is = 0.9969490000000252
epsilon is = 0.9969475000000252
Target Set Succe

buffer_index = 282 , len(sampled_ep) = 82, trace = from 8 to 16
ep_idx[i] = 94
buffer_index = 51 , len(sampled_ep) = 45, trace = from 34 to 42
ep_idx[i] = 170
buffer_index = 127 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 265
buffer_index = 222 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 251
buffer_index = 208 , len(sampled_ep) = 133, trace = from 79 to 87
ep_idx[i] = 76
buffer_index = 33 , len(sampled_ep) = 198, trace = from 105 to 113
ep_idx[i] = 236
buffer_index = 193 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 138
buffer_index = 95 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 103
buffer_index = 60 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 110
buffer_index = 67 , len(sampled_ep) = 137, trace = from 44 to 52
ep_idx[i] = 175
buffer_index = 132 , len(sampled_ep) = 102, trace = from 44 to 52
ep_idx[i] = 90
buffer_index = 47 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 222
buffer_index = 179 , len(sampled_ep)

buffer_index = 150 , len(sampled_ep) = 108, trace = from 95 to 103
ep_idx[i] = 334
buffer_index = 291 , len(sampled_ep) = 113, trace = from 39 to 47
ep_idx[i] = 231
buffer_index = 188 , len(sampled_ep) = 77, trace = from 9 to 17
ep_idx[i] = 92
buffer_index = 49 , len(sampled_ep) = 281, trace = from 265 to 273
ep_idx[i] = 256
buffer_index = 213 , len(sampled_ep) = 81, trace = from 28 to 36
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9969160000000254
epsilon is = 0.9969145000000255
epsilon is = 0.9969130000000255
epsilon is = 0.9969115000000255
epsilon is = 0.9969100000000255
Target Set Success
ep_idx = [334, 103, 260, 339, 165, 206, 287, 316, 102, 63, 48, 281, 334, 62, 305, 249, 48, 332, 335, 162, 286, 141, 332, 283, 61, 296, 198, 107, 223, 104, 165, 143]
exp_idx = [110, 80, 44, 53, 77, 17, 52, 59, 7, 45, 75, 9, 104, 12, 47, 49, 16, 37, 18, 110, 32, 19, 69, 43, 45, 81, 12, 60, 108, 42, 35, 35]
idx_offset = 43, self.episode_index = 342, len(self.buffer) = 300
ep_idx[i] = 334
buffer_i

buffer_index = 202 , len(sampled_ep) = 75, trace = from 10 to 18
ep_idx[i] = 250
buffer_index = 207 , len(sampled_ep) = 49, trace = from 4 to 12
ep_idx[i] = 288
buffer_index = 245 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 61
buffer_index = 18 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 310
buffer_index = 267 , len(sampled_ep) = 164, trace = from 140 to 148
ep_idx[i] = 284
buffer_index = 241 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 332
buffer_index = 289 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 317
buffer_index = 274 , len(sampled_ep) = 84, trace = from 8 to 16
ep_idx[i] = 144
buffer_index = 101 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 342
buffer_index = 299 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 113
buffer_index = 70 , len(sampled_ep) = 105, trace = from 20 to 28
ep_idx[i] = 100
buffer_index = 57 , len(sampled_ep) = 102, trace = from 7 to 15
ep_idx[i] = 334
buffer_index = 291 , len(sampl

sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9968785000000258
epsilon is = 0.9968770000000258
epsilon is = 0.9968755000000258
epsilon is = 0.9968740000000258
epsilon is = 0.9968725000000258
Target Set Success
ep_idx = [157, 334, 68, 264, 52, 191, 71, 224, 151, 191, 223, 240, 125, 172, 100, 277, 276, 181, 299, 87, 163, 66, 330, 97, 266, 105, 71, 260, 325, 250, 267, 141]
exp_idx = [95, 65, 51, 101, 32, 46, 237, 7, 68, 23, 15, 17, 36, 66, 8, 69, 10, 12, 69, 43, 95, 21, 79, 66, 33, 45, 206, 59, 13, 21, 68, 59]
idx_offset = 44, self.episode_index = 343, len(self.buffer) = 300
ep_idx[i] = 157
buffer_index = 113 , len(sampled_ep) = 103, trace = from 88 to 96
ep_idx[i] = 334
buffer_index = 290 , len(sampled_ep) = 113, trace = from 58 to 66
ep_idx[i] = 68
buffer_index = 24 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 264
buffer_index = 220 , len(sampled_ep) = 109, trace = from 94 to 102
ep_idx[i] = 52
buffer_index = 8 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 191

buffer_index = 79 , len(sampled_ep) = 198, trace = from 134 to 142
ep_idx[i] = 236
buffer_index = 192 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 270
buffer_index = 226 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 306
buffer_index = 262 , len(sampled_ep) = 117, trace = from 12 to 20
ep_idx[i] = 177
buffer_index = 133 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 334
buffer_index = 290 , len(sampled_ep) = 113, trace = from 70 to 78
ep_idx[i] = 270
buffer_index = 226 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 232
buffer_index = 188 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 154
buffer_index = 110 , len(sampled_ep) = 153, trace = from 22 to 30
ep_idx[i] = 328
buffer_index = 284 , len(sampled_ep) = 75, trace = from 16 to 24
ep_idx[i] = 222
buffer_index = 178 , len(sampled_ep) = 224, trace = from 9 to 17
ep_idx[i] = 148
buffer_index = 104 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 100
buffer_index = 56 , len(s

exp_idx = [95, 37, 17, 17, 58, 56, 16, 106, 22, 59, 108, 22, 27, 85, 28, 15, 65, 15, 11, 25, 94, 86, 80, 17, 38, 50, 60, 203, 58, 75, 32, 137]
idx_offset = 44, self.episode_index = 343, len(self.buffer) = 300
ep_idx[i] = 334
buffer_index = 290 , len(sampled_ep) = 113, trace = from 88 to 96
ep_idx[i] = 316
buffer_index = 272 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 273
buffer_index = 229 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 165
buffer_index = 121 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 134
buffer_index = 90 , len(sampled_ep) = 208, trace = from 51 to 59
ep_idx[i] = 218
buffer_index = 174 , len(sampled_ep) = 80, trace = from 49 to 57
ep_idx[i] = 106
buffer_index = 62 , len(sampled_ep) = 103, trace = from 9 to 17
ep_idx[i] = 287
buffer_index = 243 , len(sampled_ep) = 194, trace = from 99 to 107
ep_idx[i] = 202
buffer_index = 158 , len(sampled_ep) = 143, trace = from 15 to 23
ep_idx[i] = 267
buffer_index = 223 , len(sampled_ep) = 83, 

ep_idx[i] = 147
buffer_index = 103 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 318
buffer_index = 274 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 197
buffer_index = 153 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 70
buffer_index = 26 , len(sampled_ep) = 198, trace = from 170 to 178
ep_idx[i] = 143
buffer_index = 99 , len(sampled_ep) = 78, trace = from 18 to 26
ep_idx[i] = 197
buffer_index = 153 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 218
buffer_index = 174 , len(sampled_ep) = 80, trace = from 33 to 41
ep_idx[i] = 250
buffer_index = 206 , len(sampled_ep) = 49, trace = from 0 to 8
ep_idx[i] = 324
buffer_index = 280 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 97
buffer_index = 53 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 262
buffer_index = 218 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 212
buffer_index = 168 , len(sampled_ep) = 81, trace = from 11 to 19
sampledTraces.shape = (32, 8, 6)


buffer_index = 270 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 90
buffer_index = 46 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 232
buffer_index = 188 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 252
buffer_index = 208 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 163
buffer_index = 119 , len(sampled_ep) = 134, trace = from 14 to 22
ep_idx[i] = 227
buffer_index = 183 , len(sampled_ep) = 112, trace = from 12 to 20
ep_idx[i] = 317
buffer_index = 273 , len(sampled_ep) = 84, trace = from 23 to 31
ep_idx[i] = 268
buffer_index = 224 , len(sampled_ep) = 120, trace = from 12 to 20
ep_idx[i] = 185
buffer_index = 141 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 248
buffer_index = 204 , len(sampled_ep) = 77, trace = from 34 to 42
ep_idx[i] = 222
buffer_index = 178 , len(sampled_ep) = 224, trace = from 169 to 177
ep_idx[i] = 115
buffer_index = 71 , len(sampled_ep) = 299, trace = from 91 to 99
ep_idx[i] = 189
buffer_index = 145 , len(sa

buffer_index = 272 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 232
buffer_index = 188 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 221
buffer_index = 177 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 290 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 324
buffer_index = 280 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 309
buffer_index = 265 , len(sampled_ep) = 115, trace = from 93 to 101
ep_idx[i] = 327
buffer_index = 283 , len(sampled_ep) = 81, trace = from 21 to 29
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9967810000000266
epsilon is = 0.9967795000000266
epsilon is = 0.9967780000000266
epsilon is = 0.9967765000000266
epsilon is = 0.9967750000000266
Target Set Success
ep_idx = [131, 188, 332, 119, 68, 134, 135, 158, 332, 178, 71, 111, 322, 102, 277, 261, 90, 76, 211, 221, 50, 248, 288, 69, 299, 206, 215, 121, 107, 177, 300, 124]
exp_idx = [51, 57, 21, 20, 16, 202, 46, 43, 46, 24, 73, 12, 87, 18, 

buffer_index = 181 , len(sampled_ep) = 120, trace = from 107 to 115
ep_idx[i] = 322
buffer_index = 277 , len(sampled_ep) = 111, trace = from 17 to 25
ep_idx[i] = 101
buffer_index = 56 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 220
buffer_index = 175 , len(sampled_ep) = 118, trace = from 110 to 118
ep_idx[i] = 83
buffer_index = 38 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 119
buffer_index = 74 , len(sampled_ep) = 183, trace = from 169 to 177
ep_idx[i] = 68
buffer_index = 23 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 340
buffer_index = 295 , len(sampled_ep) = 142, trace = from 122 to 130
ep_idx[i] = 238
buffer_index = 193 , len(sampled_ep) = 79, trace = from 67 to 75
ep_idx[i] = 185
buffer_index = 140 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 247
buffer_index = 202 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 98
buffer_index = 53 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 343
buffer_index = 298 , len

ep_idx[i] = 79
buffer_index = 34 , len(sampled_ep) = 75, trace = from 36 to 44
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 69 to 77
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9967435000000269
epsilon is = 0.9967420000000269
epsilon is = 0.9967405000000269
epsilon is = 0.9967390000000269
epsilon is = 0.9967375000000269
Target Set Success
ep_idx = [196, 309, 56, 131, 295, 334, 132, 226, 64, 152, 211, 241, 123, 297, 86, 299, 223, 200, 339, 332, 285, 219, 174, 262, 144, 152, 255, 180, 304, 46, 268, 250]
exp_idx = [13, 17, 106, 69, 75, 108, 54, 59, 17, 27, 79, 23, 138, 17, 48, 40, 20, 59, 15, 40, 108, 26, 80, 9, 60, 53, 65, 46, 31, 26, 53, 19]
idx_offset = 45, self.episode_index = 344, len(self.buffer) = 300
ep_idx[i] = 196
buffer_index = 151 , len(sampled_ep) = 135, trace = from 6 to 14
ep_idx[i] = 309
buffer_index = 264 , len(sampled_ep) = 115, trace = from 10 to 18
ep_idx[i] = 56
buffer_index = 11 , len(sampled_ep) = 198, trace = from 99 to 107
ep_idx[i]

buffer_index = 240 , len(sampled_ep) = 166, trace = from 9 to 17
ep_idx[i] = 142
buffer_index = 97 , len(sampled_ep) = 108, trace = from 27 to 35
ep_idx[i] = 287
buffer_index = 242 , len(sampled_ep) = 194, trace = from 7 to 15
ep_idx[i] = 285
buffer_index = 240 , len(sampled_ep) = 166, trace = from 18 to 26
ep_idx[i] = 102
buffer_index = 57 , len(sampled_ep) = 103, trace = from 65 to 73
ep_idx[i] = 135
buffer_index = 90 , len(sampled_ep) = 211, trace = from 171 to 179
ep_idx[i] = 324
buffer_index = 279 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 335
buffer_index = 290 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 162
buffer_index = 117 , len(sampled_ep) = 112, trace = from 88 to 96
ep_idx[i] = 60
buffer_index = 15 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 182
buffer_index = 137 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 228
buffer_index = 183 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 300
buffer_index = 255 , len(sam

ep_idx = [262, 70, 217, 79, 343, 123, 334, 70, 175, 196, 215, 334, 161, 310, 152, 329, 319, 275, 106, 142, 154, 59, 338, 242, 290, 277, 163, 334, 258, 170, 254, 127]
exp_idx = [40, 60, 45, 14, 11, 147, 99, 171, 18, 99, 59, 108, 85, 87, 109, 36, 67, 52, 19, 36, 135, 75, 80, 77, 13, 16, 112, 81, 45, 75, 98, 28]
idx_offset = 45, self.episode_index = 344, len(self.buffer) = 300
ep_idx[i] = 262
buffer_index = 217 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 70
buffer_index = 25 , len(sampled_ep) = 198, trace = from 53 to 61
ep_idx[i] = 217
buffer_index = 172 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 79
buffer_index = 34 , len(sampled_ep) = 75, trace = from 7 to 15
ep_idx[i] = 343
buffer_index = 298 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 123
buffer_index = 78 , len(sampled_ep) = 198, trace = from 140 to 148
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 92 to 100
ep_idx[i] = 70
buffer_index = 25 , len(sampled_ep) = 198,

buffer_index = 242 , len(sampled_ep) = 194, trace = from 59 to 67
ep_idx[i] = 130
buffer_index = 85 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 294
buffer_index = 249 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 317
buffer_index = 272 , len(sampled_ep) = 84, trace = from 12 to 20
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 210
buffer_index = 165 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 330
buffer_index = 285 , len(sampled_ep) = 175, trace = from 162 to 170
ep_idx[i] = 54
buffer_index = 9 , len(sampled_ep) = 47, trace = from 5 to 13
ep_idx[i] = 300
buffer_index = 255 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 168
buffer_index = 123 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 152
buffer_index = 107 , len(sampled_ep) = 228, trace = from 24 to 32
ep_idx[i] = 295
buffer_index = 250 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 287
buffer_index = 242 , len(sa

buffer_index = 285 , len(sampled_ep) = 175, trace = from 31 to 39
ep_idx[i] = 88
buffer_index = 43 , len(sampled_ep) = 114, trace = from 38 to 46
ep_idx[i] = 151
buffer_index = 106 , len(sampled_ep) = 82, trace = from 12 to 20
ep_idx[i] = 73
buffer_index = 28 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 330
buffer_index = 285 , len(sampled_ep) = 175, trace = from 89 to 97
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 289
buffer_index = 244 , len(sampled_ep) = 83, trace = from 40 to 48
ep_idx[i] = 115
buffer_index = 70 , len(sampled_ep) = 299, trace = from 166 to 174
ep_idx[i] = 239
buffer_index = 194 , len(sampled_ep) = 101, trace = from 9 to 17
ep_idx[i] = 202
buffer_index = 157 , len(sampled_ep) = 143, trace = from 121 to 129
ep_idx[i] = 111
buffer_index = 66 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 298
buffer_index = 253 , len(sampled_ep) = 115, trace = from 30 to 38
ep_idx[i] = 203
buffer_index = 158 , 

buffer_index = 209 , len(sampled_ep) = 116, trace = from 42 to 50
ep_idx[i] = 328
buffer_index = 283 , len(sampled_ep) = 75, trace = from 17 to 25
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 168
buffer_index = 123 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 251
buffer_index = 206 , len(sampled_ep) = 133, trace = from 4 to 12
ep_idx[i] = 101
buffer_index = 56 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 65
buffer_index = 20 , len(sampled_ep) = 45, trace = from 18 to 26
ep_idx[i] = 314
buffer_index = 269 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 135
buffer_index = 90 , len(sampled_ep) = 211, trace = from 136 to 144
ep_idx[i] = 56
buffer_index = 11 , len(sampled_ep) = 198, trace = from 143 to 151
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9966460000000277
epsilon is = 0.9966445000000277
epsilon is = 0.9966430000000277
epsilon is = 0.9966415000000277
epsilon is = 0.9966400000000277
Target Se

buffer_index = 65 , len(sampled_ep) = 137, trace = from 8 to 16
ep_idx[i] = 104
buffer_index = 59 , len(sampled_ep) = 50, trace = from 12 to 20
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 117
buffer_index = 72 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 186
buffer_index = 141 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 312
buffer_index = 267 , len(sampled_ep) = 108, trace = from 9 to 17
ep_idx[i] = 135
buffer_index = 90 , len(sampled_ep) = 211, trace = from 91 to 99
ep_idx[i] = 157
buffer_index = 112 , len(sampled_ep) = 103, trace = from 77 to 85
ep_idx[i] = 234
buffer_index = 189 , len(sampled_ep) = 167, trace = from 40 to 48
ep_idx[i] = 325
buffer_index = 280 , len(sampled_ep) = 82, trace = from 70 to 78
ep_idx[i] = 334
buffer_index = 289 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 217
buffer_index = 172 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 302
buffer_index = 257 , len

ep_idx[i] = 332
buffer_index = 286 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 236
buffer_index = 190 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 246
buffer_index = 200 , len(sampled_ep) = 47, trace = from 21 to 29
ep_idx[i] = 334
buffer_index = 288 , len(sampled_ep) = 113, trace = from 93 to 101
ep_idx[i] = 110
buffer_index = 64 , len(sampled_ep) = 137, trace = from 115 to 123
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.996608500000028
epsilon is = 0.996607000000028
epsilon is = 0.996605500000028
epsilon is = 0.996604000000028
epsilon is = 0.996602500000028
Target Set Success
ep_idx = [161, 313, 204, 185, 127, 286, 176, 281, 77, 330, 305, 167, 175, 330, 257, 66, 115, 148, 303, 135, 305, 116, 261, 332, 288, 114, 179, 192, 208, 152, 288, 290]
exp_idx = [29, 78, 33, 34, 17, 39, 62, 11, 74, 139, 14, 10, 91, 156, 17, 39, 15, 66, 29, 147, 46, 82, 40, 69, 53, 8, 13, 73, 16, 107, 14, 150]
idx_offset = 46, self.episode_index = 345, len(self.buffer) = 300
ep_idx[

buffer_index = 11 , len(sampled_ep) = 110, trace = from 73 to 81
ep_idx[i] = 133
buffer_index = 87 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 223
buffer_index = 177 , len(sampled_ep) = 112, trace = from 36 to 44
ep_idx[i] = 327
buffer_index = 281 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 115
buffer_index = 69 , len(sampled_ep) = 299, trace = from 126 to 134
ep_idx[i] = 333
buffer_index = 287 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 57
buffer_index = 11 , len(sampled_ep) = 110, trace = from 2 to 10
ep_idx[i] = 334
buffer_index = 288 , len(sampled_ep) = 113, trace = from 6 to 14
ep_idx[i] = 299
buffer_index = 253 , len(sampled_ep) = 100, trace = from 5 to 13
ep_idx[i] = 69
buffer_index = 23 , len(sampled_ep) = 44, trace = from 9 to 17
ep_idx[i] = 129
buffer_index = 83 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 287
buffer_index = 241 , len(sampled_ep) = 194, trace = from 10 to 18
ep_idx[i] = 252
buffer_index = 206 , len(sample

epsilon is = 0.9965680000000283
epsilon is = 0.9965665000000283
epsilon is = 0.9965650000000283
Target Set Success
ep_idx = [146, 268, 239, 286, 226, 152, 189, 135, 140, 201, 279, 316, 289, 250, 332, 84, 334, 308, 162, 147, 336, 272, 284, 249, 312, 197, 309, 152, 86, 89, 113, 334]
exp_idx = [75, 41, 85, 25, 36, 92, 17, 141, 14, 80, 26, 53, 35, 20, 15, 18, 101, 41, 77, 39, 36, 49, 11, 11, 16, 80, 78, 31, 48, 77, 42, 107]
idx_offset = 46, self.episode_index = 345, len(self.buffer) = 300
ep_idx[i] = 146
buffer_index = 100 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 268
buffer_index = 222 , len(sampled_ep) = 120, trace = from 34 to 42
ep_idx[i] = 239
buffer_index = 193 , len(sampled_ep) = 101, trace = from 78 to 86
ep_idx[i] = 286
buffer_index = 240 , len(sampled_ep) = 147, trace = from 18 to 26
ep_idx[i] = 226
buffer_index = 180 , len(sampled_ep) = 120, trace = from 29 to 37
ep_idx[i] = 152
buffer_index = 106 , len(sampled_ep) = 228, trace = from 85 to 93
ep_idx[i] = 189
buf

buffer_index = 119 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 290
buffer_index = 244 , len(sampled_ep) = 152, trace = from 85 to 93
ep_idx[i] = 135
buffer_index = 89 , len(sampled_ep) = 211, trace = from 142 to 150
ep_idx[i] = 56
buffer_index = 10 , len(sampled_ep) = 198, trace = from 77 to 85
ep_idx[i] = 207
buffer_index = 161 , len(sampled_ep) = 75, trace = from 36 to 44
ep_idx[i] = 336
buffer_index = 290 , len(sampled_ep) = 102, trace = from 21 to 29
ep_idx[i] = 243
buffer_index = 197 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 104
buffer_index = 58 , len(sampled_ep) = 50, trace = from 0 to 8
ep_idx[i] = 76
buffer_index = 30 , len(sampled_ep) = 198, trace = from 101 to 109
ep_idx[i] = 285
buffer_index = 239 , len(sampled_ep) = 166, trace = from 26 to 34
ep_idx[i] = 119
buffer_index = 73 , len(sampled_ep) = 183, trace = from 127 to 135
ep_idx[i] = 108
buffer_index = 62 , len(sampled_ep) = 45, trace = from 10 to 18
ep_idx[i] = 149
buffer_index = 103 , len(

idx_offset = 46, self.episode_index = 345, len(self.buffer) = 300
ep_idx[i] = 298
buffer_index = 252 , len(sampled_ep) = 115, trace = from 61 to 69
ep_idx[i] = 110
buffer_index = 64 , len(sampled_ep) = 137, trace = from 118 to 126
ep_idx[i] = 167
buffer_index = 121 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 254
buffer_index = 208 , len(sampled_ep) = 116, trace = from 53 to 61
ep_idx[i] = 313
buffer_index = 267 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 76
buffer_index = 30 , len(sampled_ep) = 198, trace = from 115 to 123
ep_idx[i] = 242
buffer_index = 196 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 267
buffer_index = 221 , len(sampled_ep) = 83, trace = from 15 to 23
ep_idx[i] = 66
buffer_index = 20 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 292
buffer_index = 246 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 268
buffer_index = 222 , len(sampled_ep) = 120, trace = from 12 to 20
ep_idx[i] = 119
buffer_index = 73 , len(s

ep_idx[i] = 123
buffer_index = 77 , len(sampled_ep) = 198, trace = from 115 to 123
ep_idx[i] = 120
buffer_index = 74 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 220
buffer_index = 174 , len(sampled_ep) = 118, trace = from 84 to 92
ep_idx[i] = 89
buffer_index = 43 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 56
buffer_index = 10 , len(sampled_ep) = 198, trace = from 72 to 80
ep_idx[i] = 191
buffer_index = 145 , len(sampled_ep) = 131, trace = from 45 to 53
ep_idx[i] = 299
buffer_index = 253 , len(sampled_ep) = 100, trace = from 87 to 95
ep_idx[i] = 155
buffer_index = 109 , len(sampled_ep) = 77, trace = from 41 to 49
ep_idx[i] = 131
buffer_index = 85 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 104
buffer_index = 58 , len(sampled_ep) = 50, trace = from 18 to 26
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9965110000000288
epsilon is = 0.9965095000000288
epsilon is = 0.9965080000000288
epsilon is = 0.9965065000000288
length of poped element = 81 , c

buffer_index = 30 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 225
buffer_index = 178 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 287
buffer_index = 240 , len(sampled_ep) = 194, trace = from 17 to 25
ep_idx[i] = 106
buffer_index = 59 , len(sampled_ep) = 103, trace = from 37 to 45
ep_idx[i] = 211
buffer_index = 164 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 289
buffer_index = 242 , len(sampled_ep) = 83, trace = from 73 to 81
ep_idx[i] = 166
buffer_index = 119 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 316
buffer_index = 269 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 142
buffer_index = 95 , len(sampled_ep) = 108, trace = from 46 to 54
ep_idx[i] = 299
buffer_index = 252 , len(sampled_ep) = 100, trace = from 6 to 14
ep_idx[i] = 135
buffer_index = 88 , len(sampled_ep) = 211, trace = from 47 to 55
ep_idx[i] = 279
buffer_index = 232 , len(sampled_ep) = 48, trace = from 28 to 36
ep_idx[i] = 55
buffer_index = 8 , len(sampled

buffer_index = 89 , len(sampled_ep) = 108, trace = from 70 to 78
ep_idx[i] = 312
buffer_index = 265 , len(sampled_ep) = 108, trace = from 79 to 87
ep_idx[i] = 331
buffer_index = 284 , len(sampled_ep) = 84, trace = from 42 to 50
ep_idx[i] = 330
buffer_index = 283 , len(sampled_ep) = 175, trace = from 140 to 148
ep_idx[i] = 162
buffer_index = 115 , len(sampled_ep) = 112, trace = from 6 to 14
ep_idx[i] = 334
buffer_index = 287 , len(sampled_ep) = 113, trace = from 90 to 98
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9964735000000291
epsilon is = 0.9964720000000291
epsilon is = 0.9964705000000291
epsilon is = 0.9964690000000291
epsilon is = 0.9964675000000291
Target Set Success
ep_idx = [334, 51, 334, 100, 312, 274, 314, 75, 268, 330, 157, 256, 186, 205, 270, 100, 313, 309, 73, 123, 134, 140, 74, 291, 266, 287, 116, 334, 237, 284, 334, 145]
exp_idx = [106, 16, 104, 88, 87, 15, 25, 49, 90, 110, 75, 54, 79, 63, 26, 63, 17, 10, 17, 16, 71, 60, 46, 91, 31, 19, 86, 72, 27, 17, 90, 14]
idx_o

buffer_index = 283 , len(sampled_ep) = 175, trace = from 72 to 80
ep_idx[i] = 334
buffer_index = 287 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 125
buffer_index = 78 , len(sampled_ep) = 75, trace = from 0 to 8
ep_idx[i] = 334
buffer_index = 287 , len(sampled_ep) = 113, trace = from 48 to 56
ep_idx[i] = 186
buffer_index = 139 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 240
buffer_index = 193 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 105
buffer_index = 58 , len(sampled_ep) = 74, trace = from 3 to 11
ep_idx[i] = 213
buffer_index = 166 , len(sampled_ep) = 148, trace = from 136 to 144
ep_idx[i] = 77
buffer_index = 30 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 133
buffer_index = 86 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 202
buffer_index = 155 , len(sampled_ep) = 143, trace = from 16 to 24
ep_idx[i] = 224
buffer_index = 177 , len(sampled_ep) = 46, trace = from 25 to 33
ep_idx[i] = 154
buffer_index = 107 , len(sam

buffer_index = 271 , len(sampled_ep) = 81, trace = from 9 to 17
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9964360000000294
epsilon is = 0.9964345000000294
epsilon is = 0.9964330000000294
epsilon is = 0.9964315000000294
epsilon is = 0.9964300000000295
Target Set Success
ep_idx = [105, 292, 342, 340, 70, 156, 91, 77, 96, 160, 85, 233, 135, 76, 327, 248, 90, 138, 243, 122, 125, 109, 202, 107, 213, 53, 112, 139, 288, 86, 322, 115]
exp_idx = [52, 40, 51, 136, 135, 9, 12, 16, 17, 53, 80, 15, 41, 165, 26, 35, 66, 54, 61, 27, 27, 29, 94, 58, 103, 73, 16, 24, 51, 12, 94, 73]
idx_offset = 47, self.episode_index = 346, len(self.buffer) = 300
ep_idx[i] = 105
buffer_index = 58 , len(sampled_ep) = 74, trace = from 45 to 53
ep_idx[i] = 292
buffer_index = 245 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 342
buffer_index = 295 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 340
buffer_index = 293 , len(sampled_ep) = 142, trace = from 129 to 137
ep_idx[i] = 70
buffer_index =

buffer_index = 78 , len(sampled_ep) = 75, trace = from 44 to 52
ep_idx[i] = 301
buffer_index = 254 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 151
buffer_index = 104 , len(sampled_ep) = 82, trace = from 11 to 19
ep_idx[i] = 223
buffer_index = 176 , len(sampled_ep) = 112, trace = from 15 to 23
ep_idx[i] = 82
buffer_index = 35 , len(sampled_ep) = 73, trace = from 8 to 16
ep_idx[i] = 272
buffer_index = 225 , len(sampled_ep) = 70, trace = from 18 to 26
ep_idx[i] = 238
buffer_index = 191 , len(sampled_ep) = 79, trace = from 51 to 59
ep_idx[i] = 167
buffer_index = 120 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 92
buffer_index = 45 , len(sampled_ep) = 281, trace = from 106 to 114
ep_idx[i] = 179
buffer_index = 132 , len(sampled_ep) = 77, trace = from 28 to 36
ep_idx[i] = 193
buffer_index = 146 , len(sampled_ep) = 108, trace = from 86 to 94
ep_idx[i] = 260
buffer_index = 213 , len(sampled_ep) = 108, trace = from 7 to 15
ep_idx[i] = 184
buffer_index = 137 , len(samp

exp_idx = [104, 18, 22, 11, 88, 106, 232, 17, 54, 105, 14, 73, 31, 190, 12, 51, 8, 40, 14, 17, 44, 43, 105, 39, 69, 79, 12, 12, 57, 41, 20, 11]
idx_offset = 47, self.episode_index = 346, len(self.buffer) = 300
ep_idx[i] = 88
buffer_index = 41 , len(sampled_ep) = 114, trace = from 97 to 105
ep_idx[i] = 225
buffer_index = 178 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 154
buffer_index = 107 , len(sampled_ep) = 153, trace = from 15 to 23
ep_idx[i] = 281
buffer_index = 234 , len(sampled_ep) = 178, trace = from 4 to 12
ep_idx[i] = 115
buffer_index = 68 , len(sampled_ep) = 299, trace = from 81 to 89
ep_idx[i] = 234
buffer_index = 187 , len(sampled_ep) = 167, trace = from 99 to 107
ep_idx[i] = 115
buffer_index = 68 , len(sampled_ep) = 299, trace = from 225 to 233
ep_idx[i] = 290
buffer_index = 243 , len(sampled_ep) = 152, trace = from 10 to 18
ep_idx[i] = 229
buffer_index = 182 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 309
buffer_index = 262 , len(sampled_ep) = 

buffer_index = 155 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 340
buffer_index = 292 , len(sampled_ep) = 142, trace = from 86 to 94
ep_idx[i] = 313
buffer_index = 265 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 90
buffer_index = 42 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 205
buffer_index = 157 , len(sampled_ep) = 78, trace = from 58 to 66
ep_idx[i] = 287
buffer_index = 239 , len(sampled_ep) = 194, trace = from 19 to 27
ep_idx[i] = 103
buffer_index = 55 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 326
buffer_index = 278 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 191
buffer_index = 143 , len(sampled_ep) = 131, trace = from 45 to 53
ep_idx[i] = 265
buffer_index = 217 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 330
buffer_index = 282 , len(sampled_ep) = 175, trace = from 138 to 146
ep_idx[i] = 98
buffer_index = 50 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 232
buffer_index = 184 , len(sa

buffer_index = 158 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 130
buffer_index = 82 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 261
buffer_index = 213 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 231
buffer_index = 183 , len(sampled_ep) = 77, trace = from 13 to 21
ep_idx[i] = 124
buffer_index = 76 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 316
buffer_index = 268 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 311
buffer_index = 263 , len(sampled_ep) = 84, trace = from 8 to 16
ep_idx[i] = 334
buffer_index = 286 , len(sampled_ep) = 113, trace = from 48 to 56
ep_idx[i] = 214
buffer_index = 166 , len(sampled_ep) = 75, trace = from 45 to 53
ep_idx[i] = 253
buffer_index = 205 , len(sampled_ep) = 76, trace = from 54 to 62
ep_idx[i] = 69
buffer_index = 21 , len(sampled_ep) = 44, trace = from 3 to 11
ep_idx[i] = 222
buffer_index = 174 , len(sampled_ep) = 224, trace = from 143 to 151
ep_idx[i] = 251
buffer_index = 203 , len(sample

ep_idx[i] = 334
buffer_index = 286 , len(sampled_ep) = 113, trace = from 88 to 96
ep_idx[i] = 106
buffer_index = 58 , len(sampled_ep) = 103, trace = from 82 to 90
ep_idx[i] = 296
buffer_index = 248 , len(sampled_ep) = 108, trace = from 46 to 54
ep_idx[i] = 334
buffer_index = 286 , len(sampled_ep) = 113, trace = from 95 to 103
ep_idx[i] = 70
buffer_index = 22 , len(sampled_ep) = 198, trace = from 65 to 73
ep_idx[i] = 330
buffer_index = 282 , len(sampled_ep) = 175, trace = from 26 to 34
ep_idx[i] = 154
buffer_index = 106 , len(sampled_ep) = 153, trace = from 64 to 72
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9963385000000302
epsilon is = 0.9963370000000302
epsilon is = 0.9963355000000302
epsilon is = 0.9963340000000303
epsilon is = 0.9963325000000303
Target Set Success
ep_idx = [336, 207, 73, 330, 334, 58, 247, 226, 158, 66, 187, 115, 134, 68, 162, 195, 292, 340, 211, 203, 56, 82, 240, 108, 290, 330, 269, 268, 261, 279, 135, 152]
exp_idx = [29, 53, 71, 130, 28, 30, 21, 57, 51, 67, 

buffer_index = 219 , len(sampled_ep) = 83, trace = from 28 to 36
ep_idx[i] = 129
buffer_index = 81 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 340
buffer_index = 292 , len(sampled_ep) = 142, trace = from 46 to 54
ep_idx[i] = 137
buffer_index = 89 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 76
buffer_index = 28 , len(sampled_ep) = 198, trace = from 37 to 45
ep_idx[i] = 317
buffer_index = 269 , len(sampled_ep) = 84, trace = from 38 to 46
ep_idx[i] = 135
buffer_index = 87 , len(sampled_ep) = 211, trace = from 50 to 58
ep_idx[i] = 286
buffer_index = 238 , len(sampled_ep) = 147, trace = from 76 to 84
ep_idx[i] = 56
buffer_index = 8 , len(sampled_ep) = 198, trace = from 142 to 150
ep_idx[i] = 334
buffer_index = 286 , len(sampled_ep) = 113, trace = from 59 to 67
ep_idx[i] = 334
buffer_index = 286 , len(sampled_ep) = 113, trace = from 61 to 69
ep_idx[i] = 180
buffer_index = 132 , len(sampled_ep) = 141, trace = from 128 to 136
ep_idx[i] = 192
buffer_index = 144 , len(

buffer_index = 14 , len(sampled_ep) = 77, trace = from 40 to 48
ep_idx[i] = 307
buffer_index = 259 , len(sampled_ep) = 48, trace = from 14 to 22
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9963010000000305
epsilon is = 0.9962995000000305
epsilon is = 0.9962980000000305
epsilon is = 0.9962965000000306
epsilon is = 0.9962950000000306
Target Set Success
ep_idx = [300, 196, 144, 132, 92, 290, 85, 286, 59, 322, 249, 162, 48, 211, 314, 135, 181, 208, 171, 72, 135, 50, 322, 250, 227, 75, 280, 151, 283, 163, 116, 100]
exp_idx = [69, 18, 60, 47, 25, 19, 95, 37, 50, 110, 10, 34, 45, 65, 22, 64, 13, 63, 16, 81, 82, 28, 59, 8, 47, 72, 73, 17, 17, 40, 67, 16]
idx_offset = 48, self.episode_index = 347, len(self.buffer) = 300
ep_idx[i] = 300
buffer_index = 252 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 196
buffer_index = 148 , len(sampled_ep) = 135, trace = from 11 to 19
ep_idx[i] = 144
buffer_index = 96 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 132
buffer_index = 8

buffer_index = 235 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 332
buffer_index = 284 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 281
buffer_index = 233 , len(sampled_ep) = 178, trace = from 106 to 114
ep_idx[i] = 324
buffer_index = 276 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 178
buffer_index = 130 , len(sampled_ep) = 80, trace = from 15 to 23
ep_idx[i] = 223
buffer_index = 175 , len(sampled_ep) = 112, trace = from 30 to 38
ep_idx[i] = 134
buffer_index = 86 , len(sampled_ep) = 208, trace = from 100 to 108
ep_idx[i] = 123
buffer_index = 75 , len(sampled_ep) = 198, trace = from 136 to 144
ep_idx[i] = 142
buffer_index = 94 , len(sampled_ep) = 108, trace = from 46 to 54
ep_idx[i] = 187
buffer_index = 139 , len(sampled_ep) = 84, trace = from 43 to 51
ep_idx[i] = 216
buffer_index = 168 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 198
buffer_index = 150 , len(sampled_ep) = 42, trace = from 31 to 39
ep_idx[i] = 274
buffer_index = 226 ,

ep_idx = [310, 139, 112, 76, 311, 222, 243, 265, 106, 213, 301, 126, 154, 167, 334, 210, 217, 247, 71, 334, 96, 58, 305, 87, 207, 325, 330, 178, 122, 275, 171, 307]
exp_idx = [74, 67, 35, 107, 49, 106, 45, 69, 74, 94, 36, 13, 77, 45, 107, 13, 19, 19, 131, 93, 73, 17, 8, 72, 11, 22, 137, 48, 18, 14, 79, 18]
idx_offset = 49, self.episode_index = 348, len(self.buffer) = 300
ep_idx[i] = 310
buffer_index = 261 , len(sampled_ep) = 164, trace = from 67 to 75
ep_idx[i] = 139
buffer_index = 90 , len(sampled_ep) = 133, trace = from 60 to 68
ep_idx[i] = 112
buffer_index = 63 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 76
buffer_index = 27 , len(sampled_ep) = 198, trace = from 100 to 108
ep_idx[i] = 311
buffer_index = 262 , len(sampled_ep) = 84, trace = from 42 to 50
ep_idx[i] = 222
buffer_index = 173 , len(sampled_ep) = 224, trace = from 99 to 107
ep_idx[i] = 243
buffer_index = 194 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 265
buffer_index = 216 , len(sampled_ep) = 8

buffer_index = 224 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 242
buffer_index = 193 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 204
buffer_index = 155 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 292
buffer_index = 243 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 195
buffer_index = 146 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 327
buffer_index = 278 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 162
buffer_index = 113 , len(sampled_ep) = 112, trace = from 18 to 26
ep_idx[i] = 168
buffer_index = 119 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 249
buffer_index = 200 , len(sampled_ep) = 75, trace = from 53 to 61
ep_idx[i] = 228
buffer_index = 179 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 271
buffer_index = 222 , len(sampled_ep) = 46, trace = from 31 to 39
ep_idx[i] = 184
buffer_index = 135 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 75
buffer_index = 26 , len(sampl

buffer_index = 62 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 209
buffer_index = 160 , len(sampled_ep) = 71, trace = from 35 to 43
ep_idx[i] = 213
buffer_index = 164 , len(sampled_ep) = 148, trace = from 94 to 102
ep_idx[i] = 278
buffer_index = 229 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 203
buffer_index = 154 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 235
buffer_index = 186 , len(sampled_ep) = 46, trace = from 30 to 38
ep_idx[i] = 336
buffer_index = 287 , len(sampled_ep) = 102, trace = from 38 to 46
ep_idx[i] = 219
buffer_index = 170 , len(sampled_ep) = 110, trace = from 7 to 15
ep_idx[i] = 171
buffer_index = 122 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 226
buffer_index = 177 , len(sampled_ep) = 120, trace = from 48 to 56
ep_idx[i] = 138
buffer_index = 89 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 168
buffer_index = 119 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 220
buffer_index = 171 , len(sam

buffer_index = 96 , len(sampled_ep) = 117, trace = from 53 to 61
ep_idx[i] = 236
buffer_index = 187 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 240
buffer_index = 191 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 222
buffer_index = 173 , len(sampled_ep) = 224, trace = from 9 to 17
ep_idx[i] = 193
buffer_index = 144 , len(sampled_ep) = 108, trace = from 95 to 103
ep_idx[i] = 240
buffer_index = 191 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 266
buffer_index = 217 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 216
buffer_index = 167 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 106
buffer_index = 57 , len(sampled_ep) = 103, trace = from 84 to 92
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9962035000000313
epsilon is = 0.9962020000000313
epsilon is = 0.9962005000000314
epsilon is = 0.9961990000000314
epsilon is = 0.9961975000000314
Target Set Success
ep_idx = [286, 215, 61, 157, 115, 217, 219, 116, 277, 339, 119, 132, 103, 

buffer_index = 103 , len(sampled_ep) = 228, trace = from 210 to 218
ep_idx[i] = 205
buffer_index = 156 , len(sampled_ep) = 78, trace = from 60 to 68
ep_idx[i] = 69
buffer_index = 20 , len(sampled_ep) = 44, trace = from 36 to 44
ep_idx[i] = 300
buffer_index = 251 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 288
buffer_index = 239 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 213
buffer_index = 164 , len(sampled_ep) = 148, trace = from 83 to 91
ep_idx[i] = 214
buffer_index = 165 , len(sampled_ep) = 75, trace = from 39 to 47
ep_idx[i] = 71
buffer_index = 22 , len(sampled_ep) = 299, trace = from 109 to 117
ep_idx[i] = 289
buffer_index = 240 , len(sampled_ep) = 83, trace = from 42 to 50
ep_idx[i] = 184
buffer_index = 135 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 162
buffer_index = 113 , len(sampled_ep) = 112, trace = from 102 to 110
ep_idx[i] = 253
buffer_index = 204 , len(sampled_ep) = 76, trace = from 6 to 14
ep_idx[i] = 218
buffer_index = 169 , len(

buffer_index = 285 , len(sampled_ep) = 113, trace = from 92 to 100
ep_idx[i] = 101
buffer_index = 52 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 214
buffer_index = 165 , len(sampled_ep) = 75, trace = from 51 to 59
ep_idx[i] = 330
buffer_index = 281 , len(sampled_ep) = 175, trace = from 159 to 167
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9961660000000316
epsilon is = 0.9961645000000316
epsilon is = 0.9961630000000317
epsilon is = 0.9961615000000317
epsilon is = 0.9961600000000317
Target Set Success
ep_idx = [115, 262, 292, 278, 259, 128, 228, 222, 334, 234, 190, 170, 222, 112, 64, 287, 71, 205, 71, 136, 112, 222, 222, 311, 180, 83, 281, 330, 234, 199, 104, 320]
exp_idx = [139, 42, 43, 74, 12, 30, 34, 59, 108, 145, 10, 16, 90, 16, 57, 16, 198, 9, 286, 84, 50, 108, 202, 35, 60, 63, 138, 56, 115, 8, 19, 62]
idx_offset = 49, self.episode_index = 348, len(self.buffer) = 300
ep_idx[i] = 115
buffer_index = 66 , len(sampled_ep) = 299, trace = from 132 to 140
ep_idx[i] = 262

buffer_index = 142 , len(sampled_ep) = 131, trace = from 43 to 51
ep_idx[i] = 164
buffer_index = 115 , len(sampled_ep) = 118, trace = from 106 to 114
ep_idx[i] = 163
buffer_index = 114 , len(sampled_ep) = 134, trace = from 40 to 48
ep_idx[i] = 284
buffer_index = 235 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 302
buffer_index = 253 , len(sampled_ep) = 79, trace = from 27 to 35
ep_idx[i] = 220
buffer_index = 171 , len(sampled_ep) = 118, trace = from 90 to 98
ep_idx[i] = 259
buffer_index = 210 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 198
buffer_index = 149 , len(sampled_ep) = 42, trace = from 16 to 24
ep_idx[i] = 281
buffer_index = 232 , len(sampled_ep) = 178, trace = from 46 to 54
ep_idx[i] = 231
buffer_index = 182 , len(sampled_ep) = 77, trace = from 59 to 67
ep_idx[i] = 145
buffer_index = 96 , len(sampled_ep) = 117, trace = from 97 to 105
ep_idx[i] = 337
buffer_index = 288 , len(sampled_ep) = 82, trace = from 45 to 53
ep_idx[i] = 329
buffer_index = 280 ,

epsilon is = 0.996122500000032
Target Set Success
ep_idx = [250, 260, 163, 330, 216, 237, 290, 140, 310, 298, 160, 334, 202, 112, 266, 330, 247, 184, 162, 172, 271, 267, 252, 237, 151, 260, 286, 175, 128, 158, 334, 340]
exp_idx = [18, 94, 115, 120, 21, 70, 65, 16, 39, 99, 20, 99, 37, 39, 10, 156, 71, 68, 58, 19, 18, 25, 47, 23, 16, 53, 124, 54, 41, 8, 106, 41]
idx_offset = 49, self.episode_index = 348, len(self.buffer) = 300
ep_idx[i] = 250
buffer_index = 201 , len(sampled_ep) = 49, trace = from 11 to 19
ep_idx[i] = 260
buffer_index = 211 , len(sampled_ep) = 108, trace = from 87 to 95
ep_idx[i] = 163
buffer_index = 114 , len(sampled_ep) = 134, trace = from 108 to 116
ep_idx[i] = 330
buffer_index = 281 , len(sampled_ep) = 175, trace = from 113 to 121
ep_idx[i] = 216
buffer_index = 167 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 237
buffer_index = 188 , len(sampled_ep) = 116, trace = from 63 to 71
ep_idx[i] = 290
buffer_index = 241 , len(sampled_ep) = 152, trace = from 58 t

buffer_index = 275 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 152
buffer_index = 103 , len(sampled_ep) = 228, trace = from 164 to 172
ep_idx[i] = 162
buffer_index = 113 , len(sampled_ep) = 112, trace = from 12 to 20
ep_idx[i] = 332
buffer_index = 283 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 330
buffer_index = 281 , len(sampled_ep) = 175, trace = from 150 to 158
ep_idx[i] = 268
buffer_index = 219 , len(sampled_ep) = 120, trace = from 104 to 112
ep_idx[i] = 94
buffer_index = 45 , len(sampled_ep) = 45, trace = from 33 to 41
ep_idx[i] = 192
buffer_index = 143 , len(sampled_ep) = 78, trace = from 33 to 41
ep_idx[i] = 71
buffer_index = 22 , len(sampled_ep) = 299, trace = from 88 to 96
ep_idx[i] = 234
buffer_index = 185 , len(sampled_ep) = 167, trace = from 99 to 107
ep_idx[i] = 106
buffer_index = 57 , len(sampled_ep) = 103, trace = from 65 to 73
ep_idx[i] = 214
buffer_index = 165 , len(sampled_ep) = 75, trace = from 62 to 70
ep_idx[i] = 158
buffer_index = 109 

buffer_index = 239 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 334
buffer_index = 285 , len(sampled_ep) = 113, trace = from 96 to 104
ep_idx[i] = 320
buffer_index = 271 , len(sampled_ep) = 71, trace = from 51 to 59
ep_idx[i] = 295
buffer_index = 246 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 331
buffer_index = 282 , len(sampled_ep) = 84, trace = from 19 to 27
ep_idx[i] = 289
buffer_index = 240 , len(sampled_ep) = 83, trace = from 47 to 55
ep_idx[i] = 322
buffer_index = 273 , len(sampled_ep) = 111, trace = from 78 to 86
ep_idx[i] = 183
buffer_index = 134 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 182
buffer_index = 133 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 237
buffer_index = 188 , len(sampled_ep) = 116, trace = from 18 to 26
ep_idx[i] = 333
buffer_index = 284 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 327
buffer_index = 278 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 88
buffer_index = 39 , len(sa

buffer_index = 281 , len(sampled_ep) = 175, trace = from 125 to 133
ep_idx[i] = 120
buffer_index = 71 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 348
buffer_index = 299 , len(sampled_ep) = 77, trace = from 41 to 49
ep_idx[i] = 137
buffer_index = 88 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 70
buffer_index = 21 , len(sampled_ep) = 198, trace = from 79 to 87
ep_idx[i] = 92
buffer_index = 43 , len(sampled_ep) = 281, trace = from 117 to 125
ep_idx[i] = 119
buffer_index = 70 , len(sampled_ep) = 183, trace = from 47 to 55
ep_idx[i] = 76
buffer_index = 27 , len(sampled_ep) = 198, trace = from 37 to 45
ep_idx[i] = 168
buffer_index = 119 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 292
buffer_index = 243 , len(sampled_ep) = 81, trace = from 21 to 29
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9960685000000324
epsilon is = 0.9960670000000325
epsilon is = 0.9960655000000325
epsilon is = 0.9960640000000325
epsilon is = 0.9960625000000325
Target Set Su

buffer_index = 22 , len(sampled_ep) = 299, trace = from 154 to 162
ep_idx[i] = 322
buffer_index = 273 , len(sampled_ep) = 111, trace = from 18 to 26
ep_idx[i] = 222
buffer_index = 173 , len(sampled_ep) = 224, trace = from 72 to 80
ep_idx[i] = 119
buffer_index = 70 , len(sampled_ep) = 183, trace = from 125 to 133
ep_idx[i] = 124
buffer_index = 75 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 291
buffer_index = 242 , len(sampled_ep) = 103, trace = from 3 to 11
ep_idx[i] = 306
buffer_index = 257 , len(sampled_ep) = 117, trace = from 109 to 117
ep_idx[i] = 336
buffer_index = 287 , len(sampled_ep) = 102, trace = from 71 to 79
ep_idx[i] = 105
buffer_index = 56 , len(sampled_ep) = 74, trace = from 56 to 64
ep_idx[i] = 152
buffer_index = 103 , len(sampled_ep) = 228, trace = from 21 to 29
ep_idx[i] = 80
buffer_index = 31 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 324
buffer_index = 275 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 268
buffer_index = 219 , 

buffer_index = 28 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 285 , len(sampled_ep) = 113, trace = from 83 to 91
ep_idx[i] = 92
buffer_index = 43 , len(sampled_ep) = 281, trace = from 12 to 20
ep_idx[i] = 193
buffer_index = 144 , len(sampled_ep) = 108, trace = from 100 to 108
ep_idx[i] = 166
buffer_index = 117 , len(sampled_ep) = 81, trace = from 50 to 58
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9960310000000328
epsilon is = 0.9960295000000328
epsilon is = 0.9960280000000328
epsilon is = 0.9960265000000328
epsilon is = 0.9960250000000328
Target Set Success
ep_idx = [332, 128, 130, 286, 255, 82, 120, 331, 239, 254, 201, 239, 204, 318, 152, 76, 306, 334, 187, 324, 295, 189, 234, 212, 287, 186, 220, 346, 311, 289, 227, 213]
exp_idx = [67, 11, 8, 130, 29, 27, 73, 54, 53, 32, 18, 93, 45, 10, 145, 174, 71, 95, 15, 60, 45, 18, 142, 75, 79, 18, 75, 64, 79, 82, 51, 82]
idx_offset = 49, self.episode_index = 348, len(self.buffer) = 300
ep_idx[i] = 332
buffer

buffer_index = 225 , len(sampled_ep) = 114, trace = from 46 to 54
ep_idx[i] = 331
buffer_index = 282 , len(sampled_ep) = 84, trace = from 46 to 54
ep_idx[i] = 338
buffer_index = 289 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 332
buffer_index = 283 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 71
buffer_index = 22 , len(sampled_ep) = 299, trace = from 260 to 268
ep_idx[i] = 293
buffer_index = 244 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 81
buffer_index = 32 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 297
buffer_index = 248 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 208
buffer_index = 159 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 155
buffer_index = 106 , len(sampled_ep) = 77, trace = from 39 to 47
ep_idx[i] = 298
buffer_index = 249 , len(sampled_ep) = 115, trace = from 10 to 18
ep_idx[i] = 324
buffer_index = 275 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 234
buffer_index = 185 , len(sa

epsilon is = 0.9959920000000331
epsilon is = 0.9959905000000331
epsilon is = 0.9959890000000331
epsilon is = 0.9959875000000331
Target Set Success
ep_idx = [345, 102, 251, 122, 299, 285, 83, 204, 93, 169, 200, 328, 134, 154, 256, 251, 194, 303, 334, 202, 100, 334, 347, 270, 305, 183, 296, 76, 108, 56, 322, 100]
exp_idx = [67, 48, 16, 62, 17, 147, 45, 54, 50, 78, 44, 27, 127, 52, 49, 126, 42, 13, 98, 41, 9, 108, 50, 15, 49, 16, 99, 43, 28, 60, 30, 42]
idx_offset = 49, self.episode_index = 348, len(self.buffer) = 300
ep_idx[i] = 345
buffer_index = 296 , len(sampled_ep) = 108, trace = from 60 to 68
ep_idx[i] = 102
buffer_index = 53 , len(sampled_ep) = 103, trace = from 41 to 49
ep_idx[i] = 251
buffer_index = 202 , len(sampled_ep) = 133, trace = from 9 to 17
ep_idx[i] = 122
buffer_index = 73 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 299
buffer_index = 250 , len(sampled_ep) = 100, trace = from 10 to 18
ep_idx[i] = 285
buffer_index = 236 , len(sampled_ep) = 166, trace = from 

ep_idx[i] = 176
buffer_index = 127 , len(sampled_ep) = 80, trace = from 12 to 20
ep_idx[i] = 168
buffer_index = 119 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 251
buffer_index = 202 , len(sampled_ep) = 133, trace = from 3 to 11
ep_idx[i] = 312
buffer_index = 263 , len(sampled_ep) = 108, trace = from 73 to 81
ep_idx[i] = 147
buffer_index = 98 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 106
buffer_index = 57 , len(sampled_ep) = 103, trace = from 72 to 80
ep_idx[i] = 348
buffer_index = 299 , len(sampled_ep) = 77, trace = from 18 to 26
ep_idx[i] = 108
buffer_index = 59 , len(sampled_ep) = 45, trace = from 36 to 44
ep_idx[i] = 273
buffer_index = 224 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 333
buffer_index = 284 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 319
buffer_index = 270 , len(sampled_ep) = 70, trace = from 11 to 19
ep_idx[i] = 309
buffer_index = 260 , len(sampled_ep) = 115, trace = from 85 to 93
ep_idx[i] = 336
buffer_index

idx_offset = 49, self.episode_index = 348, len(self.buffer) = 300
ep_idx[i] = 330
buffer_index = 281 , len(sampled_ep) = 175, trace = from 155 to 163
ep_idx[i] = 115
buffer_index = 66 , len(sampled_ep) = 299, trace = from 179 to 187
ep_idx[i] = 333
buffer_index = 284 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 311
buffer_index = 262 , len(sampled_ep) = 84, trace = from 63 to 71
ep_idx[i] = 274
buffer_index = 225 , len(sampled_ep) = 114, trace = from 40 to 48
ep_idx[i] = 198
buffer_index = 149 , len(sampled_ep) = 42, trace = from 8 to 16
ep_idx[i] = 303
buffer_index = 254 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 334
buffer_index = 285 , len(sampled_ep) = 113, trace = from 67 to 75
ep_idx[i] = 327
buffer_index = 278 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 256
buffer_index = 207 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 304
buffer_index = 255 , len(sampled_ep) = 72, trace = from 16 to 24
ep_idx[i] = 186
buffer_index = 137 , 

buffer_index = 62 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 188
buffer_index = 139 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 260
buffer_index = 211 , len(sampled_ep) = 108, trace = from 70 to 78
ep_idx[i] = 99
buffer_index = 50 , len(sampled_ep) = 83, trace = from 22 to 30
ep_idx[i] = 152
buffer_index = 103 , len(sampled_ep) = 228, trace = from 8 to 16
ep_idx[i] = 258
buffer_index = 209 , len(sampled_ep) = 76, trace = from 6 to 14
ep_idx[i] = 85
buffer_index = 36 , len(sampled_ep) = 108, trace = from 69 to 77
ep_idx[i] = 311
buffer_index = 262 , len(sampled_ep) = 84, trace = from 17 to 25
ep_idx[i] = 80
buffer_index = 31 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 88
buffer_index = 39 , len(sampled_ep) = 114, trace = from 101 to 109
ep_idx[i] = 334
buffer_index = 285 , len(sampled_ep) = 113, trace = from 73 to 81
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9959335000000336
epsilon is = 0.9959320000000336
epsilon is = 0.9959305000000336


buffer_index = 248 , len(sampled_ep) = 115, trace = from 9 to 17
ep_idx[i] = 109
buffer_index = 59 , len(sampled_ep) = 48, trace = from 2 to 10
ep_idx[i] = 151
buffer_index = 101 , len(sampled_ep) = 82, trace = from 11 to 19
ep_idx[i] = 246
buffer_index = 196 , len(sampled_ep) = 47, trace = from 31 to 39
ep_idx[i] = 317
buffer_index = 267 , len(sampled_ep) = 84, trace = from 56 to 64
ep_idx[i] = 223
buffer_index = 173 , len(sampled_ep) = 112, trace = from 32 to 40
ep_idx[i] = 309
buffer_index = 259 , len(sampled_ep) = 115, trace = from 1 to 9
ep_idx[i] = 254
buffer_index = 204 , len(sampled_ep) = 116, trace = from 2 to 10
ep_idx[i] = 61
buffer_index = 11 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 51
buffer_index = 1 , len(sampled_ep) = 145, trace = from 8 to 16
ep_idx[i] = 100
buffer_index = 50 , len(sampled_ep) = 102, trace = from 31 to 39
ep_idx[i] = 153
buffer_index = 103 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 281
buffer_index = 231 , len(sampled_ep) 

ep_idx[i] = 174
buffer_index = 124 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 76
buffer_index = 26 , len(sampled_ep) = 198, trace = from 155 to 163
ep_idx[i] = 134
buffer_index = 84 , len(sampled_ep) = 208, trace = from 61 to 69
ep_idx[i] = 243
buffer_index = 193 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 162
buffer_index = 112 , len(sampled_ep) = 112, trace = from 83 to 91
ep_idx[i] = 111
buffer_index = 61 , len(sampled_ep) = 81, trace = from 43 to 51
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9958960000000339
epsilon is = 0.9958945000000339
epsilon is = 0.9958930000000339
epsilon is = 0.9958915000000339
epsilon is = 0.9958900000000339
Target Set Success
ep_idx = [140, 90, 338, 334, 308, 136, 51, 315, 191, 164, 316, 264, 56, 136, 211, 97, 340, 175, 176, 251, 193, 290, 228, 145, 183, 221, 347, 71, 110, 334, 267, 340]
exp_idx = [20, 68, 74, 46, 46, 61, 122, 18, 76, 19, 77, 35, 8, 83, 47, 16, 141, 86, 21, 43, 35, 95, 42, 92, 16, 32, 38, 153, 97, 98, 35,

buffer_index = 85 , len(sampled_ep) = 211, trace = from 140 to 148
ep_idx[i] = 286
buffer_index = 236 , len(sampled_ep) = 147, trace = from 90 to 98
ep_idx[i] = 287
buffer_index = 237 , len(sampled_ep) = 194, trace = from 63 to 71
ep_idx[i] = 126
buffer_index = 76 , len(sampled_ep) = 46, trace = from 27 to 35
ep_idx[i] = 123
buffer_index = 73 , len(sampled_ep) = 198, trace = from 72 to 80
ep_idx[i] = 109
buffer_index = 59 , len(sampled_ep) = 48, trace = from 17 to 25
ep_idx[i] = 264
buffer_index = 214 , len(sampled_ep) = 109, trace = from 7 to 15
ep_idx[i] = 171
buffer_index = 121 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 57
buffer_index = 7 , len(sampled_ep) = 110, trace = from 6 to 14
ep_idx[i] = 150
buffer_index = 100 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 70
buffer_index = 20 , len(sampled_ep) = 198, trace = from 124 to 132
ep_idx[i] = 222
buffer_index = 172 , len(sampled_ep) = 224, trace = from 74 to 82
ep_idx[i] = 68
buffer_index = 18 , len(sampl

buffer_index = 73 , len(sampled_ep) = 198, trace = from 184 to 192
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9958585000000342
epsilon is = 0.9958570000000342
epsilon is = 0.9958555000000342
epsilon is = 0.9958540000000342
epsilon is = 0.9958525000000342
Target Set Success
ep_idx = [296, 115, 169, 290, 264, 334, 243, 273, 134, 276, 84, 166, 121, 209, 325, 309, 71, 256, 78, 171, 278, 274, 56, 176, 152, 230, 286, 332, 57, 99, 198, 334]
exp_idx = [55, 121, 61, 76, 97, 90, 79, 16, 56, 73, 18, 28, 16, 26, 23, 59, 148, 47, 43, 53, 45, 43, 87, 42, 45, 16, 38, 53, 43, 51, 16, 50]
idx_offset = 50, self.episode_index = 349, len(self.buffer) = 300
ep_idx[i] = 296
buffer_index = 246 , len(sampled_ep) = 108, trace = from 48 to 56
ep_idx[i] = 115
buffer_index = 65 , len(sampled_ep) = 299, trace = from 114 to 122
ep_idx[i] = 169
buffer_index = 119 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 290
buffer_index = 240 , len(sampled_ep) = 152, trace = from 69 to 77
ep_idx[i] = 264
buffer

buffer_index = 69 , len(sampled_ep) = 183, trace = from 122 to 130
ep_idx[i] = 192
buffer_index = 142 , len(sampled_ep) = 78, trace = from 8 to 16
ep_idx[i] = 111
buffer_index = 61 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 330
buffer_index = 280 , len(sampled_ep) = 175, trace = from 14 to 22
ep_idx[i] = 227
buffer_index = 177 , len(sampled_ep) = 112, trace = from 93 to 101
ep_idx[i] = 165
buffer_index = 115 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 247
buffer_index = 197 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 284 , len(sampled_ep) = 113, trace = from 98 to 106
ep_idx[i] = 309
buffer_index = 259 , len(sampled_ep) = 115, trace = from 90 to 98
ep_idx[i] = 86
buffer_index = 36 , len(sampled_ep) = 77, trace = from 66 to 74
ep_idx[i] = 59
buffer_index = 9 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 334
buffer_index = 284 , len(sampled_ep) = 113, trace = from 87 to 95
ep_idx[i] = 271
buffer_index = 221 , len(s

exp_idx = [42, 17, 40, 20, 121, 17, 49, 67, 24, 34, 27, 16, 77, 108, 75, 102, 45, 110, 109, 36, 57, 126, 49, 29, 9, 17, 67, 43, 60, 18, 98, 120]
idx_offset = 50, self.episode_index = 349, len(self.buffer) = 300
ep_idx[i] = 280
buffer_index = 230 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 218
buffer_index = 168 , len(sampled_ep) = 80, trace = from 10 to 18
ep_idx[i] = 211
buffer_index = 161 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 68
buffer_index = 18 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 119
buffer_index = 69 , len(sampled_ep) = 183, trace = from 114 to 122
ep_idx[i] = 98
buffer_index = 48 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 186
buffer_index = 136 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 225
buffer_index = 175 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 336
buffer_index = 286 , len(sampled_ep) = 102, trace = from 17 to 25
ep_idx[i] = 159
buffer_index = 109 , len(sampled_ep) = 79, tr

buffer_index = 267 , len(sampled_ep) = 84, trace = from 76 to 84
ep_idx[i] = 178
buffer_index = 128 , len(sampled_ep) = 80, trace = from 8 to 16
ep_idx[i] = 216
buffer_index = 166 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 206
buffer_index = 156 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 112
buffer_index = 62 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 74
buffer_index = 24 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 191
buffer_index = 141 , len(sampled_ep) = 131, trace = from 63 to 71
ep_idx[i] = 102
buffer_index = 52 , len(sampled_ep) = 103, trace = from 9 to 17
ep_idx[i] = 70
buffer_index = 20 , len(sampled_ep) = 198, trace = from 18 to 26
ep_idx[i] = 176
buffer_index = 126 , len(sampled_ep) = 80, trace = from 3 to 11
ep_idx[i] = 190
buffer_index = 140 , len(sampled_ep) = 42, trace = from 19 to 27
ep_idx[i] = 156
buffer_index = 106 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 299
buffer_index = 249 , len(sampled_ep

buffer_index = 284 , len(sampled_ep) = 113, trace = from 99 to 107
ep_idx[i] = 156
buffer_index = 106 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 336
buffer_index = 286 , len(sampled_ep) = 102, trace = from 12 to 20
ep_idx[i] = 181
buffer_index = 131 , len(sampled_ep) = 43, trace = from 21 to 29
ep_idx[i] = 224
buffer_index = 174 , len(sampled_ep) = 46, trace = from 12 to 20
ep_idx[i] = 235
buffer_index = 185 , len(sampled_ep) = 46, trace = from 7 to 15
ep_idx[i] = 71
buffer_index = 21 , len(sampled_ep) = 299, trace = from 183 to 191
ep_idx[i] = 254
buffer_index = 204 , len(sampled_ep) = 116, trace = from 39 to 47
ep_idx[i] = 308
buffer_index = 258 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 216
buffer_index = 166 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 248
buffer_index = 198 , len(sampled_ep) = 77, trace = from 50 to 58
ep_idx[i] = 173
buffer_index = 123 , len(sampled_ep) = 71, trace = from 15 to 23
ep_idx[i] = 68
buffer_index = 18 , len(sa

buffer_index = 276 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 56
buffer_index = 6 , len(sampled_ep) = 198, trace = from 52 to 60
ep_idx[i] = 74
buffer_index = 24 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 215
buffer_index = 165 , len(sampled_ep) = 117, trace = from 23 to 31
ep_idx[i] = 135
buffer_index = 85 , len(sampled_ep) = 211, trace = from 161 to 169
ep_idx[i] = 337
buffer_index = 287 , len(sampled_ep) = 82, trace = from 73 to 81
ep_idx[i] = 207
buffer_index = 157 , len(sampled_ep) = 75, trace = from 66 to 74
ep_idx[i] = 70
buffer_index = 20 , len(sampled_ep) = 198, trace = from 124 to 132
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.995761000000035
epsilon is = 0.995759500000035
epsilon is = 0.995758000000035
epsilon is = 0.995756500000035
epsilon is = 0.995755000000035
Target Set Success
ep_idx = [119, 336, 95, 178, 83, 169, 260, 70, 61, 220, 136, 334, 348, 242, 287, 321, 175, 156, 294, 71, 334, 334, 332, 231, 86, 170, 208, 222, 207, 112, 336, 338

buffer_index = 161 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 113
buffer_index = 63 , len(sampled_ep) = 105, trace = from 50 to 58
ep_idx[i] = 312
buffer_index = 262 , len(sampled_ep) = 108, trace = from 38 to 46
ep_idx[i] = 186
buffer_index = 136 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 134
buffer_index = 84 , len(sampled_ep) = 208, trace = from 91 to 99
ep_idx[i] = 342
buffer_index = 292 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 134
buffer_index = 84 , len(sampled_ep) = 208, trace = from 119 to 127
ep_idx[i] = 135
buffer_index = 85 , len(sampled_ep) = 211, trace = from 56 to 64
ep_idx[i] = 231
buffer_index = 181 , len(sampled_ep) = 77, trace = from 40 to 48
ep_idx[i] = 231
buffer_index = 181 , len(sampled_ep) = 77, trace = from 68 to 76
ep_idx[i] = 137
buffer_index = 87 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 317
buffer_index = 267 , len(sampled_ep) = 84, trace = from 59 to 67
ep_idx[i] = 77
buffer_index = 27 , len(samp

buffer_index = 43 , len(sampled_ep) = 77, trace = from 38 to 46
ep_idx[i] = 160
buffer_index = 110 , len(sampled_ep) = 117, trace = from 70 to 78
ep_idx[i] = 315
buffer_index = 265 , len(sampled_ep) = 81, trace = from 16 to 24
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9957235000000353
epsilon is = 0.9957220000000353
epsilon is = 0.9957205000000353
epsilon is = 0.9957190000000353
epsilon is = 0.9957175000000353
Target Set Success
ep_idx = [76, 334, 100, 135, 199, 282, 274, 113, 334, 210, 287, 105, 136, 336, 197, 136, 78, 208, 346, 61, 195, 342, 105, 294, 146, 331, 125, 125, 183, 226, 90, 188]
exp_idx = [111, 85, 86, 48, 80, 12, 54, 23, 102, 52, 128, 45, 106, 51, 39, 88, 17, 16, 54, 16, 26, 71, 13, 67, 19, 60, 14, 15, 16, 7, 17, 28]
idx_offset = 50, self.episode_index = 349, len(self.buffer) = 300
ep_idx[i] = 76
buffer_index = 26 , len(sampled_ep) = 198, trace = from 104 to 112
ep_idx[i] = 334
buffer_index = 284 , len(sampled_ep) = 113, trace = from 78 to 86
ep_idx[i] = 100
buffer_

ep_idx[i] = 135
buffer_index = 85 , len(sampled_ep) = 211, trace = from 37 to 45
ep_idx[i] = 164
buffer_index = 114 , len(sampled_ep) = 118, trace = from 89 to 97
ep_idx[i] = 79
buffer_index = 29 , len(sampled_ep) = 75, trace = from 7 to 15
ep_idx[i] = 298
buffer_index = 248 , len(sampled_ep) = 115, trace = from 92 to 100
ep_idx[i] = 53
buffer_index = 3 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 289
buffer_index = 239 , len(sampled_ep) = 83, trace = from 62 to 70
ep_idx[i] = 303
buffer_index = 253 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 161
buffer_index = 111 , len(sampled_ep) = 114, trace = from 19 to 27
ep_idx[i] = 346
buffer_index = 296 , len(sampled_ep) = 70, trace = from 47 to 55
ep_idx[i] = 134
buffer_index = 84 , len(sampled_ep) = 208, trace = from 75 to 83
ep_idx[i] = 115
buffer_index = 65 , len(sampled_ep) = 299, trace = from 129 to 137
ep_idx[i] = 77
buffer_index = 27 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 103
buffer_index = 

ep_idx = [128, 300, 168, 107, 71, 264, 280, 151, 139, 309, 334, 345, 55, 154, 314, 128, 320, 161, 135, 334, 290, 230, 164, 349, 176, 67, 281, 309, 251, 50, 227, 314]
exp_idx = [47, 7, 80, 48, 153, 76, 25, 30, 118, 66, 96, 35, 11, 27, 66, 68, 55, 50, 76, 105, 150, 34, 37, 165, 69, 15, 114, 50, 94, 13, 101, 40]
idx_offset = 50, self.episode_index = 349, len(self.buffer) = 300
ep_idx[i] = 128
buffer_index = 78 , len(sampled_ep) = 74, trace = from 40 to 48
ep_idx[i] = 300
buffer_index = 250 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 168
buffer_index = 118 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 107
buffer_index = 57 , len(sampled_ep) = 109, trace = from 41 to 49
ep_idx[i] = 71
buffer_index = 21 , len(sampled_ep) = 299, trace = from 146 to 154
ep_idx[i] = 264
buffer_index = 214 , len(sampled_ep) = 109, trace = from 69 to 77
ep_idx[i] = 280
buffer_index = 230 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 151
buffer_index = 101 , len(sampled_ep) = 82

buffer_index = 284 , len(sampled_ep) = 113, trace = from 103 to 111
ep_idx[i] = 334
buffer_index = 284 , len(sampled_ep) = 113, trace = from 43 to 51
ep_idx[i] = 247
buffer_index = 197 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 231
buffer_index = 181 , len(sampled_ep) = 77, trace = from 53 to 61
ep_idx[i] = 309
buffer_index = 259 , len(sampled_ep) = 115, trace = from 61 to 69
ep_idx[i] = 170
buffer_index = 120 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 179
buffer_index = 129 , len(sampled_ep) = 77, trace = from 23 to 31
ep_idx[i] = 119
buffer_index = 69 , len(sampled_ep) = 183, trace = from 103 to 111
ep_idx[i] = 207
buffer_index = 157 , len(sampled_ep) = 75, trace = from 12 to 20
ep_idx[i] = 334
buffer_index = 284 , len(sampled_ep) = 113, trace = from 54 to 62
ep_idx[i] = 110
buffer_index = 60 , len(sampled_ep) = 137, trace = from 116 to 124
ep_idx[i] = 258
buffer_index = 208 , len(sampled_ep) = 76, trace = from 2 to 10
ep_idx[i] = 287
buffer_index = 237 

buffer_index = 42 , len(sampled_ep) = 281, trace = from 128 to 136
ep_idx[i] = 226
buffer_index = 176 , len(sampled_ep) = 120, trace = from 54 to 62
ep_idx[i] = 349
buffer_index = 299 , len(sampled_ep) = 228, trace = from 95 to 103
ep_idx[i] = 98
buffer_index = 48 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 105
buffer_index = 55 , len(sampled_ep) = 74, trace = from 15 to 23
ep_idx[i] = 314
buffer_index = 264 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 119
buffer_index = 69 , len(sampled_ep) = 183, trace = from 75 to 83
ep_idx[i] = 92
buffer_index = 42 , len(sampled_ep) = 281, trace = from 47 to 55
ep_idx[i] = 152
buffer_index = 102 , len(sampled_ep) = 228, trace = from 89 to 97
ep_idx[i] = 110
buffer_index = 60 , len(sampled_ep) = 137, trace = from 16 to 24
ep_idx[i] = 115
buffer_index = 65 , len(sampled_ep) = 299, trace = from 43 to 51
ep_idx[i] = 133
buffer_index = 83 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 306
buffer_index = 256 , len(sam

buffer_index = 61 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 242
buffer_index = 191 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 328
buffer_index = 277 , len(sampled_ep) = 75, trace = from 54 to 62
ep_idx[i] = 243
buffer_index = 192 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 154
buffer_index = 103 , len(sampled_ep) = 153, trace = from 116 to 124
ep_idx[i] = 215
buffer_index = 164 , len(sampled_ep) = 117, trace = from 14 to 22
ep_idx[i] = 223
buffer_index = 172 , len(sampled_ep) = 112, trace = from 4 to 12
ep_idx[i] = 160
buffer_index = 109 , len(sampled_ep) = 117, trace = from 89 to 97
ep_idx[i] = 236
buffer_index = 185 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 264
buffer_index = 213 , len(sampled_ep) = 109, trace = from 42 to 50
ep_idx[i] = 232
buffer_index = 181 , len(sampled_ep) = 81, trace = from 10 to 18
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9956260000000361
epsilon is = 0.9956245000000361
epsilon is = 0.99562300000

buffer_index = 181 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 158
buffer_index = 107 , len(sampled_ep) = 83, trace = from 6 to 14
ep_idx[i] = 275
buffer_index = 224 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 295
buffer_index = 244 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 110
buffer_index = 59 , len(sampled_ep) = 137, trace = from 14 to 22
ep_idx[i] = 96
buffer_index = 45 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 284
buffer_index = 233 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 343
buffer_index = 292 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 158
buffer_index = 107 , len(sampled_ep) = 83, trace = from 73 to 81
ep_idx[i] = 56
buffer_index = 5 , len(sampled_ep) = 198, trace = from 103 to 111
ep_idx[i] = 97
buffer_index = 46 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 210
buffer_index = 159 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 310
buffer_index = 259 , len(sampled_ep

buffer_index = 234 , len(sampled_ep) = 166, trace = from 115 to 123
ep_idx[i] = 329
buffer_index = 278 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 54
buffer_index = 3 , len(sampled_ep) = 47, trace = from 38 to 46
ep_idx[i] = 300
buffer_index = 249 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 299
buffer_index = 248 , len(sampled_ep) = 100, trace = from 3 to 11
ep_idx[i] = 289
buffer_index = 238 , len(sampled_ep) = 83, trace = from 10 to 18
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9955885000000364
epsilon is = 0.9955870000000364
epsilon is = 0.9955855000000364
epsilon is = 0.9955840000000364
epsilon is = 0.9955825000000365
Target Set Success
ep_idx = [220, 348, 230, 186, 174, 265, 237, 164, 260, 278, 302, 91, 332, 278, 334, 171, 330, 55, 52, 119, 211, 241, 340, 150, 299, 121, 197, 71, 222, 342, 240, 259]
exp_idx = [64, 67, 19, 64, 18, 71, 55, 19, 68, 47, 19, 28, 73, 67, 108, 31, 58, 7, 58, 46, 48, 73, 17, 12, 56, 14, 76, 208, 89, 42, 25, 50]
idx_offset =

ep_idx[i] = 68
buffer_index = 17 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 142
buffer_index = 91 , len(sampled_ep) = 108, trace = from 6 to 14
ep_idx[i] = 324
buffer_index = 273 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 150
buffer_index = 99 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 261
buffer_index = 210 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 309
buffer_index = 258 , len(sampled_ep) = 115, trace = from 87 to 95
ep_idx[i] = 140
buffer_index = 89 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 234
buffer_index = 183 , len(sampled_ep) = 167, trace = from 156 to 164
ep_idx[i] = 53
buffer_index = 2 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 300
buffer_index = 249 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 56
buffer_index = 5 , len(sampled_ep) = 198, trace = from 48 to 56
ep_idx[i] = 89
buffer_index = 38 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 228
buffer_index = 177 , le

epsilon is = 0.9955495000000367
epsilon is = 0.9955480000000367
epsilon is = 0.9955465000000367
epsilon is = 0.9955450000000368
Target Set Success
ep_idx = [297, 337, 310, 332, 240, 234, 71, 262, 275, 107, 221, 119, 96, 206, 176, 88, 134, 340, 334, 115, 230, 345, 284, 78, 51, 121, 245, 76, 254, 251, 316, 126]
exp_idx = [17, 20, 71, 50, 72, 117, 202, 11, 43, 100, 16, 35, 16, 71, 62, 110, 165, 130, 98, 228, 66, 49, 42, 64, 116, 19, 44, 82, 34, 56, 48, 14]
idx_offset = 51, self.episode_index = 350, len(self.buffer) = 300
ep_idx[i] = 297
buffer_index = 246 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 337
buffer_index = 286 , len(sampled_ep) = 82, trace = from 13 to 21
ep_idx[i] = 310
buffer_index = 259 , len(sampled_ep) = 164, trace = from 64 to 72
ep_idx[i] = 332
buffer_index = 281 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 240
buffer_index = 189 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 234
buffer_index = 183 , len(sampled_ep) = 167, trace = fr

buffer_index = 203 , len(sampled_ep) = 116, trace = from 74 to 82
ep_idx[i] = 299
buffer_index = 248 , len(sampled_ep) = 100, trace = from 6 to 14
ep_idx[i] = 268
buffer_index = 217 , len(sampled_ep) = 120, trace = from 12 to 20
ep_idx[i] = 227
buffer_index = 176 , len(sampled_ep) = 112, trace = from 97 to 105
ep_idx[i] = 251
buffer_index = 200 , len(sampled_ep) = 133, trace = from 39 to 47
ep_idx[i] = 178
buffer_index = 127 , len(sampled_ep) = 80, trace = from 57 to 65
ep_idx[i] = 76
buffer_index = 25 , len(sampled_ep) = 198, trace = from 161 to 169
ep_idx[i] = 200
buffer_index = 149 , len(sampled_ep) = 82, trace = from 8 to 16
ep_idx[i] = 61
buffer_index = 10 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 307
buffer_index = 256 , len(sampled_ep) = 48, trace = from 30 to 38
ep_idx[i] = 180
buffer_index = 129 , len(sampled_ep) = 141, trace = from 5 to 13
ep_idx[i] = 241
buffer_index = 190 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 309
buffer_index = 258 , len(

idx_offset = 51, self.episode_index = 350, len(self.buffer) = 300
ep_idx[i] = 287
buffer_index = 236 , len(sampled_ep) = 194, trace = from 165 to 173
ep_idx[i] = 158
buffer_index = 107 , len(sampled_ep) = 83, trace = from 68 to 76
ep_idx[i] = 204
buffer_index = 153 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 252
buffer_index = 201 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 153
buffer_index = 102 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 277
buffer_index = 226 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 142
buffer_index = 91 , len(sampled_ep) = 108, trace = from 48 to 56
ep_idx[i] = 240
buffer_index = 189 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 215
buffer_index = 164 , len(sampled_ep) = 117, trace = from 23 to 31
ep_idx[i] = 262
buffer_index = 211 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 209
buffer_index = 158 , len(sampled_ep) = 71, trace = from 18 to 26
ep_idx[i] = 266
buffer_index = 215 , len(

buffer_index = 176 , len(sampled_ep) = 112, trace = from 37 to 45
ep_idx[i] = 246
buffer_index = 195 , len(sampled_ep) = 47, trace = from 9 to 17
ep_idx[i] = 70
buffer_index = 19 , len(sampled_ep) = 198, trace = from 175 to 183
ep_idx[i] = 243
buffer_index = 192 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 171
buffer_index = 120 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 179
buffer_index = 128 , len(sampled_ep) = 77, trace = from 28 to 36
ep_idx[i] = 163
buffer_index = 112 , len(sampled_ep) = 134, trace = from 74 to 82
ep_idx[i] = 195
buffer_index = 144 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 88
buffer_index = 37 , len(sampled_ep) = 114, trace = from 86 to 94
ep_idx[i] = 347
buffer_index = 296 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 76
buffer_index = 25 , len(sampled_ep) = 198, trace = from 2 to 10
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9954910000000372
epsilon is = 0.9954895000000372
epsilon is = 0.99548800000003

buffer_index = 69 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 222
buffer_index = 171 , len(sampled_ep) = 224, trace = from 110 to 118
ep_idx[i] = 88
buffer_index = 37 , len(sampled_ep) = 114, trace = from 9 to 17
ep_idx[i] = 281
buffer_index = 230 , len(sampled_ep) = 178, trace = from 29 to 37
ep_idx[i] = 115
buffer_index = 64 , len(sampled_ep) = 299, trace = from 71 to 79
ep_idx[i] = 98
buffer_index = 47 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 217
buffer_index = 166 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 213
buffer_index = 162 , len(sampled_ep) = 148, trace = from 84 to 92
ep_idx[i] = 320
buffer_index = 269 , len(sampled_ep) = 71, trace = from 4 to 12
ep_idx[i] = 218
buffer_index = 167 , len(sampled_ep) = 80, trace = from 24 to 32
ep_idx[i] = 195
buffer_index = 144 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 81
buffer_index = 30 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 320
buffer_index = 269 , len(sampled

ep_idx[i] = 270
buffer_index = 219 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 247
buffer_index = 196 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 293
buffer_index = 242 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 136
buffer_index = 85 , len(sampled_ep) = 108, trace = from 3 to 11
ep_idx[i] = 198
buffer_index = 147 , len(sampled_ep) = 42, trace = from 31 to 39
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9954535000000375
epsilon is = 0.9954520000000375
epsilon is = 0.9954505000000375
epsilon is = 0.9954490000000376
epsilon is = 0.9954475000000376
Target Set Success
ep_idx = [350, 310, 225, 100, 167, 53, 143, 134, 240, 317, 101, 262, 334, 238, 322, 250, 299, 217, 276, 222, 340, 118, 259, 148, 287, 334, 170, 115, 80, 238, 149, 272]
exp_idx = [71, 124, 77, 88, 35, 7, 51, 21, 16, 19, 19, 51, 64, 57, 33, 11, 13, 17, 33, 121, 137, 69, 17, 71, 17, 102, 21, 53, 16, 61, 76, 45]
idx_offset = 51, self.episode_index = 350, len(self.buffer) = 300
ep_idx[i]

buffer_index = 283 , len(sampled_ep) = 113, trace = from 99 to 107
ep_idx[i] = 329
buffer_index = 278 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 306
buffer_index = 255 , len(sampled_ep) = 117, trace = from 54 to 62
ep_idx[i] = 159
buffer_index = 108 , len(sampled_ep) = 79, trace = from 41 to 49
ep_idx[i] = 281
buffer_index = 230 , len(sampled_ep) = 178, trace = from 5 to 13
ep_idx[i] = 78
buffer_index = 27 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 280
buffer_index = 229 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 306
buffer_index = 255 , len(sampled_ep) = 117, trace = from 39 to 47
ep_idx[i] = 217
buffer_index = 166 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 177
buffer_index = 126 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 345
buffer_index = 294 , len(sampled_ep) = 108, trace = from 57 to 65
ep_idx[i] = 60
buffer_index = 9 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 235
buffer_index = 184 , len(sam

length of poped element = 145 , cntr = 138 , diff = 7
epsilon is = 0.9954145000000378
epsilon is = 0.9954130000000379
epsilon is = 0.9954115000000379
epsilon is = 0.9954100000000379
Target Set Success
ep_idx = [349, 297, 174, 191, 136, 251, 99, 313, 328, 268, 264, 247, 222, 239, 99, 280, 259, 310, 333, 255, 111, 328, 219, 283, 57, 172, 215, 311, 298, 213, 298, 76]
exp_idx = [215, 16, 57, 19, 10, 46, 59, 58, 17, 31, 38, 64, 123, 52, 50, 18, 56, 42, 34, 12, 36, 33, 14, 57, 92, 35, 22, 25, 17, 117, 48, 111]
idx_offset = 52, self.episode_index = 351, len(self.buffer) = 300
ep_idx[i] = 349
buffer_index = 297 , len(sampled_ep) = 228, trace = from 208 to 216
ep_idx[i] = 297
buffer_index = 245 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 174
buffer_index = 122 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 191
buffer_index = 139 , len(sampled_ep) = 131, trace = from 12 to 20
ep_idx[i] = 136
buffer_index = 84 , len(sampled_ep) = 108, trace = from 3 to 11
ep_idx[i] = 251
b

buffer_index = 282 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 257
buffer_index = 205 , len(sampled_ep) = 82, trace = from 11 to 19
ep_idx[i] = 222
buffer_index = 170 , len(sampled_ep) = 224, trace = from 102 to 110
ep_idx[i] = 163
buffer_index = 111 , len(sampled_ep) = 134, trace = from 40 to 48
ep_idx[i] = 276
buffer_index = 224 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 202
buffer_index = 150 , len(sampled_ep) = 143, trace = from 38 to 46
ep_idx[i] = 254
buffer_index = 202 , len(sampled_ep) = 116, trace = from 58 to 66
ep_idx[i] = 322
buffer_index = 270 , len(sampled_ep) = 111, trace = from 97 to 105
ep_idx[i] = 70
buffer_index = 18 , len(sampled_ep) = 198, trace = from 71 to 79
ep_idx[i] = 327
buffer_index = 275 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 149
buffer_index = 97 , len(sampled_ep) = 82, trace = from 12 to 20
ep_idx[i] = 144
buffer_index = 92 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 331
buffer_index = 279 ,

exp_idx = [27, 27, 31, 62, 49, 108, 122, 103, 56, 35, 28, 69, 71, 68, 39, 73, 69, 43, 18, 127, 26, 20, 73, 34, 59, 44, 77, 24, 61, 54, 16, 51]
idx_offset = 52, self.episode_index = 351, len(self.buffer) = 300
ep_idx[i] = 262
buffer_index = 210 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 59
buffer_index = 7 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 327
buffer_index = 275 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 344
buffer_index = 292 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 257
buffer_index = 205 , len(sampled_ep) = 82, trace = from 42 to 50
ep_idx[i] = 119
buffer_index = 67 , len(sampled_ep) = 183, trace = from 101 to 109
ep_idx[i] = 70
buffer_index = 18 , len(sampled_ep) = 198, trace = from 115 to 123
ep_idx[i] = 251
buffer_index = 199 , len(sampled_ep) = 133, trace = from 96 to 104
ep_idx[i] = 335
buffer_index = 283 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 331
buffer_index = 279 , len(sampled_ep) = 84, t

buffer_index = 24 , len(sampled_ep) = 198, trace = from 185 to 193
ep_idx[i] = 85
buffer_index = 33 , len(sampled_ep) = 108, trace = from 68 to 76
ep_idx[i] = 277
buffer_index = 225 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 330
buffer_index = 278 , len(sampled_ep) = 175, trace = from 82 to 90
ep_idx[i] = 121
buffer_index = 69 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 333
buffer_index = 281 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 330
buffer_index = 278 , len(sampled_ep) = 175, trace = from 122 to 130
ep_idx[i] = 216
buffer_index = 164 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 93
buffer_index = 41 , len(sampled_ep) = 77, trace = from 27 to 35
ep_idx[i] = 281
buffer_index = 229 , len(sampled_ep) = 178, trace = from 108 to 116
ep_idx[i] = 220
buffer_index = 168 , len(sampled_ep) = 118, trace = from 32 to 40
ep_idx[i] = 94
buffer_index = 42 , len(sampled_ep) = 45, trace = from 11 to 19
ep_idx[i] = 85
buffer_index = 33 , len(sam

buffer_index = 291 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 89
buffer_index = 36 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 329
buffer_index = 276 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 211
buffer_index = 158 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 308
buffer_index = 255 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 249
buffer_index = 196 , len(sampled_ep) = 75, trace = from 38 to 46
ep_idx[i] = 71
buffer_index = 18 , len(sampled_ep) = 299, trace = from 225 to 233
ep_idx[i] = 321
buffer_index = 268 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 281 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 310
buffer_index = 257 , len(sampled_ep) = 164, trace = from 62 to 70
ep_idx[i] = 270
buffer_index = 217 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 281 , len(sampled_ep) = 113, trace = from 7 to 15
ep_idx[i] = 204
buffer_index = 151 , len(sa

buffer_index = 234 , len(sampled_ep) = 194, trace = from 11 to 19
ep_idx[i] = 330
buffer_index = 277 , len(sampled_ep) = 175, trace = from 77 to 85
ep_idx[i] = 350
buffer_index = 297 , len(sampled_ep) = 198, trace = from 43 to 51
ep_idx[i] = 119
buffer_index = 66 , len(sampled_ep) = 183, trace = from 86 to 94
ep_idx[i] = 92
buffer_index = 39 , len(sampled_ep) = 281, trace = from 226 to 234
ep_idx[i] = 221
buffer_index = 168 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 340
buffer_index = 287 , len(sampled_ep) = 142, trace = from 118 to 126
ep_idx[i] = 295
buffer_index = 242 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 251
buffer_index = 198 , len(sampled_ep) = 133, trace = from 26 to 34
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9953185000000386
epsilon is = 0.9953170000000386
epsilon is = 0.9953155000000387
epsilon is = 0.9953140000000387
epsilon is = 0.9953125000000387
Target Set Success
ep_idx = [264, 199, 142, 102, 92, 332, 310, 115, 125, 109, 122, 163

buffer_index = 269 , len(sampled_ep) = 111, trace = from 22 to 30
ep_idx[i] = 66
buffer_index = 13 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 226
buffer_index = 173 , len(sampled_ep) = 120, trace = from 37 to 45
ep_idx[i] = 236
buffer_index = 183 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 97
buffer_index = 44 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 61
buffer_index = 8 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 123
buffer_index = 70 , len(sampled_ep) = 198, trace = from 47 to 55
ep_idx[i] = 200
buffer_index = 147 , len(sampled_ep) = 82, trace = from 69 to 77
ep_idx[i] = 332
buffer_index = 279 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 330
buffer_index = 277 , len(sampled_ep) = 175, trace = from 46 to 54
ep_idx[i] = 128
buffer_index = 75 , len(sampled_ep) = 74, trace = from 17 to 25
ep_idx[i] = 243
buffer_index = 190 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 140
buffer_index = 87 , len(sampled_ep

buffer_index = 66 , len(sampled_ep) = 183, trace = from 37 to 45
ep_idx[i] = 150
buffer_index = 97 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 201
buffer_index = 148 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 73
buffer_index = 20 , len(sampled_ep) = 81, trace = from 36 to 44
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9952810000000389
epsilon is = 0.995279500000039
epsilon is = 0.995278000000039
epsilon is = 0.995276500000039
epsilon is = 0.995275000000039
Target Set Success
ep_idx = [332, 316, 334, 88, 330, 113, 265, 329, 276, 152, 214, 334, 89, 329, 167, 134, 62, 349, 139, 242, 286, 202, 71, 120, 345, 345, 254, 224, 330, 256, 281, 327]
exp_idx = [35, 17, 95, 100, 75, 19, 69, 60, 64, 95, 23, 106, 27, 9, 17, 20, 16, 35, 18, 79, 16, 12, 119, 18, 60, 101, 37, 37, 103, 17, 58, 20]
idx_offset = 53, self.episode_index = 352, len(self.buffer) = 300
ep_idx[i] = 332
buffer_index = 279 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 316
buffer_index = 

buffer_index = 144 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 309
buffer_index = 256 , len(sampled_ep) = 115, trace = from 16 to 24
ep_idx[i] = 185
buffer_index = 132 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 77
buffer_index = 24 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 187
buffer_index = 134 , len(sampled_ep) = 84, trace = from 48 to 56
ep_idx[i] = 215
buffer_index = 162 , len(sampled_ep) = 117, trace = from 50 to 58
ep_idx[i] = 106
buffer_index = 53 , len(sampled_ep) = 103, trace = from 0 to 8
ep_idx[i] = 193
buffer_index = 140 , len(sampled_ep) = 108, trace = from 3 to 11
ep_idx[i] = 106
buffer_index = 53 , len(sampled_ep) = 103, trace = from 25 to 33
ep_idx[i] = 116
buffer_index = 63 , len(sampled_ep) = 162, trace = from 5 to 13
ep_idx[i] = 271
buffer_index = 218 , len(sampled_ep) = 46, trace = from 8 to 16
ep_idx[i] = 342
buffer_index = 289 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 76
buffer_index = 23 , len(sampled_ep)

epsilon is = 0.9952375000000393
Target Set Success
ep_idx = [67, 321, 334, 151, 312, 212, 329, 71, 286, 212, 176, 111, 195, 340, 206, 136, 90, 120, 153, 334, 223, 110, 211, 67, 328, 58, 84, 69, 143, 136, 171, 334]
exp_idx = [27, 78, 85, 20, 12, 32, 21, 160, 82, 63, 68, 64, 16, 140, 67, 25, 69, 74, 17, 94, 20, 18, 8, 16, 42, 46, 19, 34, 16, 29, 18, 90]
idx_offset = 53, self.episode_index = 352, len(self.buffer) = 300
ep_idx[i] = 67
buffer_index = 14 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 321
buffer_index = 268 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 334
buffer_index = 281 , len(sampled_ep) = 113, trace = from 78 to 86
ep_idx[i] = 151
buffer_index = 98 , len(sampled_ep) = 82, trace = from 13 to 21
ep_idx[i] = 312
buffer_index = 259 , len(sampled_ep) = 108, trace = from 5 to 13
ep_idx[i] = 212
buffer_index = 159 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 329
buffer_index = 276 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 71


buffer_index = 159 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 302
buffer_index = 249 , len(sampled_ep) = 79, trace = from 44 to 52
ep_idx[i] = 169
buffer_index = 116 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 268
buffer_index = 215 , len(sampled_ep) = 120, trace = from 79 to 87
ep_idx[i] = 189
buffer_index = 136 , len(sampled_ep) = 84, trace = from 35 to 43
ep_idx[i] = 199
buffer_index = 146 , len(sampled_ep) = 116, trace = from 66 to 74
ep_idx[i] = 290
buffer_index = 237 , len(sampled_ep) = 152, trace = from 69 to 77
ep_idx[i] = 81
buffer_index = 28 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 161
buffer_index = 108 , len(sampled_ep) = 114, trace = from 76 to 84
ep_idx[i] = 247
buffer_index = 194 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 113
buffer_index = 60 , len(sampled_ep) = 105, trace = from 17 to 25
ep_idx[i] = 145
buffer_index = 92 , len(sampled_ep) = 117, trace = from 106 to 114
ep_idx[i] = 334
buffer_index = 281 , len(

idx_offset = 54, self.episode_index = 353, len(self.buffer) = 300
ep_idx[i] = 343
buffer_index = 289 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 129
buffer_index = 75 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 94
buffer_index = 40 , len(sampled_ep) = 45, trace = from 33 to 41
ep_idx[i] = 216
buffer_index = 162 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 287
buffer_index = 233 , len(sampled_ep) = 194, trace = from 70 to 78
ep_idx[i] = 334
buffer_index = 280 , len(sampled_ep) = 113, trace = from 98 to 106
ep_idx[i] = 223
buffer_index = 169 , len(sampled_ep) = 112, trace = from 58 to 66
ep_idx[i] = 231
buffer_index = 177 , len(sampled_ep) = 77, trace = from 11 to 19
ep_idx[i] = 145
buffer_index = 91 , len(sampled_ep) = 117, trace = from 91 to 99
ep_idx[i] = 291
buffer_index = 237 , len(sampled_ep) = 103, trace = from 94 to 102
ep_idx[i] = 274
buffer_index = 220 , len(sampled_ep) = 114, trace = from 37 to 45
ep_idx[i] = 303
buffer_index = 249 , len(

buffer_index = 69 , len(sampled_ep) = 198, trace = from 139 to 147
ep_idx[i] = 102
buffer_index = 48 , len(sampled_ep) = 103, trace = from 88 to 96
ep_idx[i] = 115
buffer_index = 61 , len(sampled_ep) = 299, trace = from 71 to 79
ep_idx[i] = 128
buffer_index = 74 , len(sampled_ep) = 74, trace = from 34 to 42
ep_idx[i] = 163
buffer_index = 109 , len(sampled_ep) = 134, trace = from 40 to 48
ep_idx[i] = 237
buffer_index = 183 , len(sampled_ep) = 116, trace = from 41 to 49
ep_idx[i] = 82
buffer_index = 28 , len(sampled_ep) = 73, trace = from 0 to 8
ep_idx[i] = 287
buffer_index = 233 , len(sampled_ep) = 194, trace = from 45 to 53
ep_idx[i] = 166
buffer_index = 112 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 298
buffer_index = 244 , len(sampled_ep) = 115, trace = from 11 to 19
ep_idx[i] = 166
buffer_index = 112 , len(sampled_ep) = 81, trace = from 62 to 70
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9951835000000397
epsilon is = 0.9951820000000398
epsilon is = 0.995180500000

buffer_index = 281 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 280
buffer_index = 226 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 148
buffer_index = 94 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 204
buffer_index = 150 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 331
buffer_index = 277 , len(sampled_ep) = 84, trace = from 16 to 24
ep_idx[i] = 261
buffer_index = 207 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 337
buffer_index = 283 , len(sampled_ep) = 82, trace = from 21 to 29
ep_idx[i] = 290
buffer_index = 236 , len(sampled_ep) = 152, trace = from 14 to 22
ep_idx[i] = 278
buffer_index = 224 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 301
buffer_index = 247 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 286
buffer_index = 232 , len(sampled_ep) = 147, trace = from 107 to 115
ep_idx[i] = 117
buffer_index = 63 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 135
buffer_index = 81 , len(samp

ep_idx[i] = 92
buffer_index = 37 , len(sampled_ep) = 281, trace = from 145 to 153
ep_idx[i] = 349
buffer_index = 294 , len(sampled_ep) = 228, trace = from 191 to 199
ep_idx[i] = 336
buffer_index = 281 , len(sampled_ep) = 102, trace = from 89 to 97
ep_idx[i] = 181
buffer_index = 126 , len(sampled_ep) = 43, trace = from 35 to 43
ep_idx[i] = 244
buffer_index = 189 , len(sampled_ep) = 17, trace = from 3 to 11
ep_idx[i] = 122
buffer_index = 67 , len(sampled_ep) = 81, trace = from 18 to 26
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.99514600000004
epsilon is = 0.9951445000000401
epsilon is = 0.9951430000000401
epsilon is = 0.9951415000000401
epsilon is = 0.9951400000000401
Target Set Success
ep_idx = [115, 90, 276, 92, 334, 165, 321, 281, 204, 204, 247, 188, 105, 337, 331, 196, 280, 161, 295, 140, 202, 190, 270, 248, 160, 205, 335, 107, 217, 64, 175, 296]
exp_idx = [258, 68, 25, 107, 76, 49, 37, 119, 75, 27, 16, 18, 57, 19, 52, 19, 54, 20, 60, 16, 112, 29, 56, 28, 80, 71, 24, 72, 22, 31,

buffer_index = 90 , len(sampled_ep) = 117, trace = from 57 to 65
ep_idx[i] = 334
buffer_index = 279 , len(sampled_ep) = 113, trace = from 69 to 77
ep_idx[i] = 154
buffer_index = 99 , len(sampled_ep) = 153, trace = from 25 to 33
ep_idx[i] = 234
buffer_index = 179 , len(sampled_ep) = 167, trace = from 51 to 59
ep_idx[i] = 97
buffer_index = 42 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 335
buffer_index = 280 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 329
buffer_index = 274 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 136
buffer_index = 81 , len(sampled_ep) = 108, trace = from 75 to 83
ep_idx[i] = 302
buffer_index = 247 , len(sampled_ep) = 79, trace = from 25 to 33
ep_idx[i] = 83
buffer_index = 28 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 64
buffer_index = 9 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 107
buffer_index = 52 , len(sampled_ep) = 109, trace = from 42 to 50
ep_idx[i] = 238
buffer_index = 183 , len(sampled_ep

buffer_index = 274 , len(sampled_ep) = 81, trace = from 39 to 47
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9951085000000404
epsilon is = 0.9951070000000404
epsilon is = 0.9951055000000404
epsilon is = 0.9951040000000404
epsilon is = 0.9951025000000404
Target Set Success
ep_idx = [349, 117, 334, 134, 100, 138, 319, 127, 126, 332, 138, 334, 262, 92, 123, 150, 169, 312, 154, 71, 172, 334, 291, 157, 313, 191, 249, 349, 223, 294, 123, 330]
exp_idx = [71, 11, 107, 110, 30, 19, 61, 37, 14, 63, 56, 91, 20, 236, 91, 15, 17, 48, 59, 79, 18, 86, 23, 89, 35, 44, 11, 150, 72, 56, 88, 33]
idx_offset = 55, self.episode_index = 354, len(self.buffer) = 300
ep_idx[i] = 349
buffer_index = 294 , len(sampled_ep) = 228, trace = from 64 to 72
ep_idx[i] = 117
buffer_index = 62 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 334
buffer_index = 279 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 134
buffer_index = 79 , len(sampled_ep) = 208, trace = from 103 to 111
ep_idx[i] = 100
bu

buffer_index = 212 , len(sampled_ep) = 83, trace = from 26 to 34
ep_idx[i] = 172
buffer_index = 117 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 67
buffer_index = 12 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 292
buffer_index = 237 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 127
buffer_index = 72 , len(sampled_ep) = 50, trace = from 14 to 22
ep_idx[i] = 154
buffer_index = 99 , len(sampled_ep) = 153, trace = from 64 to 72
ep_idx[i] = 298
buffer_index = 243 , len(sampled_ep) = 115, trace = from 43 to 51
ep_idx[i] = 92
buffer_index = 37 , len(sampled_ep) = 281, trace = from 217 to 225
ep_idx[i] = 334
buffer_index = 279 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 116
buffer_index = 61 , len(sampled_ep) = 162, trace = from 28 to 36
ep_idx[i] = 98
buffer_index = 43 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 334
buffer_index = 279 , len(sampled_ep) = 113, trace = from 99 to 107
ep_idx[i] = 241
buffer_index = 186 , len(sa

exp_idx = [36, 16, 20, 23, 8, 49, 22, 79, 19, 17, 49, 43, 75, 42, 27, 133, 74, 32, 49, 57, 62, 16, 80, 74, 107, 109, 87, 17, 78, 185, 39, 30]
idx_offset = 55, self.episode_index = 354, len(self.buffer) = 300
ep_idx[i] = 62
buffer_index = 7 , len(sampled_ep) = 77, trace = from 29 to 37
ep_idx[i] = 125
buffer_index = 70 , len(sampled_ep) = 75, trace = from 9 to 17
ep_idx[i] = 291
buffer_index = 236 , len(sampled_ep) = 103, trace = from 13 to 21
ep_idx[i] = 295
buffer_index = 240 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 108
buffer_index = 53 , len(sampled_ep) = 45, trace = from 1 to 9
ep_idx[i] = 289
buffer_index = 234 , len(sampled_ep) = 83, trace = from 42 to 50
ep_idx[i] = 97
buffer_index = 42 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 74
buffer_index = 19 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 347
buffer_index = 292 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 191
buffer_index = 136 , len(sampled_ep) = 131, trace = from 1

ep_idx[i] = 334
buffer_index = 279 , len(sampled_ep) = 113, trace = from 91 to 99
ep_idx[i] = 228
buffer_index = 173 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 310
buffer_index = 255 , len(sampled_ep) = 164, trace = from 127 to 135
ep_idx[i] = 92
buffer_index = 37 , len(sampled_ep) = 281, trace = from 196 to 204
ep_idx[i] = 110
buffer_index = 55 , len(sampled_ep) = 137, trace = from 66 to 74
ep_idx[i] = 223
buffer_index = 168 , len(sampled_ep) = 112, trace = from 18 to 26
ep_idx[i] = 245
buffer_index = 190 , len(sampled_ep) = 75, trace = from 42 to 50
ep_idx[i] = 184
buffer_index = 129 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 334
buffer_index = 279 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 255
buffer_index = 200 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 258
buffer_index = 203 , len(sampled_ep) = 76, trace = from 15 to 23
ep_idx[i] = 215
buffer_index = 160 , len(sampled_ep) = 117, trace = from 59 to 67
sampledTraces.shape 

buffer_index = 198 , len(sampled_ep) = 116, trace = from 108 to 116
ep_idx[i] = 182
buffer_index = 126 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 94
buffer_index = 38 , len(sampled_ep) = 45, trace = from 12 to 20
ep_idx[i] = 266
buffer_index = 210 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 283
buffer_index = 227 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 161
buffer_index = 105 , len(sampled_ep) = 114, trace = from 7 to 15
ep_idx[i] = 234
buffer_index = 178 , len(sampled_ep) = 167, trace = from 141 to 149
ep_idx[i] = 321
buffer_index = 265 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 221
buffer_index = 165 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 81
buffer_index = 25 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 351
buffer_index = 295 , len(sampled_ep) = 143, trace = from 112 to 120
ep_idx[i] = 300
buffer_index = 244 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 330
buffer_index = 274 , le

buffer_index = 114 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 125
buffer_index = 69 , len(sampled_ep) = 75, trace = from 60 to 68
ep_idx[i] = 287
buffer_index = 231 , len(sampled_ep) = 194, trace = from 63 to 71
ep_idx[i] = 320
buffer_index = 264 , len(sampled_ep) = 71, trace = from 62 to 70
ep_idx[i] = 260
buffer_index = 204 , len(sampled_ep) = 108, trace = from 76 to 84
ep_idx[i] = 330
buffer_index = 274 , len(sampled_ep) = 175, trace = from 62 to 70
ep_idx[i] = 178
buffer_index = 122 , len(sampled_ep) = 80, trace = from 16 to 24
ep_idx[i] = 239
buffer_index = 183 , len(sampled_ep) = 101, trace = from 40 to 48
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9950110000000412
epsilon is = 0.9950095000000412
epsilon is = 0.9950080000000412
epsilon is = 0.9950065000000412
epsilon is = 0.9950050000000412
Target Set Success
ep_idx = [112, 222, 90, 221, 171, 139, 268, 163, 179, 78, 71, 168, 251, 234, 340, 99, 234, 259, 254, 165, 56, 60, 191, 265, 294, 188, 180, 215, 336, 329,

buffer_index = 94 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 218
buffer_index = 162 , len(sampled_ep) = 80, trace = from 45 to 53
ep_idx[i] = 242
buffer_index = 186 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 187
buffer_index = 131 , len(sampled_ep) = 84, trace = from 29 to 37
ep_idx[i] = 116
buffer_index = 60 , len(sampled_ep) = 162, trace = from 6 to 14
ep_idx[i] = 115
buffer_index = 59 , len(sampled_ep) = 299, trace = from 37 to 45
ep_idx[i] = 332
buffer_index = 276 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 300
buffer_index = 244 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 115
buffer_index = 59 , len(sampled_ep) = 299, trace = from 68 to 76
ep_idx[i] = 287
buffer_index = 231 , len(sampled_ep) = 194, trace = from 166 to 174
ep_idx[i] = 102
buffer_index = 46 , len(sampled_ep) = 103, trace = from 15 to 23
ep_idx[i] = 116
buffer_index = 60 , len(sampled_ep) = 162, trace = from 115 to 123
ep_idx[i] = 186
buffer_index = 130 , len(s

ep_idx[i] = 282
buffer_index = 226 , len(sampled_ep) = 43, trace = from 1 to 9
ep_idx[i] = 226
buffer_index = 170 , len(sampled_ep) = 120, trace = from 48 to 56
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9949735000000415
epsilon is = 0.9949720000000415
epsilon is = 0.9949705000000415
epsilon is = 0.9949690000000415
epsilon is = 0.9949675000000415
Target Set Success
ep_idx = [203, 153, 300, 181, 285, 230, 346, 145, 218, 84, 341, 274, 150, 195, 224, 257, 162, 65, 145, 74, 58, 290, 219, 304, 101, 117, 307, 313, 321, 332, 90, 114]
exp_idx = [55, 63, 15, 34, 58, 9, 32, 13, 54, 32, 22, 21, 33, 55, 10, 32, 80, 11, 75, 52, 14, 30, 34, 66, 21, 51, 8, 12, 9, 58, 73, 70]
idx_offset = 56, self.episode_index = 355, len(self.buffer) = 300
ep_idx[i] = 203
buffer_index = 147 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 153
buffer_index = 97 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 300
buffer_index = 244 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 181
buf

buffer_index = 274 , len(sampled_ep) = 175, trace = from 59 to 67
ep_idx[i] = 347
buffer_index = 291 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 160
buffer_index = 104 , len(sampled_ep) = 117, trace = from 76 to 84
ep_idx[i] = 272
buffer_index = 216 , len(sampled_ep) = 70, trace = from 15 to 23
ep_idx[i] = 154
buffer_index = 98 , len(sampled_ep) = 153, trace = from 14 to 22
ep_idx[i] = 324
buffer_index = 268 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 92
buffer_index = 36 , len(sampled_ep) = 281, trace = from 91 to 99
ep_idx[i] = 330
buffer_index = 274 , len(sampled_ep) = 175, trace = from 45 to 53
ep_idx[i] = 116
buffer_index = 60 , len(sampled_ep) = 162, trace = from 2 to 10
ep_idx[i] = 347
buffer_index = 291 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 330
buffer_index = 274 , len(sampled_ep) = 175, trace = from 77 to 85
ep_idx[i] = 321
buffer_index = 265 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 76
buffer_index = 20 , len(sam

ep_idx = [258, 310, 60, 70, 212, 303, 95, 342, 261, 334, 212, 330, 95, 142, 329, 322, 313, 324, 277, 306, 300, 122, 199, 202, 58, 160, 115, 334, 334, 322, 342, 296]
exp_idx = [23, 119, 69, 74, 23, 17, 18, 60, 28, 101, 25, 138, 63, 45, 57, 63, 47, 43, 14, 88, 71, 10, 49, 16, 17, 16, 120, 93, 110, 102, 43, 83]
idx_offset = 56, self.episode_index = 355, len(self.buffer) = 300
ep_idx[i] = 258
buffer_index = 202 , len(sampled_ep) = 76, trace = from 16 to 24
ep_idx[i] = 310
buffer_index = 254 , len(sampled_ep) = 164, trace = from 112 to 120
ep_idx[i] = 60
buffer_index = 4 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 70
buffer_index = 14 , len(sampled_ep) = 198, trace = from 67 to 75
ep_idx[i] = 212
buffer_index = 156 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 303
buffer_index = 247 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 95
buffer_index = 39 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 342
buffer_index = 286 , len(sampled_ep) = 81, t

buffer_index = 218 , len(sampled_ep) = 114, trace = from 93 to 101
ep_idx[i] = 334
buffer_index = 278 , len(sampled_ep) = 113, trace = from 88 to 96
ep_idx[i] = 100
buffer_index = 44 , len(sampled_ep) = 102, trace = from 12 to 20
ep_idx[i] = 157
buffer_index = 101 , len(sampled_ep) = 103, trace = from 37 to 45
ep_idx[i] = 312
buffer_index = 256 , len(sampled_ep) = 108, trace = from 22 to 30
ep_idx[i] = 56
buffer_index = 0 , len(sampled_ep) = 198, trace = from 72 to 80
ep_idx[i] = 113
buffer_index = 57 , len(sampled_ep) = 105, trace = from 32 to 40
ep_idx[i] = 254
buffer_index = 198 , len(sampled_ep) = 116, trace = from 40 to 48
ep_idx[i] = 237
buffer_index = 181 , len(sampled_ep) = 116, trace = from 104 to 112
ep_idx[i] = 332
buffer_index = 276 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 153
buffer_index = 97 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 63
buffer_index = 7 , len(sampled_ep) = 47, trace = from 20 to 28
ep_idx[i] = 313
buffer_index = 257 , len(

buffer_index = 148 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 350
buffer_index = 294 , len(sampled_ep) = 198, trace = from 62 to 70
ep_idx[i] = 207
buffer_index = 151 , len(sampled_ep) = 75, trace = from 58 to 66
ep_idx[i] = 61
buffer_index = 5 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 350
buffer_index = 294 , len(sampled_ep) = 198, trace = from 170 to 178
ep_idx[i] = 219
buffer_index = 163 , len(sampled_ep) = 110, trace = from 78 to 86
ep_idx[i] = 56
buffer_index = 0 , len(sampled_ep) = 198, trace = from 12 to 20
ep_idx[i] = 157
buffer_index = 101 , len(sampled_ep) = 103, trace = from 8 to 16
ep_idx[i] = 254
buffer_index = 198 , len(sampled_ep) = 116, trace = from 29 to 37
ep_idx[i] = 350
buffer_index = 294 , len(sampled_ep) = 198, trace = from 1 to 9
ep_idx[i] = 310
buffer_index = 254 , len(sampled_ep) = 164, trace = from 1 to 9
ep_idx[i] = 325
buffer_index = 269 , len(sampled_ep) = 82, trace = from 48 to 56
ep_idx[i] = 267
buffer_index = 211 , len(samp

buffer_index = 275 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 210
buffer_index = 153 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 330
buffer_index = 273 , len(sampled_ep) = 175, trace = from 101 to 109
ep_idx[i] = 136
buffer_index = 79 , len(sampled_ep) = 108, trace = from 12 to 20
ep_idx[i] = 71
buffer_index = 14 , len(sampled_ep) = 299, trace = from 58 to 66
ep_idx[i] = 267
buffer_index = 210 , len(sampled_ep) = 83, trace = from 33 to 41
ep_idx[i] = 337
buffer_index = 280 , len(sampled_ep) = 82, trace = from 56 to 64
ep_idx[i] = 334
buffer_index = 277 , len(sampled_ep) = 113, trace = from 90 to 98
ep_idx[i] = 334
buffer_index = 277 , len(sampled_ep) = 113, trace = from 62 to 70
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9948760000000423
epsilon is = 0.9948745000000423
epsilon is = 0.9948730000000423
epsilon is = 0.9948715000000423
epsilon is = 0.9948700000000423
Target Set Success
ep_idx = [268, 118, 254, 71, 93, 110, 337, 330, 291, 112, 334, 159, 135

buffer_index = 16 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 110
buffer_index = 53 , len(sampled_ep) = 137, trace = from 104 to 112
ep_idx[i] = 97
buffer_index = 40 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 110
buffer_index = 53 , len(sampled_ep) = 137, trace = from 115 to 123
ep_idx[i] = 158
buffer_index = 101 , len(sampled_ep) = 83, trace = from 49 to 57
ep_idx[i] = 89
buffer_index = 32 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 70
buffer_index = 13 , len(sampled_ep) = 198, trace = from 59 to 67
ep_idx[i] = 86
buffer_index = 29 , len(sampled_ep) = 77, trace = from 67 to 75
ep_idx[i] = 211
buffer_index = 154 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 127
buffer_index = 70 , len(sampled_ep) = 50, trace = from 10 to 18
ep_idx[i] = 60
buffer_index = 3 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 247
buffer_index = 190 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 155
buffer_index = 98 , len(sampled_ep) = 

ep_idx[i] = 63
buffer_index = 6 , len(sampled_ep) = 47, trace = from 2 to 10
ep_idx[i] = 171
buffer_index = 114 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 251
buffer_index = 194 , len(sampled_ep) = 133, trace = from 52 to 60
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9948385000000426
epsilon is = 0.9948370000000426
epsilon is = 0.9948355000000426
epsilon is = 0.9948340000000426
epsilon is = 0.9948325000000426
Target Set Success
ep_idx = [269, 308, 115, 111, 234, 92, 66, 332, 156, 99, 84, 279, 171, 249, 334, 171, 75, 96, 80, 232, 239, 73, 110, 123, 64, 305, 135, 118, 135, 254, 278, 338]
exp_idx = [7, 16, 129, 68, 161, 214, 35, 24, 25, 78, 69, 17, 52, 10, 112, 12, 36, 78, 16, 59, 14, 15, 108, 138, 54, 46, 181, 58, 53, 34, 80, 51]
idx_offset = 57, self.episode_index = 356, len(self.buffer) = 300
ep_idx[i] = 269
buffer_index = 212 , len(sampled_ep) = 45, trace = from 0 to 8
ep_idx[i] = 308
buffer_index = 251 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 115
b

buffer_index = 292 , len(sampled_ep) = 228, trace = from 28 to 36
ep_idx[i] = 75
buffer_index = 18 , len(sampled_ep) = 75, trace = from 21 to 29
ep_idx[i] = 262
buffer_index = 205 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 185
buffer_index = 128 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 296
buffer_index = 239 , len(sampled_ep) = 108, trace = from 48 to 56
ep_idx[i] = 126
buffer_index = 69 , len(sampled_ep) = 46, trace = from 16 to 24
ep_idx[i] = 222
buffer_index = 165 , len(sampled_ep) = 224, trace = from 198 to 206
ep_idx[i] = 220
buffer_index = 163 , len(sampled_ep) = 118, trace = from 22 to 30
ep_idx[i] = 81
buffer_index = 24 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 228
buffer_index = 171 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 110
buffer_index = 53 , len(sampled_ep) = 137, trace = from 22 to 30
ep_idx[i] = 215
buffer_index = 158 , len(sampled_ep) = 117, trace = from 48 to 56
ep_idx[i] = 152
buffer_index = 95 , len(sam

ep_idx = [220, 125, 92, 326, 75, 103, 349, 314, 329, 334, 182, 132, 287, 152, 167, 330, 87, 106, 78, 275, 280, 85, 281, 316, 227, 236, 330, 212, 86, 197, 92, 340]
exp_idx = [21, 14, 79, 44, 43, 18, 52, 60, 31, 104, 80, 69, 105, 16, 7, 41, 43, 75, 18, 9, 76, 38, 93, 14, 75, 57, 155, 48, 63, 23, 52, 41]
idx_offset = 57, self.episode_index = 356, len(self.buffer) = 300
ep_idx[i] = 220
buffer_index = 163 , len(sampled_ep) = 118, trace = from 14 to 22
ep_idx[i] = 125
buffer_index = 68 , len(sampled_ep) = 75, trace = from 7 to 15
ep_idx[i] = 92
buffer_index = 35 , len(sampled_ep) = 281, trace = from 72 to 80
ep_idx[i] = 326
buffer_index = 269 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 75
buffer_index = 18 , len(sampled_ep) = 75, trace = from 36 to 44
ep_idx[i] = 103
buffer_index = 46 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 349
buffer_index = 292 , len(sampled_ep) = 228, trace = from 45 to 53
ep_idx[i] = 314
buffer_index = 257 , len(sampled_ep) = 81, trace = f

buffer_index = 219 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 247
buffer_index = 190 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 97
buffer_index = 40 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 227
buffer_index = 170 , len(sampled_ep) = 112, trace = from 67 to 75
ep_idx[i] = 237
buffer_index = 180 , len(sampled_ep) = 116, trace = from 10 to 18
ep_idx[i] = 201
buffer_index = 144 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 330
buffer_index = 273 , len(sampled_ep) = 175, trace = from 121 to 129
ep_idx[i] = 165
buffer_index = 108 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 178
buffer_index = 121 , len(sampled_ep) = 80, trace = from 65 to 73
ep_idx[i] = 331
buffer_index = 274 , len(sampled_ep) = 84, trace = from 14 to 22
ep_idx[i] = 334
buffer_index = 277 , len(sampled_ep) = 113, trace = from 87 to 95
ep_idx[i] = 63
buffer_index = 6 , len(sampled_ep) = 47, trace = from 1 to 9
ep_idx[i] = 317
buffer_index = 260 , len(sampled_

ep_idx[i] = 107
buffer_index = 49 , len(sampled_ep) = 109, trace = from 11 to 19
ep_idx[i] = 267
buffer_index = 209 , len(sampled_ep) = 83, trace = from 23 to 31
ep_idx[i] = 247
buffer_index = 189 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 290
buffer_index = 232 , len(sampled_ep) = 152, trace = from 69 to 77
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 119
buffer_index = 61 , len(sampled_ep) = 183, trace = from 43 to 51
ep_idx[i] = 98
buffer_index = 40 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 344
buffer_index = 286 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 134
buffer_index = 76 , len(sampled_ep) = 208, trace = from 4 to 12
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 170
buffer_index = 112 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 135
buffer_index = 77 , len(sampled_ep) = 211, trace = from 140 to 148
ep_idx[i] = 330
buffer_

buffer_index = 191 , len(sampled_ep) = 75, trace = from 23 to 31
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 311
buffer_index = 253 , len(sampled_ep) = 84, trace = from 13 to 21
ep_idx[i] = 203
buffer_index = 145 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 99
buffer_index = 41 , len(sampled_ep) = 83, trace = from 22 to 30
ep_idx[i] = 238
buffer_index = 180 , len(sampled_ep) = 79, trace = from 35 to 43
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 91 to 99
ep_idx[i] = 211
buffer_index = 153 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 276
buffer_index = 218 , len(sampled_ep) = 81, trace = from 34 to 42
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9947410000000434
epsilon is = 0.9947395000000434
epsilon is = 0.9947380000000434
epsilon is = 0.9947365000000434
epsilon is = 0.9947350000000434
Target Set Success
ep_idx = [334, 61, 313, 79, 71, 315, 196, 119, 71, 290, 347, 306, 298, 99

buffer_index = 7 , len(sampled_ep) = 45, trace = from 20 to 28
ep_idx[i] = 209
buffer_index = 151 , len(sampled_ep) = 71, trace = from 2 to 10
ep_idx[i] = 313
buffer_index = 255 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 38 to 46
ep_idx[i] = 292
buffer_index = 234 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 234
buffer_index = 176 , len(sampled_ep) = 167, trace = from 2 to 10
ep_idx[i] = 200
buffer_index = 142 , len(sampled_ep) = 82, trace = from 8 to 16
ep_idx[i] = 268
buffer_index = 210 , len(sampled_ep) = 120, trace = from 40 to 48
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 103 to 111
ep_idx[i] = 281
buffer_index = 223 , len(sampled_ep) = 178, trace = from 157 to 165
ep_idx[i] = 353
buffer_index = 295 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 71
buffer_index = 13 , len(sampled_ep) = 299, trace = from 76 to 84
ep_idx[i] = 341
buffer_index = 283 , len(sa

buffer_index = 237 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 123
buffer_index = 65 , len(sampled_ep) = 198, trace = from 9 to 17
ep_idx[i] = 134
buffer_index = 76 , len(sampled_ep) = 208, trace = from 133 to 141
ep_idx[i] = 178
buffer_index = 120 , len(sampled_ep) = 80, trace = from 16 to 24
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9947035000000437
epsilon is = 0.9947020000000437
epsilon is = 0.9947005000000437
epsilon is = 0.9946990000000437
epsilon is = 0.9946975000000438
Target Set Success
ep_idx = [280, 211, 311, 113, 349, 106, 285, 310, 219, 77, 254, 170, 336, 334, 305, 92, 330, 294, 348, 170, 270, 190, 291, 178, 114, 254, 188, 211, 251, 232, 141, 293]
exp_idx = [53, 32, 35, 9, 171, 95, 75, 10, 49, 16, 52, 11, 18, 96, 14, 212, 78, 76, 60, 17, 70, 15, 70, 21, 31, 110, 56, 29, 93, 21, 62, 49]
idx_offset = 58, self.episode_index = 357, len(self.buffer) = 300
ep_idx[i] = 280
buffer_index = 222 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 211
buffer_

buffer_index = 170 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 92
buffer_index = 34 , len(sampled_ep) = 281, trace = from 238 to 246
ep_idx[i] = 66
buffer_index = 8 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 106
buffer_index = 48 , len(sampled_ep) = 103, trace = from 68 to 76
ep_idx[i] = 335
buffer_index = 277 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 245
buffer_index = 187 , len(sampled_ep) = 75, trace = from 58 to 66
ep_idx[i] = 200
buffer_index = 142 , len(sampled_ep) = 82, trace = from 40 to 48
ep_idx[i] = 152
buffer_index = 94 , len(sampled_ep) = 228, trace = from 5 to 13
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 33 to 41
ep_idx[i] = 169
buffer_index = 111 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 220
buffer_index = 162 , len(sampled_ep) = 118, trace = from 46 to 54
ep_idx[i] = 279
buffer_index = 221 , len(sampled_ep) = 48, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 276 , len(sample

epsilon is = 0.994661500000044
epsilon is = 0.9946600000000441
Target Set Success
ep_idx = [111, 85, 127, 334, 334, 230, 194, 154, 227, 220, 180, 334, 71, 58, 199, 112, 340, 115, 228, 106, 334, 67, 237, 273, 71, 318, 147, 334, 116, 203, 319, 184]
exp_idx = [51, 16, 23, 90, 65, 12, 43, 106, 29, 51, 94, 95, 291, 13, 32, 48, 37, 151, 49, 75, 35, 17, 114, 52, 130, 73, 52, 102, 122, 63, 31, 26]
idx_offset = 58, self.episode_index = 357, len(self.buffer) = 300
ep_idx[i] = 111
buffer_index = 53 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 85
buffer_index = 27 , len(sampled_ep) = 108, trace = from 9 to 17
ep_idx[i] = 127
buffer_index = 69 , len(sampled_ep) = 50, trace = from 16 to 24
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 83 to 91
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 58 to 66
ep_idx[i] = 230
buffer_index = 172 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 194
buffer_index = 136 , len(sampled_ep) = 81,

buffer_index = 211 , len(sampled_ep) = 45, trace = from 6 to 14
ep_idx[i] = 198
buffer_index = 140 , len(sampled_ep) = 42, trace = from 5 to 13
ep_idx[i] = 145
buffer_index = 87 , len(sampled_ep) = 117, trace = from 43 to 51
ep_idx[i] = 308
buffer_index = 250 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 152
buffer_index = 94 , len(sampled_ep) = 228, trace = from 69 to 77
ep_idx[i] = 298
buffer_index = 240 , len(sampled_ep) = 115, trace = from 38 to 46
ep_idx[i] = 334
buffer_index = 276 , len(sampled_ep) = 113, trace = from 80 to 88
ep_idx[i] = 217
buffer_index = 159 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 176
buffer_index = 118 , len(sampled_ep) = 80, trace = from 60 to 68
ep_idx[i] = 229
buffer_index = 171 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 223
buffer_index = 165 , len(sampled_ep) = 112, trace = from 29 to 37
ep_idx[i] = 305
buffer_index = 247 , len(sampled_ep) = 71, trace = from 19 to 27
ep_idx[i] = 219
buffer_index = 161 , len(sam

exp_idx = [147, 27, 57, 33, 57, 38, 32, 111, 18, 76, 45, 78, 169, 16, 17, 57, 47, 11, 83, 132, 119, 129, 47, 13, 29, 81, 100, 34, 97, 102, 13, 91]
idx_offset = 59, self.episode_index = 358, len(self.buffer) = 300
ep_idx[i] = 123
buffer_index = 64 , len(sampled_ep) = 198, trace = from 140 to 148
ep_idx[i] = 162
buffer_index = 103 , len(sampled_ep) = 112, trace = from 20 to 28
ep_idx[i] = 245
buffer_index = 186 , len(sampled_ep) = 75, trace = from 50 to 58
ep_idx[i] = 105
buffer_index = 46 , len(sampled_ep) = 74, trace = from 26 to 34
ep_idx[i] = 184
buffer_index = 125 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 254
buffer_index = 195 , len(sampled_ep) = 116, trace = from 31 to 39
ep_idx[i] = 349
buffer_index = 290 , len(sampled_ep) = 228, trace = from 25 to 33
ep_idx[i] = 334
buffer_index = 275 , len(sampled_ep) = 113, trace = from 104 to 112
ep_idx[i] = 221
buffer_index = 162 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 329
buffer_index = 270 , len(sampled_ep

ep_idx[i] = 264
buffer_index = 205 , len(sampled_ep) = 109, trace = from 21 to 29
ep_idx[i] = 330
buffer_index = 271 , len(sampled_ep) = 175, trace = from 92 to 100
ep_idx[i] = 331
buffer_index = 272 , len(sampled_ep) = 84, trace = from 52 to 60
ep_idx[i] = 152
buffer_index = 93 , len(sampled_ep) = 228, trace = from 122 to 130
ep_idx[i] = 74
buffer_index = 15 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 331
buffer_index = 272 , len(sampled_ep) = 84, trace = from 41 to 49
ep_idx[i] = 349
buffer_index = 290 , len(sampled_ep) = 228, trace = from 98 to 106
ep_idx[i] = 234
buffer_index = 175 , len(sampled_ep) = 167, trace = from 27 to 35
ep_idx[i] = 122
buffer_index = 63 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 166
buffer_index = 107 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 94
buffer_index = 35 , len(sampled_ep) = 45, trace = from 24 to 32
ep_idx[i] = 180
buffer_index = 121 , len(sampled_ep) = 141, trace = from 41 to 49
sampledTraces.shape = (3

buffer_index = 5 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 77
buffer_index = 18 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 163
buffer_index = 104 , len(sampled_ep) = 134, trace = from 126 to 134
ep_idx[i] = 222
buffer_index = 163 , len(sampled_ep) = 224, trace = from 41 to 49
ep_idx[i] = 340
buffer_index = 281 , len(sampled_ep) = 142, trace = from 112 to 120
ep_idx[i] = 326
buffer_index = 267 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 166
buffer_index = 107 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 82
buffer_index = 23 , len(sampled_ep) = 73, trace = from 5 to 13
ep_idx[i] = 126
buffer_index = 67 , len(sampled_ep) = 46, trace = from 6 to 14
ep_idx[i] = 119
buffer_index = 60 , len(sampled_ep) = 183, trace = from 125 to 133
ep_idx[i] = 323
buffer_index = 264 , len(sampled_ep) = 43, trace = from 25 to 33
ep_idx[i] = 106
buffer_index = 47 , len(sampled_ep) = 103, trace = from 90 to 98
ep_idx[i] = 319
buffer_index = 260 , len(sam

buffer_index = 270 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 234
buffer_index = 175 , len(sampled_ep) = 167, trace = from 23 to 31
ep_idx[i] = 140
buffer_index = 81 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 289
buffer_index = 230 , len(sampled_ep) = 83, trace = from 35 to 43
ep_idx[i] = 169
buffer_index = 110 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 172
buffer_index = 113 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 342
buffer_index = 283 , len(sampled_ep) = 81, trace = from 6 to 14
sampledTraces.shape = (32, 8, 6)
length of poped element = 81 , cntr = 74 , diff = 7
epsilon is = 0.9945685000000448
epsilon is = 0.9945670000000448
epsilon is = 0.9945655000000448
epsilon is = 0.9945640000000449
epsilon is = 0.9945625000000449
Target Set Success
ep_idx = [115, 87, 274, 270, 268, 230, 125, 76, 167, 185, 349, 330, 275, 75, 303, 71, 146, 274, 175, 241, 249, 197, 281, 93, 223, 60, 355, 329, 327, 233, 190, 174]
exp_idx = [13, 50, 64, 

buffer_index = 55 , len(sampled_ep) = 299, trace = from 177 to 185
ep_idx[i] = 115
buffer_index = 55 , len(sampled_ep) = 299, trace = from 75 to 83
ep_idx[i] = 285
buffer_index = 225 , len(sampled_ep) = 166, trace = from 126 to 134
ep_idx[i] = 110
buffer_index = 50 , len(sampled_ep) = 137, trace = from 114 to 122
ep_idx[i] = 98
buffer_index = 38 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 247
buffer_index = 187 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 264
buffer_index = 204 , len(sampled_ep) = 109, trace = from 89 to 97
ep_idx[i] = 74
buffer_index = 14 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 200
buffer_index = 140 , len(sampled_ep) = 82, trace = from 59 to 67
ep_idx[i] = 310
buffer_index = 250 , len(sampled_ep) = 164, trace = from 41 to 49
ep_idx[i] = 149
buffer_index = 89 , len(sampled_ep) = 82, trace = from 12 to 20
ep_idx[i] = 123
buffer_index = 63 , len(sampled_ep) = 198, trace = from 128 to 136
ep_idx[i] = 70
buffer_index = 10 , len(s

buffer_index = 194 , len(sampled_ep) = 116, trace = from 104 to 112
ep_idx[i] = 330
buffer_index = 270 , len(sampled_ep) = 175, trace = from 124 to 132
ep_idx[i] = 201
buffer_index = 141 , len(sampled_ep) = 81, trace = from 4 to 12
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9945310000000451
epsilon is = 0.9945295000000451
epsilon is = 0.9945280000000452
epsilon is = 0.9945265000000452
epsilon is = 0.9945250000000452
Target Set Success
ep_idx = [161, 71, 350, 285, 359, 149, 198, 119, 191, 90, 114, 286, 334, 306, 267, 189, 130, 141, 349, 167, 334, 121, 154, 100, 242, 63, 75, 334, 142, 84, 147, 215]
exp_idx = [57, 64, 30, 93, 33, 18, 10, 95, 85, 50, 65, 44, 81, 86, 51, 55, 33, 36, 206, 78, 59, 18, 85, 84, 14, 23, 31, 83, 27, 48, 25, 100]
idx_offset = 60, self.episode_index = 359, len(self.buffer) = 300
ep_idx[i] = 161
buffer_index = 101 , len(sampled_ep) = 114, trace = from 50 to 58
ep_idx[i] = 71
buffer_index = 11 , len(sampled_ep) = 299, trace = from 57 to 65
ep_idx[i] = 350
buffer

buffer_index = 96 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 129
buffer_index = 69 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 248
buffer_index = 188 , len(sampled_ep) = 77, trace = from 18 to 26
ep_idx[i] = 180
buffer_index = 120 , len(sampled_ep) = 141, trace = from 38 to 46
ep_idx[i] = 356
buffer_index = 296 , len(sampled_ep) = 100, trace = from 61 to 69
ep_idx[i] = 83
buffer_index = 23 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 284
buffer_index = 224 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 326
buffer_index = 266 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 281
buffer_index = 221 , len(sampled_ep) = 178, trace = from 17 to 25
ep_idx[i] = 272
buffer_index = 212 , len(sampled_ep) = 70, trace = from 43 to 51
ep_idx[i] = 289
buffer_index = 229 , len(sampled_ep) = 83, trace = from 21 to 29
ep_idx[i] = 337
buffer_index = 277 , len(sampled_ep) = 82, trace = from 11 to 19
ep_idx[i] = 113
buffer_index = 53 , len(sampled

ep_idx = [315, 350, 269, 87, 234, 128, 343, 115, 119, 330, 306, 326, 296, 328, 288, 130, 70, 304, 265, 92, 334, 66, 245, 205, 259, 201, 82, 123, 60, 113, 85, 281]
exp_idx = [66, 102, 12, 70, 94, 73, 38, 252, 52, 160, 114, 43, 16, 46, 20, 69, 183, 13, 46, 223, 71, 35, 7, 14, 17, 27, 10, 49, 18, 34, 96, 141]
idx_offset = 60, self.episode_index = 359, len(self.buffer) = 300
ep_idx[i] = 315
buffer_index = 255 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 350
buffer_index = 290 , len(sampled_ep) = 198, trace = from 95 to 103
ep_idx[i] = 269
buffer_index = 209 , len(sampled_ep) = 45, trace = from 5 to 13
ep_idx[i] = 87
buffer_index = 27 , len(sampled_ep) = 76, trace = from 63 to 71
ep_idx[i] = 234
buffer_index = 174 , len(sampled_ep) = 167, trace = from 87 to 95
ep_idx[i] = 128
buffer_index = 68 , len(sampled_ep) = 74, trace = from 66 to 74
ep_idx[i] = 343
buffer_index = 283 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 115
buffer_index = 55 , len(sampled_ep) = 299, t

buffer_index = 293 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 133
buffer_index = 73 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 97
buffer_index = 37 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 82
buffer_index = 22 , len(sampled_ep) = 73, trace = from 5 to 13
ep_idx[i] = 133
buffer_index = 73 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 189
buffer_index = 129 , len(sampled_ep) = 84, trace = from 31 to 39
ep_idx[i] = 283
buffer_index = 223 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 100
buffer_index = 40 , len(sampled_ep) = 102, trace = from 83 to 91
ep_idx[i] = 314
buffer_index = 254 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 60
buffer_index = 0 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 262
buffer_index = 202 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 284
buffer_index = 224 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 334
buffer_index = 274 , len(sampled_ep) = 

buffer_index = 135 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 350
buffer_index = 290 , len(sampled_ep) = 198, trace = from 32 to 40
ep_idx[i] = 320
buffer_index = 260 , len(sampled_ep) = 71, trace = from 46 to 54
ep_idx[i] = 292
buffer_index = 232 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 339
buffer_index = 279 , len(sampled_ep) = 78, trace = from 43 to 51
ep_idx[i] = 250
buffer_index = 190 , len(sampled_ep) = 49, trace = from 4 to 12
ep_idx[i] = 302
buffer_index = 242 , len(sampled_ep) = 79, trace = from 71 to 79
ep_idx[i] = 300
buffer_index = 240 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 350
buffer_index = 290 , len(sampled_ep) = 198, trace = from 153 to 161
ep_idx[i] = 251
buffer_index = 191 , len(sampled_ep) = 133, trace = from 46 to 54
ep_idx[i] = 125
buffer_index = 65 , len(sampled_ep) = 75, trace = from 8 to 16
ep_idx[i] = 281
buffer_index = 221 , len(sampled_ep) = 178, trace = from 54 to 62
ep_idx[i] = 62
buffer_index = 2 , len(sam

buffer_index = 227 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 344
buffer_index = 283 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 164
buffer_index = 103 , len(sampled_ep) = 118, trace = from 1 to 9
ep_idx[i] = 332
buffer_index = 271 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 123
buffer_index = 62 , len(sampled_ep) = 198, trace = from 126 to 134
ep_idx[i] = 135
buffer_index = 74 , len(sampled_ep) = 211, trace = from 59 to 67
ep_idx[i] = 135
buffer_index = 74 , len(sampled_ep) = 211, trace = from 44 to 52
ep_idx[i] = 328
buffer_index = 267 , len(sampled_ep) = 75, trace = from 67 to 75
ep_idx[i] = 223
buffer_index = 162 , len(sampled_ep) = 112, trace = from 66 to 74
ep_idx[i] = 175
buffer_index = 114 , len(sampled_ep) = 102, trace = from 76 to 84
ep_idx[i] = 92
buffer_index = 31 , len(sampled_ep) = 281, trace = from 199 to 207
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9944335000000459
epsilon is = 0.994432000000046
epsilon is = 0.99443050000

buffer_index = 26 , len(sampled_ep) = 76, trace = from 33 to 41
ep_idx[i] = 207
buffer_index = 146 , len(sampled_ep) = 75, trace = from 57 to 65
ep_idx[i] = 201
buffer_index = 140 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 335
buffer_index = 274 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 227
buffer_index = 166 , len(sampled_ep) = 112, trace = from 10 to 18
ep_idx[i] = 120
buffer_index = 59 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 334
buffer_index = 273 , len(sampled_ep) = 113, trace = from 72 to 80
ep_idx[i] = 156
buffer_index = 95 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 157
buffer_index = 96 , len(sampled_ep) = 103, trace = from 78 to 86
ep_idx[i] = 334
buffer_index = 273 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 138
buffer_index = 77 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 239
buffer_index = 178 , len(sampled_ep) = 101, trace = from 43 to 51
ep_idx[i] = 76
buffer_index = 15 , len(samp

buffer_index = 272 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 75
buffer_index = 14 , len(sampled_ep) = 75, trace = from 40 to 48
ep_idx[i] = 330
buffer_index = 269 , len(sampled_ep) = 175, trace = from 92 to 100
ep_idx[i] = 194
buffer_index = 133 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 334
buffer_index = 273 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 110
buffer_index = 49 , len(sampled_ep) = 137, trace = from 7 to 15
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9943960000000462
epsilon is = 0.9943945000000463
epsilon is = 0.9943930000000463
epsilon is = 0.9943915000000463
epsilon is = 0.9943900000000463
Target Set Success
ep_idx = [180, 349, 334, 242, 103, 264, 334, 355, 146, 275, 248, 105, 252, 81, 236, 187, 227, 264, 330, 358, 220, 80, 330, 207, 148, 185, 317, 255, 331, 154, 187, 334]
exp_idx = [19, 197, 100, 16, 45, 53, 110, 49, 17, 70, 60, 12, 11, 16, 52, 74, 15, 47, 51, 71, 20, 45, 75, 28, 51, 35, 56, 39, 12, 88, 29, 62]
idx_of

buffer_index = 285 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 328
buffer_index = 266 , len(sampled_ep) = 75, trace = from 34 to 42
ep_idx[i] = 329
buffer_index = 267 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 182
buffer_index = 120 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 70
buffer_index = 8 , len(sampled_ep) = 198, trace = from 113 to 121
ep_idx[i] = 123
buffer_index = 61 , len(sampled_ep) = 198, trace = from 2 to 10
ep_idx[i] = 127
buffer_index = 65 , len(sampled_ep) = 50, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 272 , len(sampled_ep) = 113, trace = from 69 to 77
ep_idx[i] = 295
buffer_index = 233 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 251
buffer_index = 189 , len(sampled_ep) = 133, trace = from 81 to 89
ep_idx[i] = 274
buffer_index = 212 , len(sampled_ep) = 114, trace = from 40 to 48
ep_idx[i] = 257
buffer_index = 195 , len(sampled_ep) = 82, trace = from 13 to 21
ep_idx[i] = 314
buffer_index = 252 , len(sam

buffer_index = 77 , len(sampled_ep) = 133, trace = from 1 to 9
ep_idx[i] = 94
buffer_index = 32 , len(sampled_ep) = 45, trace = from 32 to 40
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9943585000000466
epsilon is = 0.9943570000000466
epsilon is = 0.9943555000000466
epsilon is = 0.9943540000000466
epsilon is = 0.9943525000000466
Target Set Success
ep_idx = [334, 295, 125, 167, 133, 138, 239, 195, 264, 166, 227, 110, 334, 344, 157, 250, 206, 336, 237, 327, 88, 342, 266, 286, 157, 168, 350, 330, 68, 66, 255, 170]
exp_idx = [112, 51, 15, 17, 9, 16, 61, 27, 84, 24, 25, 88, 104, 13, 17, 13, 26, 47, 107, 9, 39, 51, 73, 83, 45, 74, 140, 87, 17, 36, 44, 21]
idx_offset = 62, self.episode_index = 361, len(self.buffer) = 300
ep_idx[i] = 334
buffer_index = 272 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 295
buffer_index = 233 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 125
buffer_index = 63 , len(sampled_ep) = 75, trace = from 8 to 16
ep_idx[i] = 167
buffer_index

buffer_index = 99 , len(sampled_ep) = 114, trace = from 93 to 101
ep_idx[i] = 245
buffer_index = 183 , len(sampled_ep) = 75, trace = from 42 to 50
ep_idx[i] = 87
buffer_index = 25 , len(sampled_ep) = 76, trace = from 61 to 69
ep_idx[i] = 313
buffer_index = 251 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 239
buffer_index = 177 , len(sampled_ep) = 101, trace = from 50 to 58
ep_idx[i] = 193
buffer_index = 131 , len(sampled_ep) = 108, trace = from 34 to 42
ep_idx[i] = 116
buffer_index = 54 , len(sampled_ep) = 162, trace = from 118 to 126
ep_idx[i] = 239
buffer_index = 177 , len(sampled_ep) = 101, trace = from 36 to 44
ep_idx[i] = 112
buffer_index = 50 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 135
buffer_index = 73 , len(sampled_ep) = 211, trace = from 86 to 94
ep_idx[i] = 123
buffer_index = 61 , len(sampled_ep) = 198, trace = from 124 to 132
ep_idx[i] = 128
buffer_index = 66 , len(sampled_ep) = 74, trace = from 7 to 15
ep_idx[i] = 225
buffer_index = 163 , len(

ep_idx = [301, 295, 93, 173, 219, 313, 115, 295, 330, 72, 129, 197, 178, 288, 76, 65, 131, 291, 327, 141, 145, 116, 175, 188, 169, 145, 306, 310, 144, 214, 166, 332]
exp_idx = [25, 7, 15, 45, 69, 16, 175, 15, 172, 32, 44, 16, 46, 53, 135, 39, 40, 76, 68, 8, 71, 84, 92, 45, 17, 96, 25, 81, 66, 63, 74, 15]
idx_offset = 62, self.episode_index = 361, len(self.buffer) = 300
ep_idx[i] = 301
buffer_index = 239 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 295
buffer_index = 233 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 93
buffer_index = 31 , len(sampled_ep) = 77, trace = from 8 to 16
ep_idx[i] = 173
buffer_index = 111 , len(sampled_ep) = 71, trace = from 38 to 46
ep_idx[i] = 219
buffer_index = 157 , len(sampled_ep) = 110, trace = from 62 to 70
ep_idx[i] = 313
buffer_index = 251 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 115
buffer_index = 53 , len(sampled_ep) = 299, trace = from 168 to 176
ep_idx[i] = 295
buffer_index = 233 , len(sampled_ep) = 81, trace

buffer_index = 266 , len(sampled_ep) = 75, trace = from 49 to 57
ep_idx[i] = 119
buffer_index = 57 , len(sampled_ep) = 183, trace = from 111 to 119
ep_idx[i] = 110
buffer_index = 48 , len(sampled_ep) = 137, trace = from 114 to 122
ep_idx[i] = 212
buffer_index = 150 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 300
buffer_index = 238 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 303
buffer_index = 241 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 116
buffer_index = 54 , len(sampled_ep) = 162, trace = from 114 to 122
ep_idx[i] = 302
buffer_index = 240 , len(sampled_ep) = 79, trace = from 59 to 67
ep_idx[i] = 328
buffer_index = 266 , len(sampled_ep) = 75, trace = from 52 to 60
ep_idx[i] = 278
buffer_index = 216 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 356
buffer_index = 294 , len(sampled_ep) = 100, trace = from 37 to 45
ep_idx[i] = 174
buffer_index = 112 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 71
buffer_index = 9 , len(

buffer_index = 32 , len(sampled_ep) = 45, trace = from 6 to 14
ep_idx[i] = 234
buffer_index = 172 , len(sampled_ep) = 167, trace = from 74 to 82
ep_idx[i] = 130
buffer_index = 68 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 213
buffer_index = 151 , len(sampled_ep) = 148, trace = from 15 to 23
ep_idx[i] = 189
buffer_index = 127 , len(sampled_ep) = 84, trace = from 42 to 50
ep_idx[i] = 179
buffer_index = 117 , len(sampled_ep) = 77, trace = from 25 to 33
ep_idx[i] = 71
buffer_index = 9 , len(sampled_ep) = 299, trace = from 64 to 72
ep_idx[i] = 79
buffer_index = 17 , len(sampled_ep) = 75, trace = from 46 to 54
ep_idx[i] = 145
buffer_index = 83 , len(sampled_ep) = 117, trace = from 13 to 21
ep_idx[i] = 234
buffer_index = 172 , len(sampled_ep) = 167, trace = from 156 to 164
ep_idx[i] = 76
buffer_index = 14 , len(sampled_ep) = 198, trace = from 171 to 179
ep_idx[i] = 132
buffer_index = 70 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 301
buffer_index = 239 , len(sampl

ep_idx[i] = 145
buffer_index = 83 , len(sampled_ep) = 117, trace = from 81 to 89
ep_idx[i] = 224
buffer_index = 162 , len(sampled_ep) = 46, trace = from 2 to 10
ep_idx[i] = 135
buffer_index = 73 , len(sampled_ep) = 211, trace = from 59 to 67
ep_idx[i] = 140
buffer_index = 78 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 351
buffer_index = 289 , len(sampled_ep) = 143, trace = from 117 to 125
ep_idx[i] = 68
buffer_index = 6 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 160
buffer_index = 98 , len(sampled_ep) = 117, trace = from 36 to 44
ep_idx[i] = 237
buffer_index = 175 , len(sampled_ep) = 116, trace = from 89 to 97
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9942610000000474
epsilon is = 0.9942595000000474
epsilon is = 0.9942580000000474
epsilon is = 0.9942565000000474
epsilon is = 0.9942550000000474
Target Set Success
ep_idx = [217, 348, 152, 217, 330, 310, 266, 128, 135, 295, 195, 212, 285, 138, 350, 264, 336, 152, 330, 227, 119, 205, 325, 332, 298, 268, 9

ep_idx[i] = 222
buffer_index = 160 , len(sampled_ep) = 224, trace = from 13 to 21
ep_idx[i] = 197
buffer_index = 135 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 222
buffer_index = 160 , len(sampled_ep) = 224, trace = from 177 to 185
ep_idx[i] = 115
buffer_index = 53 , len(sampled_ep) = 299, trace = from 89 to 97
ep_idx[i] = 149
buffer_index = 87 , len(sampled_ep) = 82, trace = from 43 to 51
ep_idx[i] = 218
buffer_index = 156 , len(sampled_ep) = 80, trace = from 6 to 14
ep_idx[i] = 168
buffer_index = 106 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 297
buffer_index = 235 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 220
buffer_index = 158 , len(sampled_ep) = 118, trace = from 24 to 32
ep_idx[i] = 334
buffer_index = 272 , len(sampled_ep) = 113, trace = from 6 to 14
ep_idx[i] = 226
buffer_index = 164 , len(sampled_ep) = 120, trace = from 79 to 87
ep_idx[i] = 330
buffer_index = 268 , len(sampled_ep) = 175, trace = from 167 to 175
ep_idx[i] = 347
buffe

buffer_index = 234 , len(sampled_ep) = 108, trace = from 27 to 35
ep_idx[i] = 322
buffer_index = 260 , len(sampled_ep) = 111, trace = from 2 to 10
ep_idx[i] = 191
buffer_index = 129 , len(sampled_ep) = 131, trace = from 108 to 116
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9942235000000477
epsilon is = 0.9942220000000477
length of poped element = 77 , cntr = 70 , diff = 7
epsilon is = 0.9942205000000477
epsilon is = 0.9942190000000477
epsilon is = 0.9942175000000477
Target Set Success
ep_idx = [304, 292, 194, 172, 66, 96, 251, 356, 227, 253, 310, 330, 243, 145, 349, 356, 259, 272, 163, 214, 107, 136, 298, 336, 136, 237, 176, 265, 191, 115, 214, 334]
exp_idx = [64, 39, 16, 16, 16, 31, 89, 8, 18, 59, 158, 106, 10, 104, 218, 11, 18, 19, 44, 61, 22, 30, 17, 50, 9, 22, 56, 27, 64, 139, 50, 101]
idx_offset = 63, self.episode_index = 362, len(self.buffer) = 300
ep_idx[i] = 304
buffer_index = 241 , len(sampled_ep) = 72, trace = from 57 to 65
ep_idx[i] = 292
buffer_index = 229 , len(sample

buffer_index = 52 , len(sampled_ep) = 299, trace = from 209 to 217
ep_idx[i] = 242
buffer_index = 179 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 204
buffer_index = 141 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 322
buffer_index = 259 , len(sampled_ep) = 111, trace = from 23 to 31
ep_idx[i] = 240
buffer_index = 177 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 180
buffer_index = 117 , len(sampled_ep) = 141, trace = from 15 to 23
ep_idx[i] = 159
buffer_index = 96 , len(sampled_ep) = 79, trace = from 70 to 78
ep_idx[i] = 330
buffer_index = 267 , len(sampled_ep) = 175, trace = from 155 to 163
ep_idx[i] = 349
buffer_index = 286 , len(sampled_ep) = 228, trace = from 17 to 25
ep_idx[i] = 286
buffer_index = 223 , len(sampled_ep) = 147, trace = from 112 to 120
ep_idx[i] = 148
buffer_index = 85 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 296
buffer_index = 233 , len(sampled_ep) = 108, trace = from 70 to 78
ep_idx[i] = 222
buffer_index = 159 

epsilon is = 0.994181500000048
epsilon is = 0.994180000000048
Target Set Success
ep_idx = [327, 141, 70, 334, 79, 196, 176, 332, 150, 156, 336, 322, 172, 247, 199, 334, 227, 137, 79, 330, 320, 154, 267, 210, 135, 317, 125, 334, 342, 223, 254, 173]
exp_idx = [13, 41, 20, 102, 54, 84, 40, 31, 48, 31, 10, 65, 74, 57, 86, 99, 49, 37, 24, 67, 59, 19, 13, 8, 58, 20, 33, 108, 72, 55, 13, 48]
idx_offset = 63, self.episode_index = 362, len(self.buffer) = 300
ep_idx[i] = 327
buffer_index = 264 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 141
buffer_index = 78 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 70
buffer_index = 7 , len(sampled_ep) = 198, trace = from 13 to 21
ep_idx[i] = 334
buffer_index = 271 , len(sampled_ep) = 113, trace = from 95 to 103
ep_idx[i] = 79
buffer_index = 16 , len(sampled_ep) = 75, trace = from 47 to 55
ep_idx[i] = 196
buffer_index = 133 , len(sampled_ep) = 135, trace = from 77 to 85
ep_idx[i] = 176
buffer_index = 113 , len(sampled_ep) = 80, trac

buffer_index = 22 , len(sampled_ep) = 108, trace = from 33 to 41
ep_idx[i] = 324
buffer_index = 261 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 334
buffer_index = 271 , len(sampled_ep) = 113, trace = from 77 to 85
ep_idx[i] = 227
buffer_index = 164 , len(sampled_ep) = 112, trace = from 44 to 52
ep_idx[i] = 138
buffer_index = 75 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 340
buffer_index = 277 , len(sampled_ep) = 142, trace = from 74 to 82
ep_idx[i] = 107
buffer_index = 44 , len(sampled_ep) = 109, trace = from 69 to 77
ep_idx[i] = 205
buffer_index = 142 , len(sampled_ep) = 78, trace = from 53 to 61
ep_idx[i] = 334
buffer_index = 271 , len(sampled_ep) = 113, trace = from 40 to 48
ep_idx[i] = 242
buffer_index = 179 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 115
buffer_index = 52 , len(sampled_ep) = 299, trace = from 40 to 48
ep_idx[i] = 111
buffer_index = 48 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 337
buffer_index = 274 , len(s

idx_offset = 63, self.episode_index = 362, len(self.buffer) = 300
ep_idx[i] = 321
buffer_index = 258 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 119
buffer_index = 56 , len(sampled_ep) = 183, trace = from 52 to 60
ep_idx[i] = 257
buffer_index = 194 , len(sampled_ep) = 82, trace = from 68 to 76
ep_idx[i] = 255
buffer_index = 192 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 331
buffer_index = 268 , len(sampled_ep) = 84, trace = from 61 to 69
ep_idx[i] = 334
buffer_index = 271 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 161
buffer_index = 98 , len(sampled_ep) = 114, trace = from 72 to 80
ep_idx[i] = 226
buffer_index = 163 , len(sampled_ep) = 120, trace = from 78 to 86
ep_idx[i] = 117
buffer_index = 54 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 312
buffer_index = 249 , len(sampled_ep) = 108, trace = from 42 to 50
ep_idx[i] = 334
buffer_index = 271 , len(sampled_ep) = 113, trace = from 55 to 63
ep_idx[i] = 92
buffer_index = 29 , len

buffer_index = 137 , len(sampled_ep) = 82, trace = from 8 to 16
ep_idx[i] = 161
buffer_index = 98 , len(sampled_ep) = 114, trace = from 4 to 12
ep_idx[i] = 73
buffer_index = 10 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 334
buffer_index = 271 , len(sampled_ep) = 113, trace = from 63 to 71
ep_idx[i] = 227
buffer_index = 164 , len(sampled_ep) = 112, trace = from 41 to 49
ep_idx[i] = 322
buffer_index = 259 , len(sampled_ep) = 111, trace = from 94 to 102
ep_idx[i] = 64
buffer_index = 1 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 255
buffer_index = 192 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 350
buffer_index = 287 , len(sampled_ep) = 198, trace = from 175 to 183
ep_idx[i] = 95
buffer_index = 32 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 123
buffer_index = 60 , len(sampled_ep) = 198, trace = from 31 to 39
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9941260000000485
epsilon is = 0.9941245000000485
epsilon is = 0.9941230000000485


buffer_index = 122 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 246
buffer_index = 182 , len(sampled_ep) = 47, trace = from 35 to 43
ep_idx[i] = 260
buffer_index = 196 , len(sampled_ep) = 108, trace = from 65 to 73
ep_idx[i] = 342
buffer_index = 278 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 334
buffer_index = 270 , len(sampled_ep) = 113, trace = from 83 to 91
ep_idx[i] = 144
buffer_index = 80 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 214
buffer_index = 150 , len(sampled_ep) = 75, trace = from 67 to 75
ep_idx[i] = 334
buffer_index = 270 , len(sampled_ep) = 113, trace = from 95 to 103
ep_idx[i] = 92
buffer_index = 28 , len(sampled_ep) = 281, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 270 , len(sampled_ep) = 113, trace = from 98 to 106
ep_idx[i] = 151
buffer_index = 87 , len(sampled_ep) = 82, trace = from 42 to 50
ep_idx[i] = 331
buffer_index = 267 , len(sampled_ep) = 84, trace = from 15 to 23
ep_idx[i] = 286
buffer_index = 222 , len(s

buffer_index = 227 , len(sampled_ep) = 103, trace = from 77 to 85
ep_idx[i] = 211
buffer_index = 147 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 242
buffer_index = 178 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 193
buffer_index = 129 , len(sampled_ep) = 108, trace = from 2 to 10
ep_idx[i] = 258
buffer_index = 194 , len(sampled_ep) = 76, trace = from 38 to 46
ep_idx[i] = 118
buffer_index = 54 , len(sampled_ep) = 111, trace = from 70 to 78
ep_idx[i] = 97
buffer_index = 33 , len(sampled_ep) = 81, trace = from 71 to 79
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9940885000000488
epsilon is = 0.9940870000000488
epsilon is = 0.9940855000000488
epsilon is = 0.9940840000000488
epsilon is = 0.9940825000000488
Target Set Success
ep_idx = [132, 334, 334, 192, 157, 243, 329, 349, 168, 299, 120, 187, 133, 221, 319, 71, 147, 334, 239, 338, 332, 162, 245, 227, 104, 315, 222, 359, 349, 179, 322, 304]
exp_idx = [9, 78, 70, 54, 32, 73, 51, 53, 75, 11, 20, 23, 18, 59, 59,

buffer_index = 241 , len(sampled_ep) = 71, trace = from 7 to 15
ep_idx[i] = 340
buffer_index = 276 , len(sampled_ep) = 142, trace = from 19 to 27
ep_idx[i] = 114
buffer_index = 50 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 237
buffer_index = 173 , len(sampled_ep) = 116, trace = from 30 to 38
ep_idx[i] = 267
buffer_index = 203 , len(sampled_ep) = 83, trace = from 4 to 12
ep_idx[i] = 258
buffer_index = 194 , len(sampled_ep) = 76, trace = from 7 to 15
ep_idx[i] = 218
buffer_index = 154 , len(sampled_ep) = 80, trace = from 68 to 76
ep_idx[i] = 222
buffer_index = 158 , len(sampled_ep) = 224, trace = from 195 to 203
ep_idx[i] = 319
buffer_index = 255 , len(sampled_ep) = 70, trace = from 46 to 54
ep_idx[i] = 68
buffer_index = 4 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 318
buffer_index = 254 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 360
buffer_index = 296 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 342
buffer_index = 278 , len(sampl

buffer_index = 267 , len(sampled_ep) = 84, trace = from 32 to 40
ep_idx[i] = 106
buffer_index = 42 , len(sampled_ep) = 103, trace = from 9 to 17
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9940510000000491
epsilon is = 0.9940495000000491
epsilon is = 0.9940480000000491
epsilon is = 0.9940465000000491
epsilon is = 0.9940450000000491
Target Set Success
ep_idx = [153, 71, 214, 208, 299, 125, 171, 247, 196, 342, 91, 251, 150, 139, 312, 306, 70, 80, 68, 144, 334, 112, 76, 233, 263, 357, 180, 83, 310, 311, 64, 188]
exp_idx = [20, 11, 63, 74, 37, 8, 68, 16, 96, 42, 15, 73, 70, 130, 19, 25, 56, 56, 72, 16, 105, 18, 19, 15, 13, 42, 12, 52, 108, 16, 27, 72]
idx_offset = 64, self.episode_index = 363, len(self.buffer) = 300
ep_idx[i] = 153
buffer_index = 89 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 71
buffer_index = 7 , len(sampled_ep) = 299, trace = from 4 to 12
ep_idx[i] = 214
buffer_index = 150 , len(sampled_ep) = 75, trace = from 56 to 64
ep_idx[i] = 208
buffer_index = 144 

buffer_index = 7 , len(sampled_ep) = 82, trace = from 43 to 51
ep_idx[i] = 310
buffer_index = 245 , len(sampled_ep) = 164, trace = from 154 to 162
ep_idx[i] = 205
buffer_index = 140 , len(sampled_ep) = 78, trace = from 59 to 67
ep_idx[i] = 279
buffer_index = 214 , len(sampled_ep) = 48, trace = from 10 to 18
ep_idx[i] = 192
buffer_index = 127 , len(sampled_ep) = 78, trace = from 70 to 78
ep_idx[i] = 249
buffer_index = 184 , len(sampled_ep) = 75, trace = from 57 to 65
ep_idx[i] = 334
buffer_index = 269 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 80
buffer_index = 15 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 259
buffer_index = 194 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 277
buffer_index = 212 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 292
buffer_index = 227 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 92
buffer_index = 27 , len(sampled_ep) = 281, trace = from 4 to 12
ep_idx[i] = 345
buffer_index = 280 , len(sampl

ep_idx = [333, 193, 95, 305, 233, 281, 353, 159, 137, 144, 340, 264, 195, 259, 334, 218, 315, 253, 220, 152, 144, 203, 178, 137, 118, 202, 280, 238, 199, 206, 236, 358]
exp_idx = [29, 40, 18, 44, 12, 91, 13, 15, 71, 46, 90, 15, 57, 71, 94, 16, 17, 11, 21, 16, 69, 22, 11, 15, 45, 50, 17, 14, 21, 66, 73, 38]
idx_offset = 65, self.episode_index = 364, len(self.buffer) = 300
ep_idx[i] = 333
buffer_index = 268 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 193
buffer_index = 128 , len(sampled_ep) = 108, trace = from 33 to 41
ep_idx[i] = 95
buffer_index = 30 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 305
buffer_index = 240 , len(sampled_ep) = 71, trace = from 37 to 45
ep_idx[i] = 233
buffer_index = 168 , len(sampled_ep) = 45, trace = from 5 to 13
ep_idx[i] = 281
buffer_index = 216 , len(sampled_ep) = 178, trace = from 84 to 92
ep_idx[i] = 353
buffer_index = 288 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 159
buffer_index = 94 , len(sampled_ep) = 79, tra

ep_idx[i] = 76
buffer_index = 11 , len(sampled_ep) = 198, trace = from 111 to 119
ep_idx[i] = 348
buffer_index = 283 , len(sampled_ep) = 77, trace = from 21 to 29
ep_idx[i] = 128
buffer_index = 63 , len(sampled_ep) = 74, trace = from 0 to 8
ep_idx[i] = 226
buffer_index = 161 , len(sampled_ep) = 120, trace = from 16 to 24
ep_idx[i] = 113
buffer_index = 48 , len(sampled_ep) = 105, trace = from 50 to 58
ep_idx[i] = 290
buffer_index = 225 , len(sampled_ep) = 152, trace = from 28 to 36
ep_idx[i] = 330
buffer_index = 265 , len(sampled_ep) = 175, trace = from 124 to 132
ep_idx[i] = 160
buffer_index = 95 , len(sampled_ep) = 117, trace = from 76 to 84
ep_idx[i] = 151
buffer_index = 86 , len(sampled_ep) = 82, trace = from 20 to 28
ep_idx[i] = 84
buffer_index = 19 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 106
buffer_index = 41 , len(sampled_ep) = 103, trace = from 68 to 76
ep_idx[i] = 134
buffer_index = 69 , len(sampled_ep) = 208, trace = from 22 to 30
ep_idx[i] = 344
buffer_index

buffer_index = 131 , len(sampled_ep) = 135, trace = from 30 to 38
ep_idx[i] = 330
buffer_index = 265 , len(sampled_ep) = 175, trace = from 147 to 155
ep_idx[i] = 322
buffer_index = 257 , len(sampled_ep) = 111, trace = from 0 to 8
ep_idx[i] = 332
buffer_index = 267 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 201
buffer_index = 136 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 220
buffer_index = 155 , len(sampled_ep) = 118, trace = from 99 to 107
ep_idx[i] = 290
buffer_index = 225 , len(sampled_ep) = 152, trace = from 30 to 38
ep_idx[i] = 291
buffer_index = 226 , len(sampled_ep) = 103, trace = from 42 to 50
ep_idx[i] = 105
buffer_index = 40 , len(sampled_ep) = 74, trace = from 3 to 11
ep_idx[i] = 310
buffer_index = 245 , len(sampled_ep) = 164, trace = from 3 to 11
ep_idx[i] = 184
buffer_index = 119 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 105
buffer_index = 40 , len(sampled_ep) = 74, trace = from 5 to 13
ep_idx[i] = 349
buffer_index = 284 , len(

buffer_index = 38 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 336
buffer_index = 271 , len(sampled_ep) = 102, trace = from 11 to 19
ep_idx[i] = 318
buffer_index = 253 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 292
buffer_index = 227 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 128
buffer_index = 63 , len(sampled_ep) = 74, trace = from 34 to 42
ep_idx[i] = 241
buffer_index = 176 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 167
buffer_index = 102 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 312
buffer_index = 247 , len(sampled_ep) = 108, trace = from 30 to 38
ep_idx[i] = 272
buffer_index = 207 , len(sampled_ep) = 70, trace = from 11 to 19
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9939535000000499
epsilon is = 0.9939520000000499
epsilon is = 0.9939505000000499
epsilon is = 0.9939490000000499
epsilon is = 0.9939475000000499
Target Set Success
ep_idx = [331, 234, 315, 259, 189, 205, 350, 304, 124, 259, 180, 118, 340, 

buffer_index = 124 , len(sampled_ep) = 84, trace = from 45 to 53
ep_idx[i] = 178
buffer_index = 113 , len(sampled_ep) = 80, trace = from 57 to 65
ep_idx[i] = 292
buffer_index = 227 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 136
buffer_index = 71 , len(sampled_ep) = 108, trace = from 12 to 20
ep_idx[i] = 205
buffer_index = 140 , len(sampled_ep) = 78, trace = from 36 to 44
ep_idx[i] = 222
buffer_index = 157 , len(sampled_ep) = 224, trace = from 99 to 107
ep_idx[i] = 120
buffer_index = 55 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 363
buffer_index = 298 , len(sampled_ep) = 73, trace = from 42 to 50
ep_idx[i] = 123
buffer_index = 58 , len(sampled_ep) = 198, trace = from 134 to 142
ep_idx[i] = 229
buffer_index = 164 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 259
buffer_index = 194 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 286
buffer_index = 221 , len(sampled_ep) = 147, trace = from 83 to 91
ep_idx[i] = 111
buffer_index = 46 , len(

buffer_index = 213 , len(sampled_ep) = 48, trace = from 4 to 12
ep_idx[i] = 254
buffer_index = 188 , len(sampled_ep) = 116, trace = from 76 to 84
ep_idx[i] = 332
buffer_index = 266 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 252
buffer_index = 186 , len(sampled_ep) = 81, trace = from 54 to 62
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9939160000000502
epsilon is = 0.9939145000000502
epsilon is = 0.9939130000000502
epsilon is = 0.9939115000000502
epsilon is = 0.9939100000000503
Target Set Success
ep_idx = [118, 144, 70, 134, 337, 135, 143, 162, 197, 263, 327, 268, 357, 272, 261, 92, 117, 300, 137, 238, 213, 184, 315, 266, 296, 174, 222, 267, 320, 171, 264, 150]
exp_idx = [29, 78, 181, 115, 76, 23, 33, 21, 47, 24, 33, 60, 44, 42, 28, 57, 17, 65, 73, 50, 139, 58, 77, 67, 67, 7, 109, 25, 67, 69, 86, 55]
idx_offset = 66, self.episode_index = 365, len(self.buffer) = 300
ep_idx[i] = 118
buffer_index = 52 , len(sampled_ep) = 111, trace = from 22 to 30
ep_idx[i] = 144
buffer_

buffer_index = 133 , len(sampled_ep) = 116, trace = from 30 to 38
ep_idx[i] = 164
buffer_index = 98 , len(sampled_ep) = 118, trace = from 40 to 48
ep_idx[i] = 326
buffer_index = 260 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 124
buffer_index = 58 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 326
buffer_index = 260 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 297
buffer_index = 231 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 277
buffer_index = 211 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 253
buffer_index = 187 , len(sampled_ep) = 76, trace = from 34 to 42
ep_idx[i] = 242
buffer_index = 176 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 72
buffer_index = 6 , len(sampled_ep) = 82, trace = from 0 to 8
ep_idx[i] = 348
buffer_index = 282 , len(sampled_ep) = 77, trace = from 21 to 29
ep_idx[i] = 353
buffer_index = 287 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 184
buffer_index = 118 , len(sampled_ep) 

epsilon is = 0.9938740000000506
epsilon is = 0.9938725000000506
Target Set Success
ep_idx = [92, 129, 249, 172, 197, 106, 236, 107, 200, 219, 157, 134, 257, 319, 292, 280, 187, 154, 281, 190, 119, 97, 137, 168, 204, 106, 302, 134, 71, 335, 334, 142]
exp_idx = [225, 34, 29, 41, 76, 15, 23, 77, 63, 47, 46, 26, 58, 35, 66, 57, 79, 81, 122, 16, 86, 16, 13, 67, 16, 64, 29, 119, 76, 21, 83, 41]
idx_offset = 66, self.episode_index = 365, len(self.buffer) = 300
ep_idx[i] = 92
buffer_index = 26 , len(sampled_ep) = 281, trace = from 218 to 226
ep_idx[i] = 129
buffer_index = 63 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 249
buffer_index = 183 , len(sampled_ep) = 75, trace = from 22 to 30
ep_idx[i] = 172
buffer_index = 106 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 197
buffer_index = 131 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 106
buffer_index = 40 , len(sampled_ep) = 103, trace = from 8 to 16
ep_idx[i] = 236
buffer_index = 170 , len(sampled_ep) = 81

buffer_index = 248 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 71
buffer_index = 5 , len(sampled_ep) = 299, trace = from 279 to 287
ep_idx[i] = 217
buffer_index = 151 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 136
buffer_index = 70 , len(sampled_ep) = 108, trace = from 91 to 99
ep_idx[i] = 287
buffer_index = 221 , len(sampled_ep) = 194, trace = from 65 to 73
ep_idx[i] = 135
buffer_index = 69 , len(sampled_ep) = 211, trace = from 124 to 132
ep_idx[i] = 314
buffer_index = 248 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 165
buffer_index = 99 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 342
buffer_index = 276 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 149
buffer_index = 83 , len(sampled_ep) = 82, trace = from 72 to 80
ep_idx[i] = 93
buffer_index = 27 , len(sampled_ep) = 77, trace = from 37 to 45
ep_idx[i] = 144
buffer_index = 78 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 211
buffer_index = 145 , len(sampl

buffer_index = 52 , len(sampled_ep) = 111, trace = from 39 to 47
ep_idx[i] = 214
buffer_index = 148 , len(sampled_ep) = 75, trace = from 53 to 61
ep_idx[i] = 345
buffer_index = 279 , len(sampled_ep) = 108, trace = from 12 to 20
ep_idx[i] = 342
buffer_index = 276 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 274
buffer_index = 208 , len(sampled_ep) = 114, trace = from 29 to 37
ep_idx[i] = 191
buffer_index = 125 , len(sampled_ep) = 131, trace = from 50 to 58
ep_idx[i] = 332
buffer_index = 266 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 211
buffer_index = 145 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 306
buffer_index = 240 , len(sampled_ep) = 117, trace = from 81 to 89
ep_idx[i] = 330
buffer_index = 264 , len(sampled_ep) = 175, trace = from 8 to 16
ep_idx[i] = 155
buffer_index = 89 , len(sampled_ep) = 77, trace = from 46 to 54
ep_idx[i] = 349
buffer_index = 283 , len(sampled_ep) = 228, trace = from 31 to 39
ep_idx[i] = 334
buffer_index = 268 , len

buffer_index = 5 , len(sampled_ep) = 299, trace = from 175 to 183
ep_idx[i] = 334
buffer_index = 268 , len(sampled_ep) = 113, trace = from 90 to 98
ep_idx[i] = 144
buffer_index = 78 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 129
buffer_index = 63 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 295
buffer_index = 229 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 113
buffer_index = 47 , len(sampled_ep) = 105, trace = from 48 to 56
ep_idx[i] = 349
buffer_index = 283 , len(sampled_ep) = 228, trace = from 40 to 48
ep_idx[i] = 332
buffer_index = 266 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 92
buffer_index = 26 , len(sampled_ep) = 281, trace = from 38 to 46
ep_idx[i] = 113
buffer_index = 47 , len(sampled_ep) = 105, trace = from 76 to 84
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.993818500000051
epsilon is = 0.993817000000051
epsilon is = 0.993815500000051
epsilon is = 0.993814000000051
epsilon is = 0.9938125000000511
Target Set Succes

buffer_index = 66 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 93
buffer_index = 27 , len(sampled_ep) = 77, trace = from 38 to 46
ep_idx[i] = 340
buffer_index = 274 , len(sampled_ep) = 142, trace = from 11 to 19
ep_idx[i] = 108
buffer_index = 42 , len(sampled_ep) = 45, trace = from 5 to 13
ep_idx[i] = 349
buffer_index = 283 , len(sampled_ep) = 228, trace = from 148 to 156
ep_idx[i] = 103
buffer_index = 37 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 334
buffer_index = 268 , len(sampled_ep) = 113, trace = from 61 to 69
ep_idx[i] = 242
buffer_index = 176 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 250
buffer_index = 184 , len(sampled_ep) = 49, trace = from 13 to 21
ep_idx[i] = 285
buffer_index = 219 , len(sampled_ep) = 166, trace = from 64 to 72
ep_idx[i] = 298
buffer_index = 232 , len(sampled_ep) = 115, trace = from 11 to 19
ep_idx[i] = 92
buffer_index = 26 , len(sampled_ep) = 281, trace = from 130 to 138
ep_idx[i] = 76
buffer_index = 10 , len(sam

ep_idx[i] = 215
buffer_index = 148 , len(sampled_ep) = 117, trace = from 46 to 54
ep_idx[i] = 327
buffer_index = 260 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 334
buffer_index = 267 , len(sampled_ep) = 113, trace = from 62 to 70
ep_idx[i] = 351
buffer_index = 284 , len(sampled_ep) = 143, trace = from 48 to 56
ep_idx[i] = 184
buffer_index = 117 , len(sampled_ep) = 81, trace = from 57 to 65
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9937810000000513
epsilon is = 0.9937795000000513
epsilon is = 0.9937780000000513
epsilon is = 0.9937765000000514
epsilon is = 0.9937750000000514
Target Set Success
ep_idx = [142, 300, 71, 199, 281, 90, 185, 208, 355, 118, 232, 220, 292, 120, 356, 214, 332, 205, 212, 123, 203, 202, 362, 209, 326, 175, 71, 291, 175, 144, 329, 229]
exp_idx = [89, 20, 237, 67, 9, 49, 19, 26, 56, 82, 17, 47, 57, 35, 51, 44, 56, 14, 17, 156, 17, 13, 50, 19, 80, 30, 73, 24, 68, 17, 8, 52]
idx_offset = 67, self.episode_index = 366, len(self.buffer) = 300
ep_idx[i

buffer_index = 39 , len(sampled_ep) = 103, trace = from 33 to 41
ep_idx[i] = 175
buffer_index = 108 , len(sampled_ep) = 102, trace = from 43 to 51
ep_idx[i] = 92
buffer_index = 25 , len(sampled_ep) = 281, trace = from 31 to 39
ep_idx[i] = 115
buffer_index = 48 , len(sampled_ep) = 299, trace = from 71 to 79
ep_idx[i] = 340
buffer_index = 273 , len(sampled_ep) = 142, trace = from 109 to 117
ep_idx[i] = 334
buffer_index = 267 , len(sampled_ep) = 113, trace = from 72 to 80
ep_idx[i] = 349
buffer_index = 282 , len(sampled_ep) = 228, trace = from 140 to 148
ep_idx[i] = 315
buffer_index = 248 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 297
buffer_index = 230 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 248
buffer_index = 181 , len(sampled_ep) = 77, trace = from 53 to 61
ep_idx[i] = 119
buffer_index = 52 , len(sampled_ep) = 183, trace = from 159 to 167
ep_idx[i] = 80
buffer_index = 13 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 334
buffer_index = 267 , 

epsilon is = 0.9937420000000516
epsilon is = 0.9937405000000517
epsilon is = 0.9937390000000517
epsilon is = 0.9937375000000517
Target Set Success
ep_idx = [196, 342, 332, 333, 281, 331, 178, 349, 281, 330, 227, 116, 102, 291, 116, 336, 333, 277, 178, 254, 249, 143, 182, 342, 228, 351, 222, 310, 177, 154, 292, 71]
exp_idx = [41, 74, 69, 77, 140, 78, 63, 96, 113, 117, 84, 75, 15, 13, 16, 32, 71, 24, 37, 24, 45, 60, 61, 55, 12, 86, 105, 54, 17, 90, 20, 214]
idx_offset = 67, self.episode_index = 366, len(self.buffer) = 300
ep_idx[i] = 196
buffer_index = 129 , len(sampled_ep) = 135, trace = from 34 to 42
ep_idx[i] = 342
buffer_index = 275 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 332
buffer_index = 265 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 333
buffer_index = 266 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 281
buffer_index = 214 , len(sampled_ep) = 178, trace = from 133 to 141
ep_idx[i] = 331
buffer_index = 264 , len(sampled_ep) = 84, trace 

buffer_index = 193 , len(sampled_ep) = 108, trace = from 77 to 85
ep_idx[i] = 202
buffer_index = 135 , len(sampled_ep) = 143, trace = from 86 to 94
ep_idx[i] = 334
buffer_index = 267 , len(sampled_ep) = 113, trace = from 65 to 73
ep_idx[i] = 332
buffer_index = 265 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 109
buffer_index = 42 , len(sampled_ep) = 48, trace = from 8 to 16
ep_idx[i] = 115
buffer_index = 48 , len(sampled_ep) = 299, trace = from 50 to 58
ep_idx[i] = 70
buffer_index = 3 , len(sampled_ep) = 198, trace = from 38 to 46
ep_idx[i] = 309
buffer_index = 242 , len(sampled_ep) = 115, trace = from 106 to 114
ep_idx[i] = 327
buffer_index = 260 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 159
buffer_index = 92 , len(sampled_ep) = 79, trace = from 23 to 31
ep_idx[i] = 196
buffer_index = 129 , len(sampled_ep) = 135, trace = from 50 to 58
ep_idx[i] = 140
buffer_index = 73 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 248
buffer_index = 181 , len(samp

exp_idx = [46, 44, 41, 11, 18, 58, 103, 23, 60, 28, 140, 15, 27, 103, 48, 78, 110, 58, 73, 111, 78, 92, 59, 63, 13, 42, 46, 41, 30, 46, 16, 20]
idx_offset = 67, self.episode_index = 366, len(self.buffer) = 300
ep_idx[i] = 92
buffer_index = 25 , len(sampled_ep) = 281, trace = from 39 to 47
ep_idx[i] = 275
buffer_index = 208 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 145
buffer_index = 78 , len(sampled_ep) = 117, trace = from 34 to 42
ep_idx[i] = 179
buffer_index = 112 , len(sampled_ep) = 77, trace = from 4 to 12
ep_idx[i] = 118
buffer_index = 51 , len(sampled_ep) = 111, trace = from 11 to 19
ep_idx[i] = 325
buffer_index = 258 , len(sampled_ep) = 82, trace = from 51 to 59
ep_idx[i] = 136
buffer_index = 69 , len(sampled_ep) = 108, trace = from 96 to 104
ep_idx[i] = 160
buffer_index = 93 , len(sampled_ep) = 117, trace = from 16 to 24
ep_idx[i] = 72
buffer_index = 5 , len(sampled_ep) = 82, trace = from 53 to 61
ep_idx[i] = 337
buffer_index = 270 , len(sampled_ep) = 82, trace 

ep_idx[i] = 331
buffer_index = 264 , len(sampled_ep) = 84, trace = from 50 to 58
ep_idx[i] = 92
buffer_index = 25 , len(sampled_ep) = 281, trace = from 30 to 38
ep_idx[i] = 157
buffer_index = 90 , len(sampled_ep) = 103, trace = from 9 to 17
ep_idx[i] = 299
buffer_index = 232 , len(sampled_ep) = 100, trace = from 51 to 59
ep_idx[i] = 121
buffer_index = 54 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 187
buffer_index = 120 , len(sampled_ep) = 84, trace = from 61 to 69
ep_idx[i] = 186
buffer_index = 119 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 213
buffer_index = 146 , len(sampled_ep) = 148, trace = from 124 to 132
ep_idx[i] = 198
buffer_index = 131 , len(sampled_ep) = 42, trace = from 7 to 15
ep_idx[i] = 215
buffer_index = 148 , len(sampled_ep) = 117, trace = from 59 to 67
ep_idx[i] = 336
buffer_index = 269 , len(sampled_ep) = 102, trace = from 11 to 19
ep_idx[i] = 186
buffer_index = 119 , len(sampled_ep) = 81, trace = from 10 to 18
sampledTraces.shape = (32, 

buffer_index = 264 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 93
buffer_index = 25 , len(sampled_ep) = 77, trace = from 22 to 30
ep_idx[i] = 142
buffer_index = 74 , len(sampled_ep) = 108, trace = from 8 to 16
ep_idx[i] = 123
buffer_index = 55 , len(sampled_ep) = 198, trace = from 174 to 182
ep_idx[i] = 301
buffer_index = 233 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 207
buffer_index = 139 , len(sampled_ep) = 75, trace = from 5 to 13
ep_idx[i] = 202
buffer_index = 134 , len(sampled_ep) = 143, trace = from 65 to 73
ep_idx[i] = 335
buffer_index = 267 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 203
buffer_index = 135 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 329
buffer_index = 261 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 351
buffer_index = 283 , len(sampled_ep) = 143, trace = from 36 to 44
ep_idx[i] = 140
buffer_index = 72 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 317
buffer_index = 249 , len(samp

ep_idx[i] = 70
buffer_index = 2 , len(sampled_ep) = 198, trace = from 130 to 138
ep_idx[i] = 325
buffer_index = 257 , len(sampled_ep) = 82, trace = from 42 to 50
ep_idx[i] = 180
buffer_index = 112 , len(sampled_ep) = 141, trace = from 91 to 99
ep_idx[i] = 142
buffer_index = 74 , len(sampled_ep) = 108, trace = from 68 to 76
ep_idx[i] = 155
buffer_index = 87 , len(sampled_ep) = 77, trace = from 18 to 26
ep_idx[i] = 70
buffer_index = 2 , len(sampled_ep) = 198, trace = from 157 to 165
ep_idx[i] = 274
buffer_index = 206 , len(sampled_ep) = 114, trace = from 10 to 18
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9936460000000524
epsilon is = 0.9936445000000524
epsilon is = 0.9936430000000525
epsilon is = 0.9936415000000525
epsilon is = 0.9936400000000525
Target Set Success
ep_idx = [102, 73, 331, 267, 110, 358, 327, 118, 295, 318, 113, 214, 184, 332, 219, 317, 332, 330, 70, 341, 104, 228, 178, 113, 334, 312, 142, 349, 175, 288, 219, 234]
exp_idx = [10, 61, 56, 41, 123, 63, 65, 28, 20, 39, 

buffer_index = 177 , len(sampled_ep) = 75, trace = from 39 to 47
ep_idx[i] = 329
buffer_index = 261 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 137
buffer_index = 69 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 71
buffer_index = 3 , len(sampled_ep) = 299, trace = from 114 to 122
ep_idx[i] = 200
buffer_index = 132 , len(sampled_ep) = 82, trace = from 66 to 74
ep_idx[i] = 116
buffer_index = 48 , len(sampled_ep) = 162, trace = from 8 to 16
ep_idx[i] = 118
buffer_index = 50 , len(sampled_ep) = 111, trace = from 0 to 8
ep_idx[i] = 313
buffer_index = 245 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 285
buffer_index = 217 , len(sampled_ep) = 166, trace = from 113 to 121
ep_idx[i] = 334
buffer_index = 266 , len(sampled_ep) = 113, trace = from 88 to 96
ep_idx[i] = 86
buffer_index = 18 , len(sampled_ep) = 77, trace = from 48 to 56
ep_idx[i] = 309
buffer_index = 241 , len(sampled_ep) = 115, trace = from 5 to 13
ep_idx[i] = 87
buffer_index = 19 , len(sampled_

buffer_index = 87 , len(sampled_ep) = 77, trace = from 42 to 50
ep_idx[i] = 349
buffer_index = 281 , len(sampled_ep) = 228, trace = from 149 to 157
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9936085000000527
epsilon is = 0.9936070000000528
epsilon is = 0.9936055000000528
epsilon is = 0.9936040000000528
epsilon is = 0.9936025000000528
Target Set Success
ep_idx = [324, 342, 200, 350, 160, 213, 246, 210, 76, 334, 275, 334, 188, 162, 334, 334, 98, 149, 92, 119, 209, 334, 252, 340, 223, 129, 134, 356, 125, 202, 349, 82]
exp_idx = [79, 46, 15, 78, 23, 133, 19, 78, 16, 103, 52, 104, 70, 103, 71, 107, 7, 56, 223, 48, 37, 108, 17, 130, 108, 79, 120, 93, 14, 46, 148, 45]
idx_offset = 68, self.episode_index = 367, len(self.buffer) = 300
ep_idx[i] = 324
buffer_index = 256 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 342
buffer_index = 274 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 200
buffer_index = 132 , len(sampled_ep) = 82, trace = from 8 to 16
ep_idx[i] = 350
b

ep_idx[i] = 297
buffer_index = 229 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 194
buffer_index = 126 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 340
buffer_index = 272 , len(sampled_ep) = 142, trace = from 8 to 16
ep_idx[i] = 115
buffer_index = 47 , len(sampled_ep) = 299, trace = from 256 to 264
ep_idx[i] = 263
buffer_index = 195 , len(sampled_ep) = 51, trace = from 27 to 35
ep_idx[i] = 332
buffer_index = 264 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 318
buffer_index = 250 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 216
buffer_index = 148 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 287
buffer_index = 219 , len(sampled_ep) = 194, trace = from 100 to 108
ep_idx[i] = 236
buffer_index = 168 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 330
buffer_index = 262 , len(sampled_ep) = 175, trace = from 33 to 41
ep_idx[i] = 80
buffer_index = 12 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 334
buffer_index 

ep_idx = [312, 318, 257, 286, 218, 275, 156, 76, 289, 355, 311, 130, 310, 123, 115, 142, 264, 354, 289, 138, 273, 286, 336, 363, 191, 233, 200, 162, 139, 128, 172, 312]
exp_idx = [65, 40, 18, 15, 44, 17, 16, 158, 19, 15, 83, 33, 136, 103, 236, 44, 77, 7, 18, 61, 16, 93, 24, 17, 120, 8, 75, 101, 105, 13, 80, 82]
idx_offset = 68, self.episode_index = 367, len(self.buffer) = 300
ep_idx[i] = 312
buffer_index = 244 , len(sampled_ep) = 108, trace = from 58 to 66
ep_idx[i] = 318
buffer_index = 250 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 257
buffer_index = 189 , len(sampled_ep) = 82, trace = from 11 to 19
ep_idx[i] = 286
buffer_index = 218 , len(sampled_ep) = 147, trace = from 8 to 16
ep_idx[i] = 218
buffer_index = 150 , len(sampled_ep) = 80, trace = from 37 to 45
ep_idx[i] = 275
buffer_index = 207 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 156
buffer_index = 88 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 76
buffer_index = 8 , len(sampled_ep) = 198

buffer_index = 155 , len(sampled_ep) = 46, trace = from 12 to 20
ep_idx[i] = 193
buffer_index = 124 , len(sampled_ep) = 108, trace = from 46 to 54
ep_idx[i] = 289
buffer_index = 220 , len(sampled_ep) = 83, trace = from 75 to 83
ep_idx[i] = 256
buffer_index = 187 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 314
buffer_index = 245 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 334
buffer_index = 265 , len(sampled_ep) = 113, trace = from 87 to 95
ep_idx[i] = 160
buffer_index = 91 , len(sampled_ep) = 117, trace = from 41 to 49
ep_idx[i] = 334
buffer_index = 265 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 224
buffer_index = 155 , len(sampled_ep) = 46, trace = from 9 to 17
ep_idx[i] = 147
buffer_index = 78 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 71
buffer_index = 2 , len(sampled_ep) = 299, trace = from 88 to 96
ep_idx[i] = 268
buffer_index = 199 , len(sampled_ep) = 120, trace = from 33 to 41
ep_idx[i] = 92
buffer_index = 23 , len(sam

buffer_index = 269 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 295
buffer_index = 226 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 331
buffer_index = 262 , len(sampled_ep) = 84, trace = from 57 to 65
ep_idx[i] = 223
buffer_index = 154 , len(sampled_ep) = 112, trace = from 97 to 105
ep_idx[i] = 332
buffer_index = 263 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 108
buffer_index = 39 , len(sampled_ep) = 45, trace = from 35 to 43
ep_idx[i] = 98
buffer_index = 29 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 225
buffer_index = 156 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 103
buffer_index = 34 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 87
buffer_index = 18 , len(sampled_ep) = 76, trace = from 28 to 36
ep_idx[i] = 347
buffer_index = 278 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 158
buffer_index = 89 , len(sampled_ep) = 83, trace = from 48 to 56
ep_idx[i] = 149
buffer_index = 80 , len(sampled_e

buffer_index = 90 , len(sampled_ep) = 79, trace = from 47 to 55
ep_idx[i] = 257
buffer_index = 188 , len(sampled_ep) = 82, trace = from 70 to 78
ep_idx[i] = 194
buffer_index = 125 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 131
buffer_index = 62 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 320
buffer_index = 251 , len(sampled_ep) = 71, trace = from 50 to 58
ep_idx[i] = 294
buffer_index = 225 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 185
buffer_index = 116 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 265 , len(sampled_ep) = 113, trace = from 103 to 111
ep_idx[i] = 225
buffer_index = 156 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 137
buffer_index = 68 , len(sampled_ep) = 81, trace = from 71 to 79
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9935110000000535
epsilon is = 0.9935095000000536
epsilon is = 0.9935080000000536
epsilon is = 0.9935065000000536
epsilon is = 0.9935050000000536
Target Set Suc

buffer_index = 241 , len(sampled_ep) = 164, trace = from 40 to 48
ep_idx[i] = 275
buffer_index = 206 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 356
buffer_index = 287 , len(sampled_ep) = 100, trace = from 86 to 94
ep_idx[i] = 277
buffer_index = 208 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 147
buffer_index = 78 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 267
buffer_index = 198 , len(sampled_ep) = 83, trace = from 20 to 28
ep_idx[i] = 318
buffer_index = 249 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 319
buffer_index = 250 , len(sampled_ep) = 70, trace = from 10 to 18
ep_idx[i] = 138
buffer_index = 69 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 123
buffer_index = 54 , len(sampled_ep) = 198, trace = from 89 to 97
ep_idx[i] = 70
buffer_index = 1 , len(sampled_ep) = 198, trace = from 174 to 182
ep_idx[i] = 236
buffer_index = 167 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 265 , len(samp

buffer_index = 144 , len(sampled_ep) = 148, trace = from 14 to 22
ep_idx[i] = 216
buffer_index = 147 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 278
buffer_index = 209 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 255
buffer_index = 186 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 171
buffer_index = 102 , len(sampled_ep) = 81, trace = from 0 to 8
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9934735000000539
epsilon is = 0.9934720000000539
epsilon is = 0.9934705000000539
epsilon is = 0.9934690000000539
epsilon is = 0.9934675000000539
Target Set Success
ep_idx = [116, 363, 351, 76, 342, 340, 163, 234, 287, 122, 169, 324, 305, 236, 315, 326, 94, 271, 222, 305, 337, 150, 134, 92, 344, 86, 310, 290, 120, 349, 175, 100]
exp_idx = [161, 17, 118, 158, 53, 74, 9, 157, 129, 34, 63, 36, 26, 60, 75, 7, 14, 8, 117, 65, 69, 16, 67, 85, 48, 48, 125, 147, 50, 57, 55, 40]
idx_offset = 69, self.episode_index = 368, len(self.buffer) = 300
ep_idx[i] = 116
buffer_i

buffer_index = 277 , len(sampled_ep) = 70, trace = from 53 to 61
ep_idx[i] = 251
buffer_index = 182 , len(sampled_ep) = 133, trace = from 77 to 85
ep_idx[i] = 193
buffer_index = 124 , len(sampled_ep) = 108, trace = from 11 to 19
ep_idx[i] = 350
buffer_index = 281 , len(sampled_ep) = 198, trace = from 158 to 166
ep_idx[i] = 298
buffer_index = 229 , len(sampled_ep) = 115, trace = from 89 to 97
ep_idx[i] = 195
buffer_index = 126 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 185
buffer_index = 116 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 334
buffer_index = 265 , len(sampled_ep) = 113, trace = from 85 to 93
ep_idx[i] = 291
buffer_index = 222 , len(sampled_ep) = 103, trace = from 39 to 47
ep_idx[i] = 159
buffer_index = 90 , len(sampled_ep) = 79, trace = from 40 to 48
ep_idx[i] = 89
buffer_index = 20 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 88
buffer_index = 19 , len(sampled_ep) = 114, trace = from 102 to 110
ep_idx[i] = 314
buffer_index = 245 , l

buffer_index = 259 , len(sampled_ep) = 81, trace = from 64 to 72
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9934360000000542
epsilon is = 0.9934345000000542
epsilon is = 0.9934330000000542
epsilon is = 0.9934315000000542
epsilon is = 0.9934300000000542
Target Set Success
ep_idx = [343, 211, 286, 242, 362, 211, 202, 303, 213, 188, 135, 301, 322, 124, 330, 75, 206, 232, 273, 272, 73, 260, 357, 330, 331, 198, 334, 135, 305, 79, 116, 247]
exp_idx = [36, 80, 100, 13, 33, 14, 103, 42, 115, 73, 51, 16, 41, 16, 170, 32, 52, 33, 50, 18, 16, 104, 25, 97, 49, 9, 94, 191, 49, 15, 53, 43]
idx_offset = 70, self.episode_index = 369, len(self.buffer) = 300
ep_idx[i] = 343
buffer_index = 273 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 211
buffer_index = 141 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 286
buffer_index = 216 , len(sampled_ep) = 147, trace = from 93 to 101
ep_idx[i] = 242
buffer_index = 172 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 362
buffe

buffer_index = 266 , len(sampled_ep) = 102, trace = from 11 to 19
ep_idx[i] = 262
buffer_index = 192 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 141
buffer_index = 71 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 221
buffer_index = 151 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 246
buffer_index = 176 , len(sampled_ep) = 47, trace = from 23 to 31
ep_idx[i] = 202
buffer_index = 132 , len(sampled_ep) = 143, trace = from 34 to 42
ep_idx[i] = 97
buffer_index = 27 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 306
buffer_index = 236 , len(sampled_ep) = 117, trace = from 81 to 89
ep_idx[i] = 149
buffer_index = 79 , len(sampled_ep) = 82, trace = from 2 to 10
ep_idx[i] = 173
buffer_index = 103 , len(sampled_ep) = 71, trace = from 39 to 47
ep_idx[i] = 356
buffer_index = 286 , len(sampled_ep) = 100, trace = from 27 to 35
ep_idx[i] = 164
buffer_index = 94 , len(sampled_ep) = 118, trace = from 55 to 63
ep_idx[i] = 267
buffer_index = 197 , len(sample

exp_idx = [21, 21, 82, 12, 68, 28, 15, 67, 20, 15, 14, 17, 17, 103, 41, 84, 171, 20, 19, 25, 46, 123, 81, 29, 13, 16, 11, 87, 59, 164, 102, 109]
idx_offset = 70, self.episode_index = 369, len(self.buffer) = 300
ep_idx[i] = 87
buffer_index = 17 , len(sampled_ep) = 76, trace = from 14 to 22
ep_idx[i] = 145
buffer_index = 75 , len(sampled_ep) = 117, trace = from 14 to 22
ep_idx[i] = 71
buffer_index = 1 , len(sampled_ep) = 299, trace = from 75 to 83
ep_idx[i] = 299
buffer_index = 229 , len(sampled_ep) = 100, trace = from 5 to 13
ep_idx[i] = 97
buffer_index = 27 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 330
buffer_index = 260 , len(sampled_ep) = 175, trace = from 21 to 29
ep_idx[i] = 71
buffer_index = 1 , len(sampled_ep) = 299, trace = from 8 to 16
ep_idx[i] = 131
buffer_index = 61 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 191
buffer_index = 121 , len(sampled_ep) = 131, trace = from 13 to 21
ep_idx[i] = 76
buffer_index = 6 , len(sampled_ep) = 198, trace = fro

ep_idx[i] = 106
buffer_index = 36 , len(sampled_ep) = 103, trace = from 85 to 93
ep_idx[i] = 71
buffer_index = 1 , len(sampled_ep) = 299, trace = from 122 to 130
ep_idx[i] = 285
buffer_index = 215 , len(sampled_ep) = 166, trace = from 67 to 75
ep_idx[i] = 72
buffer_index = 2 , len(sampled_ep) = 82, trace = from 31 to 39
ep_idx[i] = 251
buffer_index = 181 , len(sampled_ep) = 133, trace = from 83 to 91
ep_idx[i] = 235
buffer_index = 165 , len(sampled_ep) = 46, trace = from 5 to 13
ep_idx[i] = 314
buffer_index = 244 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 314
buffer_index = 244 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 252
buffer_index = 182 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 215
buffer_index = 145 , len(sampled_ep) = 117, trace = from 54 to 62
ep_idx[i] = 221
buffer_index = 151 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 163
buffer_index = 93 , len(sampled_ep) = 134, trace = from 79 to 87
sampledTraces.shape = (32, 8, 

buffer_index = 12 , len(sampled_ep) = 73, trace = from 59 to 67
ep_idx[i] = 331
buffer_index = 261 , len(sampled_ep) = 84, trace = from 19 to 27
ep_idx[i] = 301
buffer_index = 231 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 80
buffer_index = 10 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 107
buffer_index = 37 , len(sampled_ep) = 109, trace = from 1 to 9
ep_idx[i] = 287
buffer_index = 217 , len(sampled_ep) = 194, trace = from 141 to 149
ep_idx[i] = 303
buffer_index = 233 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 142
buffer_index = 72 , len(sampled_ep) = 108, trace = from 19 to 27
ep_idx[i] = 213
buffer_index = 143 , len(sampled_ep) = 148, trace = from 44 to 52
ep_idx[i] = 170
buffer_index = 100 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 334
buffer_index = 264 , len(sampled_ep) = 113, trace = from 72 to 80
ep_idx[i] = 194
buffer_index = 124 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 115
buffer_index = 45 , len(sample

buffer_index = 159 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 344
buffer_index = 274 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 114
buffer_index = 44 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 297
buffer_index = 227 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 132
buffer_index = 62 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 292
buffer_index = 222 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 306
buffer_index = 236 , len(sampled_ep) = 117, trace = from 105 to 113
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.993338500000055
epsilon is = 0.993337000000055
epsilon is = 0.993335500000055
epsilon is = 0.993334000000055
epsilon is = 0.993332500000055
Target Set Success
ep_idx = [328, 299, 213, 306, 310, 74, 310, 281, 169, 155, 234, 154, 314, 89, 72, 208, 288, 124, 345, 92, 71, 310, 134, 247, 186, 298, 119, 144, 118, 78, 142, 301]
exp_idx = [73, 97, 84, 29, 116, 58, 132, 126, 18, 27, 94, 74, 66, 72, 14, 66, 

buffer_index = 78 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 334
buffer_index = 264 , len(sampled_ep) = 113, trace = from 96 to 104
ep_idx[i] = 135
buffer_index = 65 , len(sampled_ep) = 211, trace = from 147 to 155
ep_idx[i] = 222
buffer_index = 152 , len(sampled_ep) = 224, trace = from 196 to 204
ep_idx[i] = 152
buffer_index = 82 , len(sampled_ep) = 228, trace = from 106 to 114
ep_idx[i] = 358
buffer_index = 288 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 70
buffer_index = 0 , len(sampled_ep) = 198, trace = from 9 to 17
ep_idx[i] = 327
buffer_index = 257 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 277
buffer_index = 207 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 310
buffer_index = 240 , len(sampled_ep) = 164, trace = from 21 to 29
ep_idx[i] = 111
buffer_index = 41 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 264 , len(sampled_ep) = 113, trace = from 103 to 111
ep_idx[i] = 140
buffer_index = 70 , le

buffer_index = 279 , len(sampled_ep) = 228, trace = from 191 to 199
ep_idx[i] = 229
buffer_index = 159 , len(sampled_ep) = 81, trace = from 17 to 25
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9933010000000553
epsilon is = 0.9932995000000553
epsilon is = 0.9932980000000553
epsilon is = 0.9932965000000553
epsilon is = 0.9932950000000553
Target Set Success
ep_idx = [139, 144, 260, 213, 93, 330, 274, 261, 270, 126, 76, 219, 280, 366, 177, 102, 132, 145, 312, 301, 220, 327, 232, 144, 135, 117, 110, 81, 251, 309, 349, 145]
exp_idx = [88, 17, 98, 19, 75, 127, 65, 16, 24, 37, 158, 21, 44, 16, 16, 8, 71, 55, 19, 19, 101, 44, 19, 41, 161, 7, 61, 13, 92, 106, 84, 101]
idx_offset = 70, self.episode_index = 369, len(self.buffer) = 300
ep_idx[i] = 139
buffer_index = 69 , len(sampled_ep) = 133, trace = from 81 to 89
ep_idx[i] = 144
buffer_index = 74 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 260
buffer_index = 190 , len(sampled_ep) = 108, trace = from 91 to 99
ep_idx[i] = 213
buff

buffer_index = 89 , len(sampled_ep) = 79, trace = from 19 to 27
ep_idx[i] = 340
buffer_index = 270 , len(sampled_ep) = 142, trace = from 9 to 17
ep_idx[i] = 92
buffer_index = 22 , len(sampled_ep) = 281, trace = from 130 to 138
ep_idx[i] = 234
buffer_index = 164 , len(sampled_ep) = 167, trace = from 42 to 50
ep_idx[i] = 163
buffer_index = 93 , len(sampled_ep) = 134, trace = from 124 to 132
ep_idx[i] = 200
buffer_index = 130 , len(sampled_ep) = 82, trace = from 1 to 9
ep_idx[i] = 135
buffer_index = 65 , len(sampled_ep) = 211, trace = from 49 to 57
ep_idx[i] = 170
buffer_index = 100 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 240
buffer_index = 170 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 208
buffer_index = 138 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 330
buffer_index = 260 , len(sampled_ep) = 175, trace = from 117 to 125
ep_idx[i] = 80
buffer_index = 10 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 268
buffer_index = 198 , len(sam

ep_idx = [167, 349, 193, 335, 133, 329, 131, 272, 334, 275, 243, 70, 202, 152, 120, 113, 140, 74, 107, 160, 350, 330, 291, 175, 92, 161, 103, 120, 70, 365, 318, 85]
exp_idx = [30, 195, 33, 54, 12, 24, 38, 16, 105, 59, 65, 195, 53, 110, 33, 67, 79, 46, 49, 27, 162, 74, 16, 21, 238, 83, 12, 79, 38, 60, 66, 17]
idx_offset = 70, self.episode_index = 369, len(self.buffer) = 300
ep_idx[i] = 167
buffer_index = 97 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 349
buffer_index = 279 , len(sampled_ep) = 228, trace = from 188 to 196
ep_idx[i] = 193
buffer_index = 123 , len(sampled_ep) = 108, trace = from 26 to 34
ep_idx[i] = 335
buffer_index = 265 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 133
buffer_index = 63 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 329
buffer_index = 259 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 131
buffer_index = 61 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 272
buffer_index = 202 , len(sampled_ep) = 70

ep_idx[i] = 106
buffer_index = 36 , len(sampled_ep) = 103, trace = from 82 to 90
ep_idx[i] = 78
buffer_index = 8 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 227
buffer_index = 157 , len(sampled_ep) = 112, trace = from 98 to 106
ep_idx[i] = 199
buffer_index = 129 , len(sampled_ep) = 116, trace = from 102 to 110
ep_idx[i] = 126
buffer_index = 56 , len(sampled_ep) = 46, trace = from 19 to 27
ep_idx[i] = 329
buffer_index = 259 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 116
buffer_index = 46 , len(sampled_ep) = 162, trace = from 130 to 138
ep_idx[i] = 288
buffer_index = 218 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 335
buffer_index = 265 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 100
buffer_index = 30 , len(sampled_ep) = 102, trace = from 78 to 86
ep_idx[i] = 248
buffer_index = 178 , len(sampled_ep) = 77, trace = from 50 to 58
ep_idx[i] = 122
buffer_index = 52 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 145
buffer_ind

buffer_index = 264 , len(sampled_ep) = 113, trace = from 98 to 106
ep_idx[i] = 71
buffer_index = 1 , len(sampled_ep) = 299, trace = from 171 to 179
ep_idx[i] = 123
buffer_index = 53 , len(sampled_ep) = 198, trace = from 58 to 66
ep_idx[i] = 298
buffer_index = 228 , len(sampled_ep) = 115, trace = from 60 to 68
ep_idx[i] = 350
buffer_index = 280 , len(sampled_ep) = 198, trace = from 7 to 15
ep_idx[i] = 349
buffer_index = 279 , len(sampled_ep) = 228, trace = from 202 to 210
ep_idx[i] = 164
buffer_index = 94 , len(sampled_ep) = 118, trace = from 15 to 23
ep_idx[i] = 202
buffer_index = 132 , len(sampled_ep) = 143, trace = from 89 to 97
ep_idx[i] = 249
buffer_index = 179 , len(sampled_ep) = 75, trace = from 33 to 41
ep_idx[i] = 136
buffer_index = 66 , len(sampled_ep) = 108, trace = from 61 to 69
ep_idx[i] = 70
buffer_index = 0 , len(sampled_ep) = 198, trace = from 54 to 62
ep_idx[i] = 286
buffer_index = 216 , len(sampled_ep) = 147, trace = from 75 to 83
ep_idx[i] = 334
buffer_index = 264 , l

buffer_index = 74 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 186
buffer_index = 116 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 81
buffer_index = 11 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 296
buffer_index = 226 , len(sampled_ep) = 108, trace = from 86 to 94
ep_idx[i] = 157
buffer_index = 87 , len(sampled_ep) = 103, trace = from 70 to 78
ep_idx[i] = 154
buffer_index = 84 , len(sampled_ep) = 153, trace = from 41 to 49
ep_idx[i] = 99
buffer_index = 29 , len(sampled_ep) = 83, trace = from 14 to 22
ep_idx[i] = 70
buffer_index = 0 , len(sampled_ep) = 198, trace = from 1 to 9
ep_idx[i] = 264
buffer_index = 194 , len(sampled_ep) = 109, trace = from 45 to 53
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9932035000000561
epsilon is = 0.9932020000000561
epsilon is = 0.9932005000000561
epsilon is = 0.9931990000000561
epsilon is = 0.9931975000000561
Target Set Success
ep_idx = [257, 135, 118, 202, 315, 329, 238, 112, 175, 182, 191, 334, 229, 140, 199, 

buffer_index = 98 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 349
buffer_index = 279 , len(sampled_ep) = 228, trace = from 45 to 53
ep_idx[i] = 201
buffer_index = 131 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 115
buffer_index = 45 , len(sampled_ep) = 299, trace = from 174 to 182
ep_idx[i] = 135
buffer_index = 65 , len(sampled_ep) = 211, trace = from 42 to 50
ep_idx[i] = 137
buffer_index = 67 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 192
buffer_index = 122 , len(sampled_ep) = 78, trace = from 15 to 23
ep_idx[i] = 222
buffer_index = 152 , len(sampled_ep) = 224, trace = from 187 to 195
ep_idx[i] = 330
buffer_index = 260 , len(sampled_ep) = 175, trace = from 19 to 27
ep_idx[i] = 336
buffer_index = 266 , len(sampled_ep) = 102, trace = from 60 to 68
ep_idx[i] = 157
buffer_index = 87 , len(sampled_ep) = 103, trace = from 3 to 11
ep_idx[i] = 86
buffer_index = 16 , len(sampled_ep) = 77, trace = from 65 to 73
ep_idx[i] = 265
buffer_index = 195 , len(sa

buffer_index = 1 , len(sampled_ep) = 299, trace = from 73 to 81
ep_idx[i] = 225
buffer_index = 155 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 262
buffer_index = 192 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 78
buffer_index = 8 , len(sampled_ep) = 81, trace = from 61 to 69
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9931660000000564
epsilon is = 0.9931645000000564
epsilon is = 0.9931630000000564
epsilon is = 0.9931615000000564
epsilon is = 0.9931600000000564
Target Set Success
ep_idx = [285, 343, 70, 92, 88, 353, 220, 254, 334, 290, 98, 121, 294, 145, 110, 212, 271, 340, 313, 195, 225, 340, 147, 92, 329, 285, 153, 117, 270, 334, 161, 329]
exp_idx = [158, 21, 157, 93, 26, 38, 69, 37, 81, 151, 53, 17, 33, 85, 48, 51, 37, 72, 76, 52, 15, 88, 18, 230, 34, 80, 60, 8, 77, 79, 89, 44]
idx_offset = 70, self.episode_index = 369, len(self.buffer) = 300
ep_idx[i] = 285
buffer_index = 215 , len(sampled_ep) = 166, trace = from 151 to 159
ep_idx[i] = 343
buffer_inde

buffer_index = 156 , len(sampled_ep) = 112, trace = from 11 to 19
ep_idx[i] = 197
buffer_index = 126 , len(sampled_ep) = 81, trace = from 21 to 29
ep_idx[i] = 192
buffer_index = 121 , len(sampled_ep) = 78, trace = from 8 to 16
ep_idx[i] = 167
buffer_index = 96 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 215
buffer_index = 144 , len(sampled_ep) = 117, trace = from 25 to 33
ep_idx[i] = 215
buffer_index = 144 , len(sampled_ep) = 117, trace = from 91 to 99
ep_idx[i] = 139
buffer_index = 68 , len(sampled_ep) = 133, trace = from 9 to 17
ep_idx[i] = 113
buffer_index = 42 , len(sampled_ep) = 105, trace = from 69 to 77
ep_idx[i] = 76
buffer_index = 5 , len(sampled_ep) = 198, trace = from 1 to 9
ep_idx[i] = 242
buffer_index = 171 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 327
buffer_index = 256 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 274
buffer_index = 203 , len(sampled_ep) = 114, trace = from 64 to 72
ep_idx[i] = 299
buffer_index = 228 , len(sampled

epsilon is = 0.9931285000000567
epsilon is = 0.9931270000000567
epsilon is = 0.9931255000000567
epsilon is = 0.9931240000000567
epsilon is = 0.9931225000000568
Target Set Success
ep_idx = [156, 71, 324, 268, 298, 89, 248, 276, 334, 236, 260, 232, 71, 320, 277, 311, 332, 302, 151, 207, 305, 247, 347, 256, 206, 157, 334, 278, 237, 338, 267, 151]
exp_idx = [72, 81, 17, 78, 106, 17, 69, 27, 109, 58, 72, 23, 156, 54, 19, 20, 57, 30, 26, 46, 50, 70, 14, 46, 35, 78, 98, 13, 106, 70, 34, 74]
idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 156
buffer_index = 85 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 71
buffer_index = 0 , len(sampled_ep) = 299, trace = from 74 to 82
ep_idx[i] = 324
buffer_index = 253 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 268
buffer_index = 197 , len(sampled_ep) = 120, trace = from 71 to 79
ep_idx[i] = 298
buffer_index = 227 , len(sampled_ep) = 115, trace = from 99 to 107
ep_idx[i] = 89
buffer_index = 18 , len(s

buffer_index = 25 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 274
buffer_index = 203 , len(sampled_ep) = 114, trace = from 88 to 96
ep_idx[i] = 298
buffer_index = 227 , len(sampled_ep) = 115, trace = from 38 to 46
ep_idx[i] = 330
buffer_index = 259 , len(sampled_ep) = 175, trace = from 134 to 142
ep_idx[i] = 184
buffer_index = 113 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 221
buffer_index = 150 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 114
buffer_index = 43 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 306
buffer_index = 235 , len(sampled_ep) = 117, trace = from 20 to 28
ep_idx[i] = 218
buffer_index = 147 , len(sampled_ep) = 80, trace = from 35 to 43
ep_idx[i] = 302
buffer_index = 231 , len(sampled_ep) = 79, trace = from 44 to 52
ep_idx[i] = 292
buffer_index = 221 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 254
buffer_index = 183 , len(sampled_ep) = 116, trace = from 34 to 42
ep_idx[i] = 334
buffer_index = 263 , len

exp_idx = [169, 14, 30, 163, 73, 98, 11, 45, 35, 35, 38, 167, 58, 19, 58, 128, 29, 100, 17, 71, 82, 11, 26, 44, 34, 15, 113, 50, 22, 84, 57, 20]
idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 281
buffer_index = 210 , len(sampled_ep) = 178, trace = from 162 to 170
ep_idx[i] = 132
buffer_index = 61 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 331
buffer_index = 260 , len(sampled_ep) = 84, trace = from 23 to 31
ep_idx[i] = 71
buffer_index = 0 , len(sampled_ep) = 299, trace = from 156 to 164
ep_idx[i] = 313
buffer_index = 242 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 85
buffer_index = 14 , len(sampled_ep) = 108, trace = from 91 to 99
ep_idx[i] = 190
buffer_index = 119 , len(sampled_ep) = 42, trace = from 4 to 12
ep_idx[i] = 317
buffer_index = 246 , len(sampled_ep) = 84, trace = from 38 to 46
ep_idx[i] = 156
buffer_index = 85 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 307
buffer_index = 236 , len(sampled_ep) = 48, tra

buffer_index = 217 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 200
buffer_index = 129 , len(sampled_ep) = 82, trace = from 60 to 68
ep_idx[i] = 123
buffer_index = 52 , len(sampled_ep) = 198, trace = from 47 to 55
ep_idx[i] = 337
buffer_index = 266 , len(sampled_ep) = 82, trace = from 63 to 71
ep_idx[i] = 312
buffer_index = 241 , len(sampled_ep) = 108, trace = from 74 to 82
ep_idx[i] = 135
buffer_index = 64 , len(sampled_ep) = 211, trace = from 107 to 115
ep_idx[i] = 123
buffer_index = 52 , len(sampled_ep) = 198, trace = from 126 to 134
ep_idx[i] = 179
buffer_index = 108 , len(sampled_ep) = 77, trace = from 21 to 29
ep_idx[i] = 148
buffer_index = 77 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 175
buffer_index = 104 , len(sampled_ep) = 102, trace = from 82 to 90
ep_idx[i] = 237
buffer_index = 166 , len(sampled_ep) = 116, trace = from 33 to 41
ep_idx[i] = 220
buffer_index = 149 , len(sampled_ep) = 118, trace = from 13 to 21
ep_idx[i] = 178
buffer_index = 107 , 

buffer_index = 151 , len(sampled_ep) = 224, trace = from 112 to 120
ep_idx[i] = 194
buffer_index = 123 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 299
buffer_index = 228 , len(sampled_ep) = 100, trace = from 6 to 14
ep_idx[i] = 156
buffer_index = 85 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 306
buffer_index = 235 , len(sampled_ep) = 117, trace = from 76 to 84
ep_idx[i] = 218
buffer_index = 147 , len(sampled_ep) = 80, trace = from 20 to 28
ep_idx[i] = 264
buffer_index = 193 , len(sampled_ep) = 109, trace = from 24 to 32
ep_idx[i] = 317
buffer_index = 246 , len(sampled_ep) = 84, trace = from 67 to 75
ep_idx[i] = 263
buffer_index = 192 , len(sampled_ep) = 51, trace = from 29 to 37
ep_idx[i] = 200
buffer_index = 129 , len(sampled_ep) = 82, trace = from 48 to 56
ep_idx[i] = 204
buffer_index = 133 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 329
buffer_index = 258 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 87
buffer_index = 16 , len(s

buffer_index = 42 , len(sampled_ep) = 105, trace = from 81 to 89
ep_idx[i] = 153
buffer_index = 82 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 157
buffer_index = 86 , len(sampled_ep) = 103, trace = from 61 to 69
ep_idx[i] = 328
buffer_index = 257 , len(sampled_ep) = 75, trace = from 0 to 8
ep_idx[i] = 315
buffer_index = 244 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 223
buffer_index = 152 , len(sampled_ep) = 112, trace = from 47 to 55
ep_idx[i] = 139
buffer_index = 68 , len(sampled_ep) = 133, trace = from 40 to 48
ep_idx[i] = 154
buffer_index = 83 , len(sampled_ep) = 153, trace = from 118 to 126
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9930310000000575
epsilon is = 0.9930295000000575
epsilon is = 0.9930280000000575
epsilon is = 0.9930265000000575
epsilon is = 0.9930250000000576
Target Set Success
ep_idx = [329, 216, 334, 140, 92, 322, 143, 332, 109, 173, 116, 275, 293, 353, 330, 255, 88, 309, 234, 325, 325, 108, 111, 348, 77, 334, 287, 290, 304, 337,

buffer_index = 86 , len(sampled_ep) = 103, trace = from 95 to 103
ep_idx[i] = 220
buffer_index = 149 , len(sampled_ep) = 118, trace = from 29 to 37
ep_idx[i] = 202
buffer_index = 131 , len(sampled_ep) = 143, trace = from 36 to 44
ep_idx[i] = 160
buffer_index = 89 , len(sampled_ep) = 117, trace = from 79 to 87
ep_idx[i] = 356
buffer_index = 285 , len(sampled_ep) = 100, trace = from 86 to 94
ep_idx[i] = 240
buffer_index = 169 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 112
buffer_index = 41 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 335
buffer_index = 264 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 136
buffer_index = 65 , len(sampled_ep) = 108, trace = from 71 to 79
ep_idx[i] = 350
buffer_index = 279 , len(sampled_ep) = 198, trace = from 91 to 99
ep_idx[i] = 255
buffer_index = 184 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 213
buffer_index = 142 , len(sampled_ep) = 148, trace = from 88 to 96
ep_idx[i] = 87
buffer_index = 16 , len(sa

buffer_index = 279 , len(sampled_ep) = 198, trace = from 87 to 95
ep_idx[i] = 285
buffer_index = 214 , len(sampled_ep) = 166, trace = from 49 to 57
ep_idx[i] = 289
buffer_index = 218 , len(sampled_ep) = 83, trace = from 14 to 22
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9929935000000578
epsilon is = 0.9929920000000578
epsilon is = 0.9929905000000578
epsilon is = 0.9929890000000579
epsilon is = 0.9929875000000579
Target Set Success
ep_idx = [195, 273, 214, 362, 155, 150, 87, 115, 249, 212, 115, 74, 171, 137, 286, 334, 131, 312, 172, 213, 260, 271, 180, 166, 289, 209, 370, 213, 330, 195, 245, 259]
exp_idx = [21, 75, 74, 63, 76, 9, 66, 72, 56, 60, 137, 17, 21, 44, 83, 80, 16, 60, 57, 84, 32, 29, 134, 18, 36, 36, 156, 91, 127, 43, 44, 17]
idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 195
buffer_index = 124 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 273
buffer_index = 202 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 214
buffe

buffer_index = 270 , len(sampled_ep) = 51, trace = from 5 to 13
ep_idx[i] = 113
buffer_index = 42 , len(sampled_ep) = 105, trace = from 46 to 54
ep_idx[i] = 102
buffer_index = 31 , len(sampled_ep) = 103, trace = from 8 to 16
ep_idx[i] = 259
buffer_index = 188 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 120
buffer_index = 49 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 332
buffer_index = 261 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 148
buffer_index = 77 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 233
buffer_index = 162 , len(sampled_ep) = 45, trace = from 22 to 30
ep_idx[i] = 333
buffer_index = 262 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 123
buffer_index = 52 , len(sampled_ep) = 198, trace = from 134 to 142
ep_idx[i] = 333
buffer_index = 262 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 152
buffer_index = 81 , len(sampled_ep) = 228, trace = from 136 to 144
ep_idx[i] = 299
buffer_index = 228 , len(samp

ep_idx = [227, 165, 224, 107, 325, 329, 242, 307, 100, 334, 191, 160, 215, 212, 363, 76, 142, 131, 252, 306, 317, 307, 369, 285, 252, 306, 349, 180, 203, 332, 290, 200]
exp_idx = [52, 45, 13, 93, 8, 72, 14, 46, 30, 107, 25, 20, 53, 16, 70, 153, 41, 60, 7, 102, 19, 21, 45, 140, 17, 102, 16, 62, 18, 62, 53, 14]
idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 227
buffer_index = 156 , len(sampled_ep) = 112, trace = from 45 to 53
ep_idx[i] = 165
buffer_index = 94 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 224
buffer_index = 153 , len(sampled_ep) = 46, trace = from 6 to 14
ep_idx[i] = 107
buffer_index = 36 , len(sampled_ep) = 109, trace = from 86 to 94
ep_idx[i] = 325
buffer_index = 254 , len(sampled_ep) = 82, trace = from 1 to 9
ep_idx[i] = 329
buffer_index = 258 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 242
buffer_index = 171 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 307
buffer_index = 236 , len(sampled_ep) = 48, t

buffer_index = 144 , len(sampled_ep) = 117, trace = from 74 to 82
ep_idx[i] = 154
buffer_index = 83 , len(sampled_ep) = 153, trace = from 17 to 25
ep_idx[i] = 283
buffer_index = 212 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 311
buffer_index = 240 , len(sampled_ep) = 84, trace = from 22 to 30
ep_idx[i] = 241
buffer_index = 170 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 331
buffer_index = 260 , len(sampled_ep) = 84, trace = from 35 to 43
ep_idx[i] = 230
buffer_index = 159 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 209
buffer_index = 138 , len(sampled_ep) = 71, trace = from 25 to 33
ep_idx[i] = 218
buffer_index = 147 , len(sampled_ep) = 80, trace = from 8 to 16
ep_idx[i] = 298
buffer_index = 227 , len(sampled_ep) = 115, trace = from 100 to 108
ep_idx[i] = 98
buffer_index = 27 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 353
buffer_index = 282 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 113
buffer_index = 42 , len(samp

buffer_index = 177 , len(sampled_ep) = 77, trace = from 25 to 33
ep_idx[i] = 84
buffer_index = 13 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 277
buffer_index = 206 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 219
buffer_index = 148 , len(sampled_ep) = 110, trace = from 89 to 97
ep_idx[i] = 341
buffer_index = 270 , len(sampled_ep) = 51, trace = from 9 to 17
ep_idx[i] = 183
buffer_index = 112 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 187
buffer_index = 116 , len(sampled_ep) = 84, trace = from 14 to 22
ep_idx[i] = 223
buffer_index = 152 , len(sampled_ep) = 112, trace = from 59 to 67
ep_idx[i] = 110
buffer_index = 39 , len(sampled_ep) = 137, trace = from 30 to 38
ep_idx[i] = 248
buffer_index = 177 , len(sampled_ep) = 77, trace = from 60 to 68
ep_idx[i] = 203
buffer_index = 132 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 92
buffer_index = 21 , len(sampled_ep) = 281, trace = from 33 to 41
ep_idx[i] = 158
buffer_index = 87 , len(sample

buffer_index = 263 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 306
buffer_index = 235 , len(sampled_ep) = 117, trace = from 15 to 23
ep_idx[i] = 359
buffer_index = 288 , len(sampled_ep) = 45, trace = from 10 to 18
ep_idx[i] = 224
buffer_index = 153 , len(sampled_ep) = 46, trace = from 27 to 35
ep_idx[i] = 249
buffer_index = 178 , len(sampled_ep) = 75, trace = from 41 to 49
ep_idx[i] = 335
buffer_index = 264 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 299
buffer_index = 228 , len(sampled_ep) = 100, trace = from 74 to 82
ep_idx[i] = 268
buffer_index = 197 , len(sampled_ep) = 120, trace = from 82 to 90
ep_idx[i] = 119
buffer_index = 48 , len(sampled_ep) = 183, trace = from 175 to 183
ep_idx[i] = 264
buffer_index = 193 , len(sampled_ep) = 109, trace = from 77 to 85
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9928960000000586
epsilon is = 0.9928945000000586
epsilon is = 0.9928930000000586
epsilon is = 0.9928915000000587
epsilon is = 0.9928900000000587
Targe

buffer_index = 43 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 220
buffer_index = 149 , len(sampled_ep) = 118, trace = from 14 to 22
ep_idx[i] = 185
buffer_index = 114 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 256
buffer_index = 185 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 226
buffer_index = 155 , len(sampled_ep) = 120, trace = from 14 to 22
ep_idx[i] = 123
buffer_index = 52 , len(sampled_ep) = 198, trace = from 41 to 49
ep_idx[i] = 167
buffer_index = 96 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 290
buffer_index = 219 , len(sampled_ep) = 152, trace = from 66 to 74
ep_idx[i] = 274
buffer_index = 203 , len(sampled_ep) = 114, trace = from 3 to 11
ep_idx[i] = 291
buffer_index = 220 , len(sampled_ep) = 103, trace = from 90 to 98
ep_idx[i] = 274
buffer_index = 203 , len(sampled_ep) = 114, trace = from 55 to 63
ep_idx[i] = 129
buffer_index = 58 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 283
buffer_index = 212 , len(s

ep_idx[i] = 297
buffer_index = 226 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 104
buffer_index = 33 , len(sampled_ep) = 50, trace = from 11 to 19
ep_idx[i] = 328
buffer_index = 257 , len(sampled_ep) = 75, trace = from 32 to 40
ep_idx[i] = 180
buffer_index = 109 , len(sampled_ep) = 141, trace = from 26 to 34
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9928585000000589
epsilon is = 0.9928570000000589
epsilon is = 0.992855500000059
epsilon is = 0.992854000000059
epsilon is = 0.992852500000059
Target Set Success
ep_idx = [334, 180, 211, 180, 309, 152, 179, 336, 351, 349, 340, 303, 149, 161, 160, 304, 284, 334, 327, 71, 317, 211, 113, 185, 245, 136, 224, 330, 221, 294, 234, 90]
exp_idx = [52, 45, 18, 39, 34, 156, 29, 46, 86, 89, 19, 17, 53, 39, 98, 14, 21, 107, 12, 260, 26, 68, 19, 65, 11, 92, 14, 21, 39, 16, 59, 43]
idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 334
buffer_index = 263 , len(sampled_ep) = 113, trace = from 45 to 53
ep_idx[i] 

buffer_index = 257 , len(sampled_ep) = 75, trace = from 60 to 68
ep_idx[i] = 135
buffer_index = 64 , len(sampled_ep) = 211, trace = from 51 to 59
ep_idx[i] = 351
buffer_index = 280 , len(sampled_ep) = 143, trace = from 86 to 94
ep_idx[i] = 227
buffer_index = 156 , len(sampled_ep) = 112, trace = from 68 to 76
ep_idx[i] = 254
buffer_index = 183 , len(sampled_ep) = 116, trace = from 72 to 80
ep_idx[i] = 281
buffer_index = 210 , len(sampled_ep) = 178, trace = from 168 to 176
ep_idx[i] = 287
buffer_index = 216 , len(sampled_ep) = 194, trace = from 99 to 107
ep_idx[i] = 172
buffer_index = 101 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 222
buffer_index = 151 , len(sampled_ep) = 224, trace = from 42 to 50
ep_idx[i] = 134
buffer_index = 63 , len(sampled_ep) = 208, trace = from 65 to 73
ep_idx[i] = 199
buffer_index = 128 , len(sampled_ep) = 116, trace = from 46 to 54
ep_idx[i] = 219
buffer_index = 148 , len(sampled_ep) = 110, trace = from 41 to 49
ep_idx[i] = 334
buffer_index = 26

epsilon is = 0.9928165000000593
epsilon is = 0.9928150000000593
Target Set Success
ep_idx = [323, 92, 167, 287, 281, 253, 330, 298, 334, 130, 166, 222, 174, 195, 138, 227, 301, 173, 118, 106, 327, 74, 212, 115, 286, 294, 202, 92, 153, 347, 332, 106]
exp_idx = [36, 236, 74, 147, 134, 26, 18, 18, 111, 63, 57, 18, 16, 42, 16, 24, 59, 44, 69, 97, 22, 16, 16, 119, 135, 19, 130, 144, 30, 30, 7, 46]
idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 323
buffer_index = 252 , len(sampled_ep) = 43, trace = from 29 to 37
ep_idx[i] = 92
buffer_index = 21 , len(sampled_ep) = 281, trace = from 229 to 237
ep_idx[i] = 167
buffer_index = 96 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 287
buffer_index = 216 , len(sampled_ep) = 194, trace = from 140 to 148
ep_idx[i] = 281
buffer_index = 210 , len(sampled_ep) = 178, trace = from 127 to 135
ep_idx[i] = 253
buffer_index = 182 , len(sampled_ep) = 76, trace = from 19 to 27
ep_idx[i] = 330
buffer_index = 259 , len(sampl

buffer_index = 44 , len(sampled_ep) = 299, trace = from 291 to 299
ep_idx[i] = 241
buffer_index = 170 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 171
buffer_index = 100 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 296
buffer_index = 225 , len(sampled_ep) = 108, trace = from 73 to 81
ep_idx[i] = 322
buffer_index = 251 , len(sampled_ep) = 111, trace = from 95 to 103
ep_idx[i] = 153
buffer_index = 82 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 202
buffer_index = 131 , len(sampled_ep) = 143, trace = from 25 to 33
ep_idx[i] = 139
buffer_index = 68 , len(sampled_ep) = 133, trace = from 1 to 9
ep_idx[i] = 206
buffer_index = 135 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 342
buffer_index = 271 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 349
buffer_index = 278 , len(sampled_ep) = 228, trace = from 207 to 215
ep_idx[i] = 231
buffer_index = 160 , len(sampled_ep) = 77, trace = from 7 to 15
ep_idx[i] = 225
buffer_index = 154 , len

idx_offset = 71, self.episode_index = 370, len(self.buffer) = 300
ep_idx[i] = 212
buffer_index = 141 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 164
buffer_index = 93 , len(sampled_ep) = 118, trace = from 2 to 10
ep_idx[i] = 317
buffer_index = 246 , len(sampled_ep) = 84, trace = from 11 to 19
ep_idx[i] = 226
buffer_index = 155 , len(sampled_ep) = 120, trace = from 43 to 51
ep_idx[i] = 262
buffer_index = 191 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 332
buffer_index = 261 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 272
buffer_index = 201 , len(sampled_ep) = 70, trace = from 20 to 28
ep_idx[i] = 169
buffer_index = 98 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 336
buffer_index = 265 , len(sampled_ep) = 102, trace = from 70 to 78
ep_idx[i] = 136
buffer_index = 65 , len(sampled_ep) = 108, trace = from 3 to 11
ep_idx[i] = 270
buffer_index = 199 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 334
buffer_index = 263 , len(sam

buffer_index = 49 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 225
buffer_index = 154 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 337
buffer_index = 266 , len(sampled_ep) = 82, trace = from 14 to 22
ep_idx[i] = 364
buffer_index = 293 , len(sampled_ep) = 48, trace = from 18 to 26
ep_idx[i] = 238
buffer_index = 167 , len(sampled_ep) = 79, trace = from 7 to 15
ep_idx[i] = 187
buffer_index = 116 , len(sampled_ep) = 84, trace = from 12 to 20
ep_idx[i] = 364
buffer_index = 293 , len(sampled_ep) = 48, trace = from 40 to 48
ep_idx[i] = 322
buffer_index = 251 , len(sampled_ep) = 111, trace = from 66 to 74
ep_idx[i] = 329
buffer_index = 258 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 171
buffer_index = 100 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 110
buffer_index = 39 , len(sampled_ep) = 137, trace = from 56 to 64
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9927610000000597
epsilon is = 0.9927595000000597
epsilon is = 0.9927580000000598

buffer_index = 181 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 350
buffer_index = 279 , len(sampled_ep) = 198, trace = from 28 to 36
ep_idx[i] = 290
buffer_index = 219 , len(sampled_ep) = 152, trace = from 75 to 83
ep_idx[i] = 264
buffer_index = 193 , len(sampled_ep) = 109, trace = from 64 to 72
ep_idx[i] = 278
buffer_index = 207 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 368
buffer_index = 297 , len(sampled_ep) = 81, trace = from 27 to 35
ep_idx[i] = 303
buffer_index = 232 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 322
buffer_index = 251 , len(sampled_ep) = 111, trace = from 24 to 32
ep_idx[i] = 334
buffer_index = 263 , len(sampled_ep) = 113, trace = from 84 to 92
ep_idx[i] = 340
buffer_index = 269 , len(sampled_ep) = 142, trace = from 1 to 9
ep_idx[i] = 222
buffer_index = 151 , len(sampled_ep) = 224, trace = from 156 to 164
ep_idx[i] = 330
buffer_index = 259 , len(sampled_ep) = 175, trace = from 100 to 108
ep_idx[i] = 141
buffer_index = 70 ,

buffer_index = 202 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 334
buffer_index = 263 , len(sampled_ep) = 113, trace = from 93 to 101
ep_idx[i] = 221
buffer_index = 150 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 221
buffer_index = 150 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 230
buffer_index = 159 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 215
buffer_index = 144 , len(sampled_ep) = 117, trace = from 58 to 66
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.99272350000006
epsilon is = 0.9927220000000601
epsilon is = 0.9927205000000601
epsilon is = 0.9927190000000601
epsilon is = 0.9927175000000601
Target Set Success
ep_idx = [328, 115, 165, 325, 334, 167, 114, 309, 288, 170, 340, 191, 170, 295, 295, 330, 291, 313, 350, 115, 285, 177, 147, 310, 264, 260, 273, 280, 239, 309, 330, 99]
exp_idx = [34, 106, 17, 41, 104, 66, 27, 102, 27, 75, 40, 17, 34, 25, 16, 72, 18, 45, 73, 84, 68, 17, 18, 151, 20, 76, 44, 55, 55, 91, 27, 15]
idx_o

buffer_index = 262 , len(sampled_ep) = 113, trace = from 34 to 42
ep_idx[i] = 362
buffer_index = 290 , len(sampled_ep) = 104, trace = from 96 to 104
ep_idx[i] = 308
buffer_index = 236 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 223
buffer_index = 151 , len(sampled_ep) = 112, trace = from 89 to 97
ep_idx[i] = 341
buffer_index = 269 , len(sampled_ep) = 51, trace = from 29 to 37
ep_idx[i] = 130
buffer_index = 58 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 234
buffer_index = 162 , len(sampled_ep) = 167, trace = from 82 to 90
ep_idx[i] = 194
buffer_index = 122 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 270
buffer_index = 198 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 360
buffer_index = 288 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 309
buffer_index = 237 , len(sampled_ep) = 115, trace = from 83 to 91
ep_idx[i] = 88
buffer_index = 16 , len(sampled_ep) = 114, trace = from 76 to 84
ep_idx[i] = 264
buffer_index = 192 , len(sam

buffer_index = 157 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 227
buffer_index = 155 , len(sampled_ep) = 112, trace = from 92 to 100
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9926860000000604
epsilon is = 0.9926845000000604
epsilon is = 0.9926830000000604
epsilon is = 0.9926815000000604
epsilon is = 0.9926800000000604
Target Set Success
ep_idx = [119, 215, 102, 152, 320, 251, 205, 177, 272, 274, 281, 302, 187, 95, 319, 371, 337, 286, 186, 349, 248, 256, 91, 223, 327, 176, 107, 159, 334, 333, 334, 163]
exp_idx = [40, 52, 94, 95, 20, 105, 13, 16, 26, 7, 174, 78, 68, 26, 36, 20, 25, 90, 57, 67, 29, 11, 28, 99, 75, 66, 50, 15, 109, 18, 106, 20]
idx_offset = 72, self.episode_index = 371, len(self.buffer) = 300
ep_idx[i] = 119
buffer_index = 47 , len(sampled_ep) = 183, trace = from 33 to 41
ep_idx[i] = 215
buffer_index = 143 , len(sampled_ep) = 117, trace = from 45 to 53
ep_idx[i] = 102
buffer_index = 30 , len(sampled_ep) = 103, trace = from 87 to 95
ep_idx[i] = 152
buffe

buffer_index = 6 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 116
buffer_index = 44 , len(sampled_ep) = 162, trace = from 76 to 84
ep_idx[i] = 310
buffer_index = 238 , len(sampled_ep) = 164, trace = from 136 to 144
ep_idx[i] = 257
buffer_index = 185 , len(sampled_ep) = 82, trace = from 36 to 44
ep_idx[i] = 316
buffer_index = 244 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 195
buffer_index = 123 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 145
buffer_index = 73 , len(sampled_ep) = 117, trace = from 93 to 101
ep_idx[i] = 256
buffer_index = 184 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 212
buffer_index = 140 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 80
buffer_index = 8 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 323
buffer_index = 251 , len(sampled_ep) = 43, trace = from 3 to 11
ep_idx[i] = 117
buffer_index = 45 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 345
buffer_index = 273 , len(sampled_

ep_idx = [110, 334, 254, 113, 283, 195, 331, 298, 350, 188, 85, 257, 87, 151, 94, 287, 294, 150, 327, 92, 144, 371, 334, 100, 145, 299, 119, 205, 269, 332, 94, 304]
exp_idx = [22, 101, 79, 16, 20, 29, 55, 96, 184, 18, 21, 16, 45, 50, 31, 16, 66, 18, 74, 228, 19, 68, 39, 69, 81, 65, 131, 43, 33, 13, 14, 16]
idx_offset = 72, self.episode_index = 371, len(self.buffer) = 300
ep_idx[i] = 110
buffer_index = 38 , len(sampled_ep) = 137, trace = from 15 to 23
ep_idx[i] = 334
buffer_index = 262 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 254
buffer_index = 182 , len(sampled_ep) = 116, trace = from 72 to 80
ep_idx[i] = 113
buffer_index = 41 , len(sampled_ep) = 105, trace = from 9 to 17
ep_idx[i] = 283
buffer_index = 211 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 195
buffer_index = 123 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 331
buffer_index = 259 , len(sampled_ep) = 84, trace = from 48 to 56
ep_idx[i] = 298
buffer_index = 226 , len(sampled_ep) = 11

buffer_index = 127 , len(sampled_ep) = 116, trace = from 90 to 98
ep_idx[i] = 286
buffer_index = 214 , len(sampled_ep) = 147, trace = from 75 to 83
ep_idx[i] = 201
buffer_index = 129 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 197
buffer_index = 125 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 334
buffer_index = 262 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 89
buffer_index = 17 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 281
buffer_index = 209 , len(sampled_ep) = 178, trace = from 40 to 48
ep_idx[i] = 189
buffer_index = 117 , len(sampled_ep) = 84, trace = from 35 to 43
ep_idx[i] = 226
buffer_index = 154 , len(sampled_ep) = 120, trace = from 68 to 76
ep_idx[i] = 134
buffer_index = 62 , len(sampled_ep) = 208, trace = from 134 to 142
ep_idx[i] = 339
buffer_index = 267 , len(sampled_ep) = 78, trace = from 60 to 68
ep_idx[i] = 237
buffer_index = 165 , len(sampled_ep) = 116, trace = from 2 to 10
ep_idx[i] = 232
buffer_index = 160 , l

buffer_index = 98 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 92
buffer_index = 20 , len(sampled_ep) = 281, trace = from 93 to 101
ep_idx[i] = 330
buffer_index = 258 , len(sampled_ep) = 175, trace = from 114 to 122
ep_idx[i] = 81
buffer_index = 9 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 133
buffer_index = 61 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 260
buffer_index = 188 , len(sampled_ep) = 108, trace = from 69 to 77
ep_idx[i] = 145
buffer_index = 73 , len(sampled_ep) = 117, trace = from 93 to 101
ep_idx[i] = 84
buffer_index = 12 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 158
buffer_index = 86 , len(sampled_ep) = 83, trace = from 61 to 69
ep_idx[i] = 360
buffer_index = 288 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 334
buffer_index = 262 , len(sampled_ep) = 113, trace = from 18 to 26
ep_idx[i] = 333
buffer_index = 261 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 214
buffer_index = 142 , len(sampled

ep_idx[i] = 176
buffer_index = 104 , len(sampled_ep) = 80, trace = from 36 to 44
ep_idx[i] = 170
buffer_index = 98 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 366
buffer_index = 294 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 347
buffer_index = 275 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 73
buffer_index = 1 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 76
buffer_index = 4 , len(sampled_ep) = 198, trace = from 60 to 68
ep_idx[i] = 140
buffer_index = 68 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 258
buffer_index = 186 , len(sampled_ep) = 76, trace = from 28 to 36
ep_idx[i] = 334
buffer_index = 262 , len(sampled_ep) = 113, trace = from 53 to 61
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9925885000000612
length of poped element = 82 , cntr = 75 , diff = 7
epsilon is = 0.9925870000000612
epsilon is = 0.9925855000000612
epsilon is = 0.9925840000000612
epsilon is = 0.9925825000000612
Target Set Success
ep_idx = [309, 

buffer_index = 248 , len(sampled_ep) = 81, trace = from 55 to 63
ep_idx[i] = 191
buffer_index = 118 , len(sampled_ep) = 131, trace = from 86 to 94
ep_idx[i] = 164
buffer_index = 91 , len(sampled_ep) = 118, trace = from 10 to 18
ep_idx[i] = 244
buffer_index = 171 , len(sampled_ep) = 17, trace = from 4 to 12
ep_idx[i] = 129
buffer_index = 56 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 228
buffer_index = 155 , len(sampled_ep) = 81, trace = from 26 to 34
ep_idx[i] = 224
buffer_index = 151 , len(sampled_ep) = 46, trace = from 7 to 15
ep_idx[i] = 239
buffer_index = 166 , len(sampled_ep) = 101, trace = from 46 to 54
ep_idx[i] = 153
buffer_index = 80 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 99
buffer_index = 26 , len(sampled_ep) = 83, trace = from 56 to 64
ep_idx[i] = 150
buffer_index = 77 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 100
buffer_index = 27 , len(sampled_ep) = 102, trace = from 22 to 30
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep

buffer_index = 4 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 351
buffer_index = 278 , len(sampled_ep) = 143, trace = from 121 to 129
ep_idx[i] = 102
buffer_index = 29 , len(sampled_ep) = 103, trace = from 82 to 90
ep_idx[i] = 312
buffer_index = 239 , len(sampled_ep) = 108, trace = from 46 to 54
ep_idx[i] = 330
buffer_index = 257 , len(sampled_ep) = 175, trace = from 132 to 140
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9925510000000615
epsilon is = 0.9925495000000615
epsilon is = 0.9925480000000615
epsilon is = 0.9925465000000615
epsilon is = 0.9925450000000615
Target Set Success
ep_idx = [133, 87, 166, 117, 131, 296, 192, 296, 87, 264, 245, 347, 92, 92, 334, 349, 193, 179, 359, 322, 204, 145, 138, 202, 94, 251, 148, 356, 231, 259, 184, 129]
exp_idx = [72, 19, 69, 62, 43, 22, 14, 86, 17, 15, 20, 23, 266, 186, 85, 131, 46, 16, 20, 108, 12, 107, 52, 69, 14, 108, 62, 93, 21, 64, 61, 16]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 133
bu

buffer_index = 66 , len(sampled_ep) = 133, trace = from 11 to 19
ep_idx[i] = 142
buffer_index = 69 , len(sampled_ep) = 108, trace = from 35 to 43
ep_idx[i] = 133
buffer_index = 60 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 270
buffer_index = 197 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 356
buffer_index = 283 , len(sampled_ep) = 100, trace = from 84 to 92
ep_idx[i] = 131
buffer_index = 58 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 180
buffer_index = 107 , len(sampled_ep) = 141, trace = from 108 to 116
ep_idx[i] = 370
buffer_index = 297 , len(sampled_ep) = 189, trace = from 142 to 150
ep_idx[i] = 290
buffer_index = 217 , len(sampled_ep) = 152, trace = from 105 to 113
ep_idx[i] = 149
buffer_index = 76 , len(sampled_ep) = 82, trace = from 67 to 75
ep_idx[i] = 286
buffer_index = 213 , len(sampled_ep) = 147, trace = from 34 to 42
ep_idx[i] = 330
buffer_index = 257 , len(sampled_ep) = 175, trace = from 131 to 139
ep_idx[i] = 281
buffer_index = 208

epsilon is = 0.9925135000000618
epsilon is = 0.9925120000000618
epsilon is = 0.9925105000000618
epsilon is = 0.9925090000000618
epsilon is = 0.9925075000000618
Target Set Success
ep_idx = [162, 216, 235, 77, 188, 285, 275, 220, 320, 208, 158, 153, 334, 206, 334, 150, 301, 280, 148, 86, 157, 251, 317, 239, 92, 115, 139, 151, 263, 286, 202, 351]
exp_idx = [102, 75, 21, 15, 15, 131, 16, 57, 20, 51, 80, 9, 102, 34, 69, 64, 20, 78, 24, 12, 76, 43, 53, 59, 63, 45, 17, 46, 14, 38, 81, 58]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 162
buffer_index = 89 , len(sampled_ep) = 112, trace = from 95 to 103
ep_idx[i] = 216
buffer_index = 143 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 235
buffer_index = 162 , len(sampled_ep) = 46, trace = from 14 to 22
ep_idx[i] = 77
buffer_index = 4 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 188
buffer_index = 115 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 285
buffer_index = 212 , len(sampl

buffer_index = 263 , len(sampled_ep) = 102, trace = from 9 to 17
ep_idx[i] = 278
buffer_index = 205 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 132
buffer_index = 59 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 222
buffer_index = 149 , len(sampled_ep) = 224, trace = from 96 to 104
ep_idx[i] = 338
buffer_index = 265 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 259
buffer_index = 186 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 349
buffer_index = 276 , len(sampled_ep) = 228, trace = from 76 to 84
ep_idx[i] = 328
buffer_index = 255 , len(sampled_ep) = 75, trace = from 13 to 21
ep_idx[i] = 359
buffer_index = 286 , len(sampled_ep) = 45, trace = from 7 to 15
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 98 to 106
ep_idx[i] = 178
buffer_index = 105 , len(sampled_ep) = 80, trace = from 23 to 31
ep_idx[i] = 283
buffer_index = 210 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 180
buffer_index = 107 , len(sa

exp_idx = [14, 21, 30, 21, 99, 41, 18, 10, 59, 9, 89, 30, 49, 18, 22, 54, 66, 22, 64, 46, 200, 13, 24, 135, 13, 33, 15, 54, 99, 45, 78, 16]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 198
buffer_index = 125 , len(sampled_ep) = 42, trace = from 7 to 15
ep_idx[i] = 291
buffer_index = 218 , len(sampled_ep) = 103, trace = from 14 to 22
ep_idx[i] = 118
buffer_index = 45 , len(sampled_ep) = 111, trace = from 23 to 31
ep_idx[i] = 187
buffer_index = 114 , len(sampled_ep) = 84, trace = from 14 to 22
ep_idx[i] = 175
buffer_index = 102 , len(sampled_ep) = 102, trace = from 92 to 100
ep_idx[i] = 123
buffer_index = 50 , len(sampled_ep) = 198, trace = from 34 to 42
ep_idx[i] = 104
buffer_index = 31 , len(sampled_ep) = 50, trace = from 11 to 19
ep_idx[i] = 205
buffer_index = 132 , len(sampled_ep) = 78, trace = from 3 to 11
ep_idx[i] = 325
buffer_index = 252 , len(sampled_ep) = 82, trace = from 52 to 60
ep_idx[i] = 224
buffer_index = 151 , len(sampled_ep) = 46, trace 

buffer_index = 277 , len(sampled_ep) = 198, trace = from 93 to 101
ep_idx[i] = 135
buffer_index = 62 , len(sampled_ep) = 211, trace = from 177 to 185
ep_idx[i] = 127
buffer_index = 54 , len(sampled_ep) = 50, trace = from 19 to 27
ep_idx[i] = 192
buffer_index = 119 , len(sampled_ep) = 78, trace = from 30 to 38
ep_idx[i] = 372
buffer_index = 299 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 293
buffer_index = 220 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 291
buffer_index = 218 , len(sampled_ep) = 103, trace = from 54 to 62
ep_idx[i] = 139
buffer_index = 66 , len(sampled_ep) = 133, trace = from 61 to 69
ep_idx[i] = 156
buffer_index = 83 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 257
buffer_index = 184 , len(sampled_ep) = 82, trace = from 66 to 74
ep_idx[i] = 363
buffer_index = 290 , len(sampled_ep) = 73, trace = from 53 to 61
ep_idx[i] = 203
buffer_index = 130 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 309
buffer_index = 236 , len(s

buffer_index = 48 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 92
buffer_index = 19 , len(sampled_ep) = 281, trace = from 86 to 94
ep_idx[i] = 302
buffer_index = 229 , len(sampled_ep) = 79, trace = from 52 to 60
ep_idx[i] = 330
buffer_index = 257 , len(sampled_ep) = 175, trace = from 139 to 147
ep_idx[i] = 234
buffer_index = 161 , len(sampled_ep) = 167, trace = from 146 to 154
ep_idx[i] = 182
buffer_index = 109 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 154
buffer_index = 81 , len(sampled_ep) = 153, trace = from 34 to 42
ep_idx[i] = 145
buffer_index = 72 , len(sampled_ep) = 117, trace = from 74 to 82
ep_idx[i] = 202
buffer_index = 129 , len(sampled_ep) = 143, trace = from 131 to 139
ep_idx[i] = 211
buffer_index = 138 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 253
buffer_index = 180 , len(sampled_ep) = 76, trace = from 5 to 13
ep_idx[i] = 120
buffer_index = 47 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 136
buffer_index = 63 , len

buffer_index = 264 , len(sampled_ep) = 82, trace = from 47 to 55
ep_idx[i] = 100
buffer_index = 27 , len(sampled_ep) = 102, trace = from 61 to 69
ep_idx[i] = 118
buffer_index = 45 , len(sampled_ep) = 111, trace = from 11 to 19
ep_idx[i] = 330
buffer_index = 257 , len(sampled_ep) = 175, trace = from 76 to 84
ep_idx[i] = 298
buffer_index = 225 , len(sampled_ep) = 115, trace = from 35 to 43
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 103 to 111
ep_idx[i] = 330
buffer_index = 257 , len(sampled_ep) = 175, trace = from 130 to 138
ep_idx[i] = 295
buffer_index = 222 , len(sampled_ep) = 81, trace = from 57 to 65
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9924160000000626
epsilon is = 0.9924145000000626
epsilon is = 0.9924130000000626
epsilon is = 0.9924115000000626
epsilon is = 0.9924100000000626
Target Set Success
ep_idx = [334, 281, 293, 196, 326, 302, 158, 131, 143, 295, 334, 330, 254, 222, 211, 141, 296, 178, 129, 159, 238, 245, 245, 168, 131, 291, 97, 248,

buffer_index = 168 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 337
buffer_index = 264 , len(sampled_ep) = 82, trace = from 5 to 13
ep_idx[i] = 266
buffer_index = 193 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 222
buffer_index = 149 , len(sampled_ep) = 224, trace = from 77 to 85
ep_idx[i] = 169
buffer_index = 96 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 215
buffer_index = 142 , len(sampled_ep) = 117, trace = from 10 to 18
ep_idx[i] = 229
buffer_index = 156 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 202
buffer_index = 129 , len(sampled_ep) = 143, trace = from 92 to 100
ep_idx[i] = 123
buffer_index = 50 , len(sampled_ep) = 198, trace = from 97 to 105
ep_idx[i] = 310
buffer_index = 237 , len(sampled_ep) = 164, trace = from 9 to 17
ep_idx[i] = 208
buffer_index = 135 , len(sampled_ep) = 81, trace = from 52 to 60
ep_idx[i] = 343
buffer_index = 270 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 309
buffer_index = 236 , len(sa

buffer_index = 104 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 306
buffer_index = 233 , len(sampled_ep) = 117, trace = from 46 to 54
ep_idx[i] = 291
buffer_index = 218 , len(sampled_ep) = 103, trace = from 9 to 17
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9923785000000629
epsilon is = 0.9923770000000629
epsilon is = 0.9923755000000629
epsilon is = 0.9923740000000629
epsilon is = 0.9923725000000629
Target Set Success
ep_idx = [146, 136, 288, 129, 221, 178, 149, 259, 227, 197, 274, 204, 115, 155, 115, 254, 290, 348, 221, 326, 243, 330, 339, 313, 199, 154, 311, 135, 130, 74, 353, 347]
exp_idx = [40, 60, 50, 66, 37, 10, 21, 35, 74, 24, 43, 31, 182, 13, 68, 27, 144, 74, 18, 16, 70, 161, 72, 15, 37, 32, 20, 42, 17, 18, 80, 15]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 146
buffer_index = 73 , len(sampled_ep) = 81, trace = from 33 to 41
ep_idx[i] = 136
buffer_index = 63 , len(sampled_ep) = 108, trace = from 53 to 61
ep_idx[i] = 288
buffer

buffer_index = 232 , len(sampled_ep) = 71, trace = from 50 to 58
ep_idx[i] = 123
buffer_index = 50 , len(sampled_ep) = 198, trace = from 41 to 49
ep_idx[i] = 126
buffer_index = 53 , len(sampled_ep) = 46, trace = from 2 to 10
ep_idx[i] = 135
buffer_index = 62 , len(sampled_ep) = 211, trace = from 20 to 28
ep_idx[i] = 274
buffer_index = 201 , len(sampled_ep) = 114, trace = from 23 to 31
ep_idx[i] = 316
buffer_index = 243 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 324
buffer_index = 251 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 310
buffer_index = 237 , len(sampled_ep) = 164, trace = from 123 to 131
ep_idx[i] = 372
buffer_index = 299 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 249
buffer_index = 176 , len(sampled_ep) = 75, trace = from 41 to 49
ep_idx[i] = 330
buffer_index = 257 , len(sampled_ep) = 175, trace = from 25 to 33
ep_idx[i] = 256
buffer_index = 183 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 346
buffer_index = 273 , len(

ep_idx = [221, 157, 268, 269, 118, 278, 238, 92, 301, 331, 191, 361, 162, 206, 356, 263, 213, 76, 286, 323, 334, 298, 206, 306, 220, 334, 322, 256, 349, 334, 277, 214]
exp_idx = [8, 16, 58, 12, 75, 77, 52, 245, 20, 71, 69, 28, 80, 46, 44, 19, 92, 161, 17, 24, 46, 102, 45, 99, 62, 109, 34, 25, 92, 109, 71, 61]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 221
buffer_index = 148 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 157
buffer_index = 84 , len(sampled_ep) = 103, trace = from 9 to 17
ep_idx[i] = 268
buffer_index = 195 , len(sampled_ep) = 120, trace = from 51 to 59
ep_idx[i] = 269
buffer_index = 196 , len(sampled_ep) = 45, trace = from 5 to 13
ep_idx[i] = 118
buffer_index = 45 , len(sampled_ep) = 111, trace = from 68 to 76
ep_idx[i] = 278
buffer_index = 205 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 238
buffer_index = 165 , len(sampled_ep) = 79, trace = from 45 to 53
ep_idx[i] = 92
buffer_index = 19 , len(sampled_ep) = 281, t

buffer_index = 136 , len(sampled_ep) = 71, trace = from 54 to 62
ep_idx[i] = 162
buffer_index = 89 , len(sampled_ep) = 112, trace = from 6 to 14
ep_idx[i] = 153
buffer_index = 80 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 82 to 90
ep_idx[i] = 255
buffer_index = 182 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 312
buffer_index = 239 , len(sampled_ep) = 108, trace = from 90 to 98
ep_idx[i] = 344
buffer_index = 271 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 311
buffer_index = 238 , len(sampled_ep) = 84, trace = from 52 to 60
ep_idx[i] = 339
buffer_index = 266 , len(sampled_ep) = 78, trace = from 31 to 39
ep_idx[i] = 147
buffer_index = 74 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 159
buffer_index = 86 , len(sampled_ep) = 79, trace = from 41 to 49
ep_idx[i] = 139
buffer_index = 66 , len(sampled_ep) = 133, trace = from 62 to 70
ep_idx[i] = 222
buffer_index = 149 , len(sample

buffer_index = 33 , len(sampled_ep) = 103, trace = from 1 to 9
ep_idx[i] = 308
buffer_index = 235 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 339
buffer_index = 266 , len(sampled_ep) = 78, trace = from 67 to 75
ep_idx[i] = 371
buffer_index = 298 , len(sampled_ep) = 299, trace = from 285 to 293
ep_idx[i] = 246
buffer_index = 173 , len(sampled_ep) = 47, trace = from 33 to 41
ep_idx[i] = 242
buffer_index = 169 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 340
buffer_index = 267 , len(sampled_ep) = 142, trace = from 15 to 23
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 51 to 59
ep_idx[i] = 286
buffer_index = 213 , len(sampled_ep) = 147, trace = from 111 to 119
ep_idx[i] = 85
buffer_index = 12 , len(sampled_ep) = 108, trace = from 86 to 94
ep_idx[i] = 177
buffer_index = 104 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 345
buffer_index = 272 , len(sampled_ep) = 108, trace = from 38 to 46
ep_idx[i] = 328
buffer_index = 255 , le

ep_idx[i] = 331
buffer_index = 258 , len(sampled_ep) = 84, trace = from 56 to 64
ep_idx[i] = 157
buffer_index = 84 , len(sampled_ep) = 103, trace = from 3 to 11
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 92 to 100
ep_idx[i] = 215
buffer_index = 142 , len(sampled_ep) = 117, trace = from 83 to 91
ep_idx[i] = 126
buffer_index = 53 , len(sampled_ep) = 46, trace = from 5 to 13
ep_idx[i] = 119
buffer_index = 46 , len(sampled_ep) = 183, trace = from 32 to 40
ep_idx[i] = 308
buffer_index = 235 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 153
buffer_index = 80 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 79
buffer_index = 6 , len(sampled_ep) = 75, trace = from 39 to 47
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9922810000000637
epsilon is = 0.9922795000000637
epsilon is = 0.9922780000000637
epsilon is = 0.9922765000000637
epsilon is = 0.9922750000000637
Target Set Success
ep_idx = [237, 357, 76, 337, 213, 350, 280, 110, 231, 225, 296,

buffer_index = 81 , len(sampled_ep) = 153, trace = from 28 to 36
ep_idx[i] = 218
buffer_index = 145 , len(sampled_ep) = 80, trace = from 6 to 14
ep_idx[i] = 192
buffer_index = 119 , len(sampled_ep) = 78, trace = from 8 to 16
ep_idx[i] = 92
buffer_index = 19 , len(sampled_ep) = 281, trace = from 140 to 148
ep_idx[i] = 335
buffer_index = 262 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 286
buffer_index = 213 , len(sampled_ep) = 147, trace = from 130 to 138
ep_idx[i] = 123
buffer_index = 50 , len(sampled_ep) = 198, trace = from 135 to 143
ep_idx[i] = 128
buffer_index = 55 , len(sampled_ep) = 74, trace = from 7 to 15
ep_idx[i] = 149
buffer_index = 76 , len(sampled_ep) = 82, trace = from 24 to 32
ep_idx[i] = 216
buffer_index = 143 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 261
buffer_index = 188 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 315
buffer_index = 242 , len(

ep_idx[i] = 235
buffer_index = 162 , len(sampled_ep) = 46, trace = from 15 to 23
ep_idx[i] = 95
buffer_index = 22 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 130
buffer_index = 57 , len(sampled_ep) = 81, trace = from 42 to 50
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.992243500000064
epsilon is = 0.992242000000064
epsilon is = 0.992240500000064
epsilon is = 0.992239000000064
epsilon is = 0.992237500000064
Target Set Success
ep_idx = [368, 348, 334, 325, 114, 328, 106, 267, 152, 142, 331, 92, 103, 164, 188, 330, 286, 237, 305, 103, 229, 310, 138, 184, 371, 115, 272, 158, 331, 136, 283, 334]
exp_idx = [45, 71, 104, 44, 72, 42, 44, 51, 81, 13, 23, 35, 23, 49, 64, 17, 9, 51, 69, 69, 51, 123, 55, 43, 59, 181, 34, 66, 45, 77, 13, 97]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 368
buffer_index = 295 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 348
buffer_index = 275 , len(sampled_ep) = 77, trace = from 64 to 72
ep_idx[i] = 334

buffer_index = 3 , len(sampled_ep) = 198, trace = from 167 to 175
ep_idx[i] = 88
buffer_index = 15 , len(sampled_ep) = 114, trace = from 43 to 51
ep_idx[i] = 338
buffer_index = 265 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 154
buffer_index = 81 , len(sampled_ep) = 153, trace = from 104 to 112
ep_idx[i] = 223
buffer_index = 150 , len(sampled_ep) = 112, trace = from 62 to 70
ep_idx[i] = 363
buffer_index = 290 , len(sampled_ep) = 73, trace = from 65 to 73
ep_idx[i] = 349
buffer_index = 276 , len(sampled_ep) = 228, trace = from 207 to 215
ep_idx[i] = 358
buffer_index = 285 , len(sampled_ep) = 81, trace = from 44 to 52
ep_idx[i] = 267
buffer_index = 194 , len(sampled_ep) = 83, trace = from 26 to 34
ep_idx[i] = 118
buffer_index = 45 , len(sampled_ep) = 111, trace = from 69 to 77
ep_idx[i] = 309
buffer_index = 236 , len(sampled_ep) = 115, trace = from 92 to 100
ep_idx[i] = 115
buffer_index = 42 , len(sampled_ep) = 299, trace = from 223 to 231
ep_idx[i] = 305
buffer_index = 232

ep_idx = [184, 284, 181, 314, 286, 318, 134, 308, 166, 295, 281, 152, 336, 75, 242, 328, 271, 334, 356, 310, 103, 157, 82, 152, 248, 146, 326, 207, 339, 172, 334, 324]
exp_idx = [44, 17, 12, 18, 14, 76, 24, 57, 49, 37, 112, 102, 45, 30, 23, 22, 14, 108, 37, 120, 35, 77, 38, 108, 35, 74, 63, 67, 43, 31, 108, 35]
idx_offset = 73, self.episode_index = 372, len(self.buffer) = 300
ep_idx[i] = 184
buffer_index = 111 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 284
buffer_index = 211 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 181
buffer_index = 108 , len(sampled_ep) = 43, trace = from 5 to 13
ep_idx[i] = 314
buffer_index = 241 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 286
buffer_index = 213 , len(sampled_ep) = 147, trace = from 7 to 15
ep_idx[i] = 318
buffer_index = 245 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 134
buffer_index = 61 , len(sampled_ep) = 208, trace = from 17 to 25
ep_idx[i] = 308
buffer_index = 235 , len(sampled_ep) = 

buffer_index = 114 , len(sampled_ep) = 84, trace = from 48 to 56
ep_idx[i] = 97
buffer_index = 24 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 194
buffer_index = 121 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 114
buffer_index = 41 , len(sampled_ep) = 81, trace = from 70 to 78
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 71 to 79
ep_idx[i] = 138
buffer_index = 65 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 334
buffer_index = 261 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 349
buffer_index = 276 , len(sampled_ep) = 228, trace = from 180 to 188
ep_idx[i] = 199
buffer_index = 126 , len(sampled_ep) = 116, trace = from 93 to 101
ep_idx[i] = 273
buffer_index = 200 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 158
buffer_index = 85 , len(sampled_ep) = 83, trace = from 2 to 10
ep_idx[i] = 141
buffer_index = 68 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 222
buffer_index = 149 , len(samp

buffer_index = 79 , len(sampled_ep) = 228, trace = from 100 to 108
ep_idx[i] = 78
buffer_index = 5 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 329
buffer_index = 256 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 159
buffer_index = 86 , len(sampled_ep) = 79, trace = from 40 to 48
ep_idx[i] = 328
buffer_index = 255 , len(sampled_ep) = 75, trace = from 55 to 63
ep_idx[i] = 164
buffer_index = 91 , len(sampled_ep) = 118, trace = from 23 to 31
ep_idx[i] = 363
buffer_index = 290 , len(sampled_ep) = 73, trace = from 41 to 49
ep_idx[i] = 76
buffer_index = 3 , len(sampled_ep) = 198, trace = from 36 to 44
ep_idx[i] = 219
buffer_index = 146 , len(sampled_ep) = 110, trace = from 63 to 71
ep_idx[i] = 86
buffer_index = 13 , len(sampled_ep) = 77, trace = from 47 to 55
ep_idx[i] = 180
buffer_index = 107 , len(sampled_ep) = 141, trace = from 36 to 44
ep_idx[i] = 84
buffer_index = 11 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 349
buffer_index = 276 , len(sampled_e

buffer_index = 259 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 211
buffer_index = 138 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 192
buffer_index = 119 , len(sampled_ep) = 78, trace = from 7 to 15
ep_idx[i] = 100
buffer_index = 27 , len(sampled_ep) = 102, trace = from 23 to 31
ep_idx[i] = 93
buffer_index = 20 , len(sampled_ep) = 77, trace = from 14 to 22
ep_idx[i] = 224
buffer_index = 151 , len(sampled_ep) = 46, trace = from 1 to 9
ep_idx[i] = 92
buffer_index = 19 , len(sampled_ep) = 281, trace = from 22 to 30
ep_idx[i] = 332
buffer_index = 259 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 95
buffer_index = 22 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 331
buffer_index = 258 , len(sampled_ep) = 84, trace = from 75 to 83
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9921460000000648
epsilon is = 0.9921445000000648
epsilon is = 0.9921430000000648
epsilon is = 0.9921415000000648
epsilon is = 0.9921400000000649
Target Set Success
ep

ep_idx[i] = 325
buffer_index = 251 , len(sampled_ep) = 82, trace = from 25 to 33
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 12 to 20
ep_idx[i] = 238
buffer_index = 164 , len(sampled_ep) = 79, trace = from 7 to 15
ep_idx[i] = 232
buffer_index = 158 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 131
buffer_index = 57 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 331
buffer_index = 257 , len(sampled_ep) = 84, trace = from 13 to 21
ep_idx[i] = 205
buffer_index = 131 , len(sampled_ep) = 78, trace = from 46 to 54
ep_idx[i] = 330
buffer_index = 256 , len(sampled_ep) = 175, trace = from 57 to 65
ep_idx[i] = 110
buffer_index = 36 , len(sampled_ep) = 137, trace = from 64 to 72
ep_idx[i] = 281
buffer_index = 207 , len(sampled_ep) = 178, trace = from 15 to 23
ep_idx[i] = 348
buffer_index = 274 , len(sampled_ep) = 77, trace = from 25 to 33
ep_idx[i] = 167
buffer_index = 93 , len(sampled_ep) = 81, trace = from 63 to 71
ep_idx[i] = 202
buffer_index

buffer_index = 266 , len(sampled_ep) = 142, trace = from 64 to 72
ep_idx[i] = 332
buffer_index = 258 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 115
buffer_index = 41 , len(sampled_ep) = 299, trace = from 147 to 155
ep_idx[i] = 263
buffer_index = 189 , len(sampled_ep) = 51, trace = from 2 to 10
ep_idx[i] = 365
buffer_index = 291 , len(sampled_ep) = 81, trace = from 48 to 56
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9921085000000651
epsilon is = 0.9921070000000651
epsilon is = 0.9921055000000651
epsilon is = 0.9921040000000652
epsilon is = 0.9921025000000652
Target Set Success
ep_idx = [79, 334, 308, 332, 370, 252, 117, 336, 111, 280, 261, 158, 154, 334, 200, 330, 310, 242, 96, 137, 226, 247, 259, 122, 201, 207, 273, 136, 322, 211, 196, 214]
exp_idx = [72, 11, 20, 75, 150, 17, 17, 50, 16, 9, 31, 78, 76, 100, 7, 159, 146, 21, 16, 24, 56, 17, 47, 29, 55, 10, 47, 11, 108, 18, 16, 12]
idx_offset = 74, self.episode_index = 373, len(self.buffer) = 300
ep_idx[i] = 79
buffer

buffer_index = 242 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 331
buffer_index = 257 , len(sampled_ep) = 84, trace = from 49 to 57
ep_idx[i] = 103
buffer_index = 29 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 74
buffer_index = 0 , len(sampled_ep) = 81, trace = from 65 to 73
ep_idx[i] = 342
buffer_index = 268 , len(sampled_ep) = 81, trace = from 38 to 46
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 67 to 75
ep_idx[i] = 332
buffer_index = 258 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 205
buffer_index = 131 , len(sampled_ep) = 78, trace = from 34 to 42
ep_idx[i] = 151
buffer_index = 77 , len(sampled_ep) = 82, trace = from 6 to 14
ep_idx[i] = 303
buffer_index = 229 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 148
buffer_index = 74 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 174
buffer_index = 100 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 373
buffer_index = 299 , len(sampled_ep

epsilon is = 0.9920710000000654
epsilon is = 0.9920695000000654
epsilon is = 0.9920680000000655
epsilon is = 0.9920665000000655
epsilon is = 0.9920650000000655
Target Set Success
ep_idx = [227, 360, 331, 275, 244, 135, 154, 186, 260, 334, 116, 310, 342, 285, 138, 82, 220, 120, 238, 102, 84, 325, 222, 371, 303, 333, 144, 306, 329, 229, 283, 220]
exp_idx = [80, 32, 45, 22, 9, 193, 87, 38, 77, 99, 16, 90, 69, 9, 42, 10, 36, 44, 75, 14, 52, 44, 19, 58, 17, 36, 18, 20, 72, 56, 42, 53]
idx_offset = 74, self.episode_index = 373, len(self.buffer) = 300
ep_idx[i] = 227
buffer_index = 153 , len(sampled_ep) = 112, trace = from 73 to 81
ep_idx[i] = 360
buffer_index = 286 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 331
buffer_index = 257 , len(sampled_ep) = 84, trace = from 38 to 46
ep_idx[i] = 275
buffer_index = 201 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 244
buffer_index = 170 , len(sampled_ep) = 17, trace = from 2 to 10
ep_idx[i] = 135
buffer_index = 61 , len(samp

buffer_index = 175 , len(sampled_ep) = 75, trace = from 50 to 58
ep_idx[i] = 285
buffer_index = 211 , len(sampled_ep) = 166, trace = from 15 to 23
ep_idx[i] = 236
buffer_index = 162 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 281
buffer_index = 207 , len(sampled_ep) = 178, trace = from 169 to 177
ep_idx[i] = 117
buffer_index = 43 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 120
buffer_index = 46 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 350
buffer_index = 276 , len(sampled_ep) = 198, trace = from 44 to 52
ep_idx[i] = 248
buffer_index = 174 , len(sampled_ep) = 77, trace = from 7 to 15
ep_idx[i] = 111
buffer_index = 37 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 213
buffer_index = 139 , len(sampled_ep) = 148, trace = from 84 to 92
ep_idx[i] = 187
buffer_index = 113 , len(sampled_ep) = 84, trace = from 24 to 32
ep_idx[i] = 340
buffer_index = 266 , le

exp_idx = [15, 36, 34, 129, 18, 26, 78, 13, 39, 92, 11, 51, 61, 25, 41, 26, 46, 141, 48, 34, 61, 14, 95, 18, 16, 49, 16, 34, 46, 165, 78, 46]
idx_offset = 74, self.episode_index = 373, len(self.buffer) = 300
ep_idx[i] = 210
buffer_index = 136 , len(sampled_ep) = 81, trace = from 8 to 16
ep_idx[i] = 125
buffer_index = 51 , len(sampled_ep) = 75, trace = from 29 to 37
ep_idx[i] = 75
buffer_index = 1 , len(sampled_ep) = 75, trace = from 27 to 35
ep_idx[i] = 135
buffer_index = 61 , len(sampled_ep) = 211, trace = from 122 to 130
ep_idx[i] = 297
buffer_index = 223 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 126
buffer_index = 52 , len(sampled_ep) = 46, trace = from 19 to 27
ep_idx[i] = 332
buffer_index = 258 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 173
buffer_index = 99 , len(sampled_ep) = 71, trace = from 6 to 14
ep_idx[i] = 109
buffer_index = 35 , len(sampled_ep) = 48, trace = from 32 to 40
ep_idx[i] = 373
buffer_index = 299 , len(sampled_ep) = 299, trace = fr

buffer_index = 246 , len(sampled_ep) = 71, trace = from 54 to 62
ep_idx[i] = 162
buffer_index = 88 , len(sampled_ep) = 112, trace = from 43 to 51
ep_idx[i] = 350
buffer_index = 276 , len(sampled_ep) = 198, trace = from 71 to 79
ep_idx[i] = 347
buffer_index = 273 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 291
buffer_index = 217 , len(sampled_ep) = 103, trace = from 40 to 48
ep_idx[i] = 325
buffer_index = 251 , len(sampled_ep) = 82, trace = from 1 to 9
ep_idx[i] = 281
buffer_index = 207 , len(sampled_ep) = 178, trace = from 26 to 34
ep_idx[i] = 87
buffer_index = 13 , len(sampled_ep) = 76, trace = from 3 to 11
ep_idx[i] = 335
buffer_index = 261 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 223
buffer_index = 149 , len(sampled_ep) = 112, trace = from 18 to 26
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 53 to 61
ep_idx[i] = 291
buffer_index = 217 , len(sampled_ep) = 103, trace = from 83 to 91
sampledTraces.shape = (32, 8, 6)
epsilon is 

buffer_index = 296 , len(sampled_ep) = 189, trace = from 129 to 137
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 105 to 113
ep_idx[i] = 351
buffer_index = 277 , len(sampled_ep) = 143, trace = from 67 to 75
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 97 to 105
ep_idx[i] = 165
buffer_index = 91 , len(sampled_ep) = 81, trace = from 25 to 33
ep_idx[i] = 236
buffer_index = 162 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 332
buffer_index = 258 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 335
buffer_index = 261 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 300
buffer_index = 226 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 87
buffer_index = 13 , len(sampled_ep) = 76, trace = from 67 to 75
ep_idx[i] = 226
buffer_index = 152 , len(sampled_ep) = 120, trace = from 12 to 20
ep_idx[i] = 92
buffer_index = 18 , len(sampled_ep) = 281, trace = from 208 to 216
ep_idx[i] = 269
buffer_index = 195 , l

buffer_index = 18 , len(sampled_ep) = 281, trace = from 6 to 14
ep_idx[i] = 237
buffer_index = 163 , len(sampled_ep) = 116, trace = from 71 to 79
ep_idx[i] = 309
buffer_index = 235 , len(sampled_ep) = 115, trace = from 92 to 100
ep_idx[i] = 246
buffer_index = 172 , len(sampled_ep) = 47, trace = from 22 to 30
ep_idx[i] = 272
buffer_index = 198 , len(sampled_ep) = 70, trace = from 10 to 18
ep_idx[i] = 94
buffer_index = 20 , len(sampled_ep) = 45, trace = from 8 to 16
ep_idx[i] = 123
buffer_index = 49 , len(sampled_ep) = 198, trace = from 50 to 58
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9919735000000662
epsilon is = 0.9919720000000662
epsilon is = 0.9919705000000663
epsilon is = 0.9919690000000663
epsilon is = 0.9919675000000663
Target Set Success
ep_idx = [332, 194, 365, 171, 312, 76, 349, 187, 97, 223, 283, 309, 217, 356, 335, 333, 221, 299, 301, 184, 153, 116, 255, 373, 148, 109, 277, 97, 370, 281, 222, 115]
exp_idx = [77, 43, 10, 80, 81, 39, 43, 64, 63, 106, 46, 88, 43, 39, 23,

buffer_index = 45 , len(sampled_ep) = 183, trace = from 116 to 124
ep_idx[i] = 141
buffer_index = 67 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 208
buffer_index = 134 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 213
buffer_index = 139 , len(sampled_ep) = 148, trace = from 84 to 92
ep_idx[i] = 296
buffer_index = 222 , len(sampled_ep) = 108, trace = from 44 to 52
ep_idx[i] = 154
buffer_index = 80 , len(sampled_ep) = 153, trace = from 139 to 147
ep_idx[i] = 247
buffer_index = 173 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 155
buffer_index = 81 , len(sampled_ep) = 77, trace = from 5 to 13
ep_idx[i] = 332
buffer_index = 258 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 78
buffer_index = 4 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 203
buffer_index = 129 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 328
buffer_index = 254 , len(sampled_ep) = 75, trace = from 16 to 24
ep_idx[i] = 136
buffer_index = 62 , len(samp

ep_idx[i] = 329
buffer_index = 255 , len(sampled_ep) = 81, trace = from 57 to 65
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9919360000000665
epsilon is = 0.9919345000000666
epsilon is = 0.9919330000000666
epsilon is = 0.9919315000000666
epsilon is = 0.9919300000000666
Target Set Success
ep_idx = [187, 235, 322, 254, 327, 196, 285, 195, 146, 128, 246, 223, 243, 320, 135, 169, 239, 254, 110, 225, 259, 74, 171, 297, 343, 342, 207, 318, 74, 334, 149, 100]
exp_idx = [21, 21, 56, 87, 65, 62, 138, 21, 40, 60, 34, 105, 23, 17, 42, 11, 16, 22, 134, 16, 16, 75, 16, 61, 63, 43, 73, 43, 17, 24, 75, 77]
idx_offset = 74, self.episode_index = 373, len(self.buffer) = 300
ep_idx[i] = 187
buffer_index = 113 , len(sampled_ep) = 84, trace = from 14 to 22
ep_idx[i] = 235
buffer_index = 161 , len(sampled_ep) = 46, trace = from 14 to 22
ep_idx[i] = 322
buffer_index = 248 , len(sampled_ep) = 111, trace = from 49 to 57
ep_idx[i] = 254
buffer_index = 180 , len(sampled_ep) = 116, trace = from 80 to 88
ep_id

buffer_index = 149 , len(sampled_ep) = 112, trace = from 46 to 54
ep_idx[i] = 139
buffer_index = 65 , len(sampled_ep) = 133, trace = from 38 to 46
ep_idx[i] = 96
buffer_index = 22 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 78 to 86
ep_idx[i] = 107
buffer_index = 33 , len(sampled_ep) = 109, trace = from 57 to 65
ep_idx[i] = 227
buffer_index = 153 , len(sampled_ep) = 112, trace = from 73 to 81
ep_idx[i] = 102
buffer_index = 28 , len(sampled_ep) = 103, trace = from 87 to 95
ep_idx[i] = 207
buffer_index = 133 , len(sampled_ep) = 75, trace = from 20 to 28
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 162
buffer_index = 88 , len(sampled_ep) = 112, trace = from 23 to 31
ep_idx[i] = 363
buffer_index = 289 , len(sampled_ep) = 73, trace = from 54 to 62
ep_idx[i] = 312
buffer_index = 238 , len(sampled_ep) = 108, trace = from 75 to 83
ep_idx[i] = 225
buffer_index = 151 , le

ep_idx = [309, 78, 334, 350, 242, 163, 287, 334, 223, 149, 289, 370, 115, 273, 332, 174, 338, 81, 167, 239, 247, 214, 334, 209, 373, 349, 319, 299, 329, 330, 340, 254]
exp_idx = [24, 63, 60, 58, 80, 96, 25, 103, 16, 76, 9, 167, 146, 59, 80, 36, 44, 11, 17, 54, 58, 12, 106, 43, 228, 194, 17, 75, 23, 68, 119, 49]
idx_offset = 74, self.episode_index = 373, len(self.buffer) = 300
ep_idx[i] = 309
buffer_index = 235 , len(sampled_ep) = 115, trace = from 17 to 25
ep_idx[i] = 78
buffer_index = 4 , len(sampled_ep) = 81, trace = from 56 to 64
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) = 113, trace = from 53 to 61
ep_idx[i] = 350
buffer_index = 276 , len(sampled_ep) = 198, trace = from 51 to 59
ep_idx[i] = 242
buffer_index = 168 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 163
buffer_index = 89 , len(sampled_ep) = 134, trace = from 89 to 97
ep_idx[i] = 287
buffer_index = 213 , len(sampled_ep) = 194, trace = from 18 to 26
ep_idx[i] = 334
buffer_index = 260 , len(sampled_ep) 

buffer_index = 201 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 277
buffer_index = 202 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 237
buffer_index = 162 , len(sampled_ep) = 116, trace = from 4 to 12
ep_idx[i] = 371
buffer_index = 296 , len(sampled_ep) = 299, trace = from 242 to 250
ep_idx[i] = 271
buffer_index = 196 , len(sampled_ep) = 46, trace = from 9 to 17
ep_idx[i] = 334
buffer_index = 259 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 178
buffer_index = 103 , len(sampled_ep) = 80, trace = from 2 to 10
ep_idx[i] = 86
buffer_index = 11 , len(sampled_ep) = 77, trace = from 41 to 49
ep_idx[i] = 361
buffer_index = 286 , len(sampled_ep) = 47, trace = from 13 to 21
ep_idx[i] = 160
buffer_index = 85 , len(sampled_ep) = 117, trace = from 38 to 46
ep_idx[i] = 349
buffer_index = 274 , len(sampled_ep) = 228, trace = from 124 to 132
ep_idx[i] = 240
buffer_index = 165 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 76
buffer_index = 1 , len(s

buffer_index = 102 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 222
buffer_index = 147 , len(sampled_ep) = 224, trace = from 46 to 54
ep_idx[i] = 330
buffer_index = 255 , len(sampled_ep) = 175, trace = from 134 to 142
ep_idx[i] = 283
buffer_index = 208 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 331
buffer_index = 256 , len(sampled_ep) = 84, trace = from 35 to 43
ep_idx[i] = 297
buffer_index = 222 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 110
buffer_index = 35 , len(sampled_ep) = 137, trace = from 120 to 128
ep_idx[i] = 250
buffer_index = 175 , len(sampled_ep) = 49, trace = from 13 to 21
ep_idx[i] = 234
buffer_index = 159 , len(sampled_ep) = 167, trace = from 75 to 83
ep_idx[i] = 115
buffer_index = 40 , len(sampled_ep) = 299, trace = from 84 to 92
ep_idx[i] = 255
buffer_index = 180 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 218
buffer_index = 143 , len(sampled_ep) = 80, trace = from 55 to 63
ep_idx[i] = 223
buffer_index = 148 , le

buffer_index = 135 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 134
buffer_index = 59 , len(sampled_ep) = 208, trace = from 100 to 108
ep_idx[i] = 348
buffer_index = 273 , len(sampled_ep) = 77, trace = from 64 to 72
ep_idx[i] = 156
buffer_index = 81 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 96
buffer_index = 21 , len(sampled_ep) = 81, trace = from 23 to 31
ep_idx[i] = 115
buffer_index = 40 , len(sampled_ep) = 299, trace = from 72 to 80
ep_idx[i] = 171
buffer_index = 96 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 286
buffer_index = 211 , len(sampled_ep) = 147, trace = from 58 to 66
ep_idx[i] = 111
buffer_index = 36 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 202
buffer_index = 127 , len(sampled_ep) = 143, trace = from 92 to 100
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9918385000000673
epsilon is = 0.9918370000000674
epsilon is = 0.9918355000000674
epsilon is = 0.9918340000000674
epsilon is = 0.9918325000000674
Target Set Suc

buffer_index = 298 , len(sampled_ep) = 299, trace = from 213 to 221
ep_idx[i] = 330
buffer_index = 255 , len(sampled_ep) = 175, trace = from 135 to 143
ep_idx[i] = 131
buffer_index = 56 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 334
buffer_index = 259 , len(sampled_ep) = 113, trace = from 89 to 97
ep_idx[i] = 207
buffer_index = 132 , len(sampled_ep) = 75, trace = from 24 to 32
ep_idx[i] = 145
buffer_index = 70 , len(sampled_ep) = 117, trace = from 5 to 13
ep_idx[i] = 373
buffer_index = 298 , len(sampled_ep) = 299, trace = from 282 to 290
ep_idx[i] = 300
buffer_index = 225 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 283
buffer_index = 208 , len(sampled_ep) = 81, trace = from 16 to 24
ep_idx[i] = 112
buffer_index = 37 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 112
buffer_index = 37 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 274
buffer_index = 199 , len(sampled_ep) = 114, trace = from 22 to 30
ep_idx[i] = 332
buffer_index = 257 , 

buffer_index = 45 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 249
buffer_index = 174 , len(sampled_ep) = 75, trace = from 52 to 60
ep_idx[i] = 175
buffer_index = 100 , len(sampled_ep) = 102, trace = from 29 to 37
ep_idx[i] = 281
buffer_index = 206 , len(sampled_ep) = 178, trace = from 6 to 14
ep_idx[i] = 334
buffer_index = 259 , len(sampled_ep) = 113, trace = from 84 to 92
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9918010000000677
epsilon is = 0.9917995000000677
epsilon is = 0.9917980000000677
epsilon is = 0.9917965000000677
epsilon is = 0.9917950000000677
Target Set Success
ep_idx = [90, 260, 134, 337, 155, 298, 84, 286, 334, 333, 84, 321, 238, 332, 256, 147, 163, 331, 81, 205, 340, 298, 135, 214, 163, 362, 304, 353, 236, 157, 271, 226]
exp_idx = [28, 46, 16, 19, 64, 41, 19, 98, 106, 79, 16, 17, 15, 78, 57, 11, 125, 35, 75, 14, 91, 46, 131, 12, 8, 77, 15, 31, 62, 76, 35, 18]
idx_offset = 75, self.episode_index = 374, len(self.buffer) = 300
ep_idx[i] = 90
buffer_ind

buffer_index = 0 , len(sampled_ep) = 75, trace = from 36 to 44
ep_idx[i] = 330
buffer_index = 255 , len(sampled_ep) = 175, trace = from 39 to 47
ep_idx[i] = 169
buffer_index = 94 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 81
buffer_index = 6 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 264
buffer_index = 189 , len(sampled_ep) = 109, trace = from 10 to 18
ep_idx[i] = 227
buffer_index = 152 , len(sampled_ep) = 112, trace = from 41 to 49
ep_idx[i] = 134
buffer_index = 59 , len(sampled_ep) = 208, trace = from 76 to 84
ep_idx[i] = 332
buffer_index = 257 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 275
buffer_index = 200 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 184
buffer_index = 109 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 192
buffer_index = 117 , len(sampled_ep) = 78, trace = from 43 to 51
ep_idx[i] = 242
buffer_index = 167 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 158
buffer_index = 83 , len(sampled_e

epsilon is = 0.991763500000068
epsilon is = 0.991762000000068
epsilon is = 0.991760500000068
epsilon is = 0.991759000000068
length of poped element = 75 , cntr = 68 , diff = 7
epsilon is = 0.991757500000068
Target Set Success
ep_idx = [129, 333, 176, 196, 143, 120, 334, 277, 202, 152, 107, 251, 154, 136, 171, 101, 202, 95, 130, 99, 202, 272, 77, 281, 226, 123, 256, 304, 293, 334, 330, 330]
exp_idx = [16, 69, 72, 54, 14, 37, 106, 16, 88, 15, 103, 53, 117, 22, 16, 55, 41, 59, 65, 66, 46, 18, 17, 113, 85, 137, 80, 15, 43, 109, 41, 21]
idx_offset = 76, self.episode_index = 375, len(self.buffer) = 300
ep_idx[i] = 129
buffer_index = 53 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 333
buffer_index = 257 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 176
buffer_index = 100 , len(sampled_ep) = 80, trace = from 65 to 73
ep_idx[i] = 196
buffer_index = 120 , len(sampled_ep) = 135, trace = from 47 to 55
ep_idx[i] = 143
buffer_index = 67 , len(sampled_ep) = 78, trace = from 7 

buffer_index = 155 , len(sampled_ep) = 77, trace = from 6 to 14
ep_idx[i] = 205
buffer_index = 129 , len(sampled_ep) = 78, trace = from 45 to 53
ep_idx[i] = 209
buffer_index = 133 , len(sampled_ep) = 71, trace = from 4 to 12
ep_idx[i] = 321
buffer_index = 245 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 132
buffer_index = 56 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 170
buffer_index = 94 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 286
buffer_index = 210 , len(sampled_ep) = 147, trace = from 29 to 37
ep_idx[i] = 87
buffer_index = 11 , len(sampled_ep) = 76, trace = from 14 to 22
ep_idx[i] = 188
buffer_index = 112 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 262
buffer_index = 186 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 192
buffer_index = 116 , len(sampled_ep) = 78, trace = from 49 to 57
ep_idx[i] = 227
buffer_index = 151 , len(sampled_ep) = 112, trace = from 99 to 107
ep_idx[i] = 116
buffer_index = 40 , len(sampled_

exp_idx = [40, 52, 51, 31, 10, 158, 99, 45, 107, 12, 112, 77, 118, 49, 46, 7, 28, 30, 8, 16, 30, 73, 33, 11, 15, 42, 51, 51, 75, 16, 11, 33]
idx_offset = 76, self.episode_index = 375, len(self.buffer) = 300
ep_idx[i] = 224
buffer_index = 148 , len(sampled_ep) = 46, trace = from 33 to 41
ep_idx[i] = 348
buffer_index = 272 , len(sampled_ep) = 77, trace = from 45 to 53
ep_idx[i] = 196
buffer_index = 120 , len(sampled_ep) = 135, trace = from 44 to 52
ep_idx[i] = 154
buffer_index = 78 , len(sampled_ep) = 153, trace = from 24 to 32
ep_idx[i] = 309
buffer_index = 233 , len(sampled_ep) = 115, trace = from 3 to 11
ep_idx[i] = 234
buffer_index = 158 , len(sampled_ep) = 167, trace = from 151 to 159
ep_idx[i] = 100
buffer_index = 24 , len(sampled_ep) = 102, trace = from 92 to 100
ep_idx[i] = 86
buffer_index = 10 , len(sampled_ep) = 77, trace = from 38 to 46
ep_idx[i] = 334
buffer_index = 258 , len(sampled_ep) = 113, trace = from 100 to 108
ep_idx[i] = 133
buffer_index = 57 , len(sampled_ep) = 81, 

buffer_index = 258 , len(sampled_ep) = 113, trace = from 97 to 105
ep_idx[i] = 205
buffer_index = 129 , len(sampled_ep) = 78, trace = from 38 to 46
ep_idx[i] = 84
buffer_index = 8 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 369
buffer_index = 293 , len(sampled_ep) = 75, trace = from 20 to 28
ep_idx[i] = 184
buffer_index = 108 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 287
buffer_index = 211 , len(sampled_ep) = 194, trace = from 169 to 177
ep_idx[i] = 332
buffer_index = 256 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 207
buffer_index = 131 , len(sampled_ep) = 75, trace = from 14 to 22
ep_idx[i] = 330
buffer_index = 254 , len(sampled_ep) = 175, trace = from 148 to 156
ep_idx[i] = 338
buffer_index = 262 , len(sampled_ep) = 81, trace = from 45 to 53
ep_idx[i] = 87
buffer_index = 11 , len(sampled_ep) = 76, trace = from 48 to 56
ep_idx[i] = 269
buffer_index = 193 , len(sampled_ep) = 45, trace = from 3 to 11
ep_idx[i] = 238
buffer_index = 162 , len(sa

buffer_index = 294 , len(sampled_ep) = 189, trace = from 173 to 181
ep_idx[i] = 125
buffer_index = 49 , len(sampled_ep) = 75, trace = from 7 to 15
ep_idx[i] = 176
buffer_index = 100 , len(sampled_ep) = 80, trace = from 5 to 13
ep_idx[i] = 334
buffer_index = 258 , len(sampled_ep) = 113, trace = from 94 to 102
ep_idx[i] = 119
buffer_index = 43 , len(sampled_ep) = 183, trace = from 7 to 15
ep_idx[i] = 368
buffer_index = 292 , len(sampled_ep) = 81, trace = from 18 to 26
ep_idx[i] = 82
buffer_index = 6 , len(sampled_ep) = 73, trace = from 4 to 12
ep_idx[i] = 224
buffer_index = 148 , len(sampled_ep) = 46, trace = from 19 to 27
ep_idx[i] = 115
buffer_index = 39 , len(sampled_ep) = 299, trace = from 14 to 22
ep_idx[i] = 153
buffer_index = 77 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 315
buffer_index = 239 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 306
buffer_index = 230 , len(sampled_ep) = 117, trace = from 9 to 17
ep_idx[i] = 313
buffer_index = 237 , len(sampled

buffer_index = 271 , len(sampled_ep) = 81, trace = from 47 to 55
ep_idx[i] = 106
buffer_index = 30 , len(sampled_ep) = 103, trace = from 48 to 56
ep_idx[i] = 180
buffer_index = 104 , len(sampled_ep) = 141, trace = from 47 to 55
ep_idx[i] = 115
buffer_index = 39 , len(sampled_ep) = 299, trace = from 203 to 211
ep_idx[i] = 338
buffer_index = 262 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 355
buffer_index = 279 , len(sampled_ep) = 81, trace = from 17 to 25
ep_idx[i] = 263
buffer_index = 187 , len(sampled_ep) = 51, trace = from 33 to 41
ep_idx[i] = 353
buffer_index = 277 , len(sampled_ep) = 81, trace = from 10 to 18
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9916660000000688
epsilon is = 0.9916645000000688
epsilon is = 0.9916630000000688
epsilon is = 0.9916615000000688
epsilon is = 0.9916600000000688
Target Set Success
ep_idx = [201, 273, 168, 92, 214, 363, 76, 163, 162, 230, 228, 181, 330, 162, 251, 326, 334, 88, 202, 156, 334, 290, 351, 275, 253, 208, 287, 182, 184, 1

buffer_index = 253 , len(sampled_ep) = 175, trace = from 34 to 42
ep_idx[i] = 243
buffer_index = 166 , len(sampled_ep) = 81, trace = from 61 to 69
ep_idx[i] = 334
buffer_index = 257 , len(sampled_ep) = 113, trace = from 82 to 90
ep_idx[i] = 243
buffer_index = 166 , len(sampled_ep) = 81, trace = from 5 to 13
ep_idx[i] = 343
buffer_index = 266 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 299
buffer_index = 222 , len(sampled_ep) = 100, trace = from 26 to 34
ep_idx[i] = 305
buffer_index = 228 , len(sampled_ep) = 71, trace = from 20 to 28
ep_idx[i] = 101
buffer_index = 24 , len(sampled_ep) = 81, trace = from 64 to 72
ep_idx[i] = 207
buffer_index = 130 , len(sampled_ep) = 75, trace = from 43 to 51
ep_idx[i] = 299
buffer_index = 222 , len(sampled_ep) = 100, trace = from 5 to 13
ep_idx[i] = 286
buffer_index = 209 , len(sampled_ep) = 147, trace = from 76 to 84
ep_idx[i] = 315
buffer_index = 238 , len(sampled_ep) = 81, trace = from 59 to 67
ep_idx[i] = 270
buffer_index = 193 , len(s

ep_idx[i] = 310
buffer_index = 233 , len(sampled_ep) = 164, trace = from 155 to 163
ep_idx[i] = 340
buffer_index = 263 , len(sampled_ep) = 142, trace = from 6 to 14
ep_idx[i] = 160
buffer_index = 83 , len(sampled_ep) = 117, trace = from 8 to 16
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9916285000000691
epsilon is = 0.9916270000000691
epsilon is = 0.9916255000000691
epsilon is = 0.9916240000000691
epsilon is = 0.9916225000000691
Target Set Success
ep_idx = [135, 134, 323, 330, 360, 228, 373, 100, 334, 253, 153, 334, 194, 323, 112, 332, 315, 196, 287, 252, 334, 152, 245, 91, 97, 225, 222, 284, 286, 320, 312, 177]
exp_idx = [134, 138, 17, 162, 68, 10, 40, 15, 110, 46, 18, 107, 11, 19, 22, 45, 33, 59, 41, 58, 94, 10, 46, 14, 47, 55, 93, 77, 54, 54, 14, 71]
idx_offset = 77, self.episode_index = 376, len(self.buffer) = 300
ep_idx[i] = 135
buffer_index = 58 , len(sampled_ep) = 211, trace = from 127 to 135
ep_idx[i] = 134
buffer_index = 57 , len(sampled_ep) = 208, trace = from 131 to 139

ep_idx[i] = 308
buffer_index = 231 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 156
buffer_index = 79 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 284
buffer_index = 207 , len(sampled_ep) = 81, trace = from 73 to 81
ep_idx[i] = 330
buffer_index = 253 , len(sampled_ep) = 175, trace = from 47 to 55
ep_idx[i] = 157
buffer_index = 80 , len(sampled_ep) = 103, trace = from 49 to 57
ep_idx[i] = 315
buffer_index = 238 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 292
buffer_index = 215 , len(sampled_ep) = 81, trace = from 60 to 68
ep_idx[i] = 223
buffer_index = 146 , len(sampled_ep) = 112, trace = from 78 to 86
ep_idx[i] = 233
buffer_index = 156 , len(sampled_ep) = 45, trace = from 23 to 31
ep_idx[i] = 265
buffer_index = 188 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 236
buffer_index = 159 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 350
buffer_index = 273 , len(sampled_ep) = 198, trace = from 39 to 47
ep_idx[i] = 332
buffer_inde

epsilon is = 0.9915850000000694
Target Set Success
ep_idx = [188, 132, 165, 86, 293, 331, 191, 171, 184, 288, 284, 331, 370, 278, 292, 195, 272, 208, 135, 192, 222, 147, 334, 335, 136, 281, 363, 226, 349, 115, 330, 318]
exp_idx = [20, 49, 73, 61, 44, 45, 40, 16, 10, 58, 17, 46, 124, 17, 69, 47, 49, 52, 28, 22, 181, 64, 107, 77, 32, 119, 66, 22, 32, 207, 158, 38]
idx_offset = 77, self.episode_index = 376, len(self.buffer) = 300
ep_idx[i] = 188
buffer_index = 111 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 132
buffer_index = 55 , len(sampled_ep) = 81, trace = from 42 to 50
ep_idx[i] = 165
buffer_index = 88 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 86
buffer_index = 9 , len(sampled_ep) = 77, trace = from 54 to 62
ep_idx[i] = 293
buffer_index = 216 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 331
buffer_index = 254 , len(sampled_ep) = 84, trace = from 38 to 46
ep_idx[i] = 191
buffer_index = 114 , len(sampled_ep) = 131, trace = from 33 to 41
ep_idx

ep_idx[i] = 306
buffer_index = 229 , len(sampled_ep) = 117, trace = from 13 to 21
ep_idx[i] = 118
buffer_index = 41 , len(sampled_ep) = 111, trace = from 7 to 15
ep_idx[i] = 176
buffer_index = 99 , len(sampled_ep) = 80, trace = from 11 to 19
ep_idx[i] = 331
buffer_index = 254 , len(sampled_ep) = 84, trace = from 68 to 76
ep_idx[i] = 239
buffer_index = 162 , len(sampled_ep) = 101, trace = from 46 to 54
ep_idx[i] = 180
buffer_index = 103 , len(sampled_ep) = 141, trace = from 124 to 132
ep_idx[i] = 218
buffer_index = 141 , len(sampled_ep) = 80, trace = from 68 to 76
ep_idx[i] = 179
buffer_index = 102 , len(sampled_ep) = 77, trace = from 25 to 33
ep_idx[i] = 87
buffer_index = 10 , len(sampled_ep) = 76, trace = from 2 to 10
ep_idx[i] = 157
buffer_index = 80 , len(sampled_ep) = 103, trace = from 25 to 33
ep_idx[i] = 257
buffer_index = 180 , len(sampled_ep) = 82, trace = from 65 to 73
ep_idx[i] = 303
buffer_index = 226 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 332
buffer_index

buffer_index = 279 , len(sampled_ep) = 100, trace = from 13 to 21
ep_idx[i] = 141
buffer_index = 64 , len(sampled_ep) = 81, trace = from 2 to 10
ep_idx[i] = 237
buffer_index = 160 , len(sampled_ep) = 116, trace = from 64 to 72
ep_idx[i] = 297
buffer_index = 220 , len(sampled_ep) = 81, trace = from 53 to 61
ep_idx[i] = 153
buffer_index = 76 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 292
buffer_index = 215 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 322
buffer_index = 245 , len(sampled_ep) = 111, trace = from 41 to 49
ep_idx[i] = 247
buffer_index = 170 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 331
buffer_index = 254 , len(sampled_ep) = 84, trace = from 44 to 52
ep_idx[i] = 146
buffer_index = 69 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 160
buffer_index = 83 , len(sampled_ep) = 117, trace = from 74 to 82
ep_idx[i] = 123
buffer_index = 46 , len(sampled_ep) = 198, trace = from 141 to 149
ep_idx[i] = 125
buffer_index = 48 , len(samp

buffer_index = 258 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 330
buffer_index = 253 , len(sampled_ep) = 175, trace = from 54 to 62
ep_idx[i] = 274
buffer_index = 197 , len(sampled_ep) = 114, trace = from 9 to 17
ep_idx[i] = 372
buffer_index = 295 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 211
buffer_index = 134 , len(sampled_ep) = 81, trace = from 0 to 8
ep_idx[i] = 250
buffer_index = 173 , len(sampled_ep) = 49, trace = from 12 to 20
ep_idx[i] = 77
buffer_index = 0 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 222
buffer_index = 145 , len(sampled_ep) = 224, trace = from 113 to 121
ep_idx[i] = 264
buffer_index = 187 , len(sampled_ep) = 109, trace = from 6 to 14
ep_idx[i] = 334
buffer_index = 257 , len(sampled_ep) = 113, trace = from 88 to 96
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9915310000000699
epsilon is = 0.9915295000000699
epsilon is = 0.9915280000000699
epsilon is = 0.9915265000000699
length of poped element = 81 , cntr = 74 , di

ep_idx[i] = 270
buffer_index = 192 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 81
buffer_index = 3 , len(sampled_ep) = 81, trace = from 28 to 36
ep_idx[i] = 351
buffer_index = 273 , len(sampled_ep) = 143, trace = from 79 to 87
ep_idx[i] = 299
buffer_index = 221 , len(sampled_ep) = 100, trace = from 62 to 70
ep_idx[i] = 220
buffer_index = 142 , len(sampled_ep) = 118, trace = from 107 to 115
ep_idx[i] = 249
buffer_index = 171 , len(sampled_ep) = 75, trace = from 22 to 30
ep_idx[i] = 167
buffer_index = 89 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 212
buffer_index = 134 , len(sampled_ep) = 81, trace = from 31 to 39
ep_idx[i] = 112
buffer_index = 34 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 176
buffer_index = 98 , len(sampled_ep) = 80, trace = from 9 to 17
ep_idx[i] = 83
buffer_index = 5 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 207
buffer_index = 129 , len(sampled_ep) = 75, trace = from 13 to 21
ep_idx[i] = 135
buffer_index = 57 ,

buffer_index = 98 , len(sampled_ep) = 80, trace = from 61 to 69
ep_idx[i] = 342
buffer_index = 264 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 272
buffer_index = 194 , len(sampled_ep) = 70, trace = from 33 to 41
ep_idx[i] = 115
buffer_index = 37 , len(sampled_ep) = 299, trace = from 63 to 71
ep_idx[i] = 342
buffer_index = 264 , len(sampled_ep) = 81, trace = from 47 to 55
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9914935000000702
epsilon is = 0.9914920000000702
epsilon is = 0.9914905000000702
epsilon is = 0.9914890000000702
epsilon is = 0.9914875000000702
Target Set Success
ep_idx = [152, 243, 120, 146, 290, 345, 165, 307, 159, 204, 125, 211, 167, 305, 103, 326, 106, 373, 238, 330, 188, 256, 334, 373, 130, 312, 240, 222, 212, 94, 184, 234]
exp_idx = [15, 41, 37, 47, 128, 47, 18, 28, 42, 17, 18, 62, 76, 15, 39, 20, 94, 206, 18, 171, 35, 50, 109, 294, 64, 79, 47, 123, 38, 18, 14, 82]
idx_offset = 78, self.episode_index = 377, len(self.buffer) = 300
ep_idx[i] = 152
buff

buffer_index = 181 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 239
buffer_index = 161 , len(sampled_ep) = 101, trace = from 57 to 65
ep_idx[i] = 115
buffer_index = 37 , len(sampled_ep) = 299, trace = from 53 to 61
ep_idx[i] = 334
buffer_index = 256 , len(sampled_ep) = 113, trace = from 99 to 107
ep_idx[i] = 276
buffer_index = 198 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 157
buffer_index = 79 , len(sampled_ep) = 103, trace = from 42 to 50
ep_idx[i] = 97
buffer_index = 19 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 210
buffer_index = 132 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 334
buffer_index = 256 , len(sampled_ep) = 113, trace = from 91 to 99
ep_idx[i] = 281
buffer_index = 203 , len(sampled_ep) = 178, trace = from 29 to 37
ep_idx[i] = 153
buffer_index = 75 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 369
buffer_index = 291 , len(sampled_ep) = 75, trace = from 14 to 22
ep_idx[i] = 241
buffer_index = 163 , len(samp

epsilon is = 0.9914560000000705
epsilon is = 0.9914545000000705
epsilon is = 0.9914530000000705
epsilon is = 0.9914515000000705
epsilon is = 0.9914500000000706
Target Set Success
ep_idx = [351, 235, 100, 216, 222, 163, 164, 176, 187, 372, 324, 222, 283, 309, 197, 246, 312, 184, 330, 183, 97, 237, 185, 179, 334, 115, 240, 106, 240, 134, 115, 115]
exp_idx = [128, 13, 77, 10, 219, 7, 62, 60, 21, 68, 11, 204, 19, 47, 70, 21, 13, 50, 86, 8, 16, 45, 53, 10, 112, 220, 46, 94, 53, 72, 262, 44]
idx_offset = 78, self.episode_index = 377, len(self.buffer) = 300
ep_idx[i] = 351
buffer_index = 273 , len(sampled_ep) = 143, trace = from 121 to 129
ep_idx[i] = 235
buffer_index = 157 , len(sampled_ep) = 46, trace = from 6 to 14
ep_idx[i] = 100
buffer_index = 22 , len(sampled_ep) = 102, trace = from 70 to 78
ep_idx[i] = 216
buffer_index = 138 , len(sampled_ep) = 81, trace = from 3 to 11
ep_idx[i] = 222
buffer_index = 144 , len(sampled_ep) = 224, trace = from 212 to 220
ep_idx[i] = 163
buffer_index = 85 

buffer_index = 253 , len(sampled_ep) = 84, trace = from 21 to 29
ep_idx[i] = 104
buffer_index = 26 , len(sampled_ep) = 50, trace = from 3 to 11
ep_idx[i] = 348
buffer_index = 270 , len(sampled_ep) = 77, trace = from 46 to 54
ep_idx[i] = 156
buffer_index = 78 , len(sampled_ep) = 81, trace = from 1 to 9
ep_idx[i] = 234
buffer_index = 156 , len(sampled_ep) = 167, trace = from 5 to 13
ep_idx[i] = 334
buffer_index = 256 , len(sampled_ep) = 113, trace = from 40 to 48
ep_idx[i] = 135
buffer_index = 57 , len(sampled_ep) = 211, trace = from 41 to 49
ep_idx[i] = 234
buffer_index = 156 , len(sampled_ep) = 167, trace = from 105 to 113
ep_idx[i] = 172
buffer_index = 94 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 181
buffer_index = 103 , len(sampled_ep) = 43, trace = from 6 to 14
ep_idx[i] = 249
buffer_index = 171 , len(sampled_ep) = 75, trace = from 27 to 35
ep_idx[i] = 110
buffer_index = 32 , len(sampled_ep) = 137, trace = from 89 to 97
ep_idx[i] = 112
buffer_index = 34 , len(sampled

exp_idx = [22, 22, 26, 9, 41, 51, 37, 41, 47, 15, 62, 64, 79, 128, 15, 64, 30, 19, 148, 37, 28, 105, 16, 23, 21, 28, 47, 13, 10, 34, 143, 78]
idx_offset = 78, self.episode_index = 377, len(self.buffer) = 300
ep_idx[i] = 88
buffer_index = 10 , len(sampled_ep) = 114, trace = from 15 to 23
ep_idx[i] = 174
buffer_index = 96 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 171
buffer_index = 93 , len(sampled_ep) = 81, trace = from 19 to 27
ep_idx[i] = 311
buffer_index = 233 , len(sampled_ep) = 84, trace = from 2 to 10
ep_idx[i] = 79
buffer_index = 1 , len(sampled_ep) = 75, trace = from 34 to 42
ep_idx[i] = 154
buffer_index = 76 , len(sampled_ep) = 153, trace = from 44 to 52
ep_idx[i] = 120
buffer_index = 42 , len(sampled_ep) = 81, trace = from 30 to 38
ep_idx[i] = 84
buffer_index = 6 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 99
buffer_index = 21 , len(sampled_ep) = 83, trace = from 40 to 48
ep_idx[i] = 375
buffer_index = 297 , len(sampled_ep) = 80, trace = from 8 to

buffer_index = 44 , len(sampled_ep) = 198, trace = from 10 to 18
ep_idx[i] = 116
buffer_index = 37 , len(sampled_ep) = 162, trace = from 103 to 111
ep_idx[i] = 210
buffer_index = 131 , len(sampled_ep) = 81, trace = from 48 to 56
ep_idx[i] = 214
buffer_index = 135 , len(sampled_ep) = 75, trace = from 58 to 66
ep_idx[i] = 153
buffer_index = 74 , len(sampled_ep) = 81, trace = from 15 to 23
ep_idx[i] = 119
buffer_index = 40 , len(sampled_ep) = 183, trace = from 136 to 144
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 89 to 97
ep_idx[i] = 200
buffer_index = 121 , len(sampled_ep) = 82, trace = from 53 to 61
ep_idx[i] = 342
buffer_index = 263 , len(sampled_ep) = 81, trace = from 66 to 74
ep_idx[i] = 251
buffer_index = 172 , len(sampled_ep) = 133, trace = from 50 to 58
ep_idx[i] = 271
buffer_index = 192 , len(sampled_ep) = 46, trace = from 38 to 46
ep_idx[i] = 267
buffer_index = 188 , len(sampled_ep) = 83, trace = from 12 to 20
ep_idx[i] = 171
buffer_index = 92 , len

buffer_index = 121 , len(sampled_ep) = 82, trace = from 3 to 11
ep_idx[i] = 271
buffer_index = 192 , len(sampled_ep) = 46, trace = from 37 to 45
ep_idx[i] = 348
buffer_index = 269 , len(sampled_ep) = 77, trace = from 1 to 9
ep_idx[i] = 172
buffer_index = 93 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 148
buffer_index = 69 , len(sampled_ep) = 81, trace = from 46 to 54
ep_idx[i] = 180
buffer_index = 101 , len(sampled_ep) = 141, trace = from 8 to 16
ep_idx[i] = 297
buffer_index = 218 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 156
buffer_index = 77 , len(sampled_ep) = 81, trace = from 35 to 43
ep_idx[i] = 330
buffer_index = 251 , len(sampled_ep) = 175, trace = from 138 to 146
ep_idx[i] = 279
buffer_index = 200 , len(sampled_ep) = 48, trace = from 39 to 47
ep_idx[i] = 119
buffer_index = 40 , len(sampled_ep) = 183, trace = from 161 to 169
ep_idx[i] = 378
buffer_index = 299 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 147
buffer_index = 68 , len(sampl

buffer_index = 101 , len(sampled_ep) = 141, trace = from 15 to 23
ep_idx[i] = 273
buffer_index = 194 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 275
buffer_index = 196 , len(sampled_ep) = 81, trace = from 24 to 32
ep_idx[i] = 265
buffer_index = 186 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 91
buffer_index = 12 , len(sampled_ep) = 46, trace = from 11 to 19
ep_idx[i] = 202
buffer_index = 123 , len(sampled_ep) = 143, trace = from 63 to 71
ep_idx[i] = 145
buffer_index = 66 , len(sampled_ep) = 117, trace = from 73 to 81
ep_idx[i] = 195
buffer_index = 116 , len(sampled_ep) = 81, trace = from 31 to 39
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9913585000000713
epsilon is = 0.9913570000000713
epsilon is = 0.9913555000000713
epsilon is = 0.9913540000000713
epsilon is = 0.9913525000000714
Target Set Success
ep_idx = [146, 334, 159, 310, 154, 80, 371, 157, 313, 334, 279, 295, 268, 118, 196, 84, 213, 203, 370, 330, 232, 281, 310, 281, 311, 142, 118, 118, 367, 306,

buffer_index = 255 , len(sampled_ep) = 113, trace = from 99 to 107
ep_idx[i] = 289
buffer_index = 210 , len(sampled_ep) = 83, trace = from 14 to 22
ep_idx[i] = 146
buffer_index = 67 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 123
buffer_index = 44 , len(sampled_ep) = 198, trace = from 10 to 18
ep_idx[i] = 134
buffer_index = 55 , len(sampled_ep) = 208, trace = from 110 to 118
ep_idx[i] = 202
buffer_index = 123 , len(sampled_ep) = 143, trace = from 34 to 42
ep_idx[i] = 161
buffer_index = 82 , len(sampled_ep) = 114, trace = from 1 to 9
ep_idx[i] = 164
buffer_index = 85 , len(sampled_ep) = 118, trace = from 16 to 24
ep_idx[i] = 213
buffer_index = 134 , len(sampled_ep) = 148, trace = from 25 to 33
ep_idx[i] = 275
buffer_index = 196 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 319
buffer_index = 240 , len(sampled_ep) = 70, trace = from 62 to 70
ep_idx[i] = 176
buffer_index = 97 , len(sampled_ep) = 80, trace = from 15 to 23
ep_idx[i] = 344
buffer_index = 265 , len(s

buffer_index = 48 , len(sampled_ep) = 50, trace = from 29 to 37
ep_idx[i] = 100
buffer_index = 21 , len(sampled_ep) = 102, trace = from 67 to 75
ep_idx[i] = 110
buffer_index = 31 , len(sampled_ep) = 137, trace = from 114 to 122
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9913210000000716
epsilon is = 0.9913195000000716
epsilon is = 0.9913180000000716
epsilon is = 0.9913165000000717
epsilon is = 0.9913150000000717
Target Set Success
ep_idx = [229, 259, 170, 172, 234, 306, 321, 318, 167, 191, 89, 334, 355, 349, 226, 98, 106, 196, 310, 304, 306, 307, 231, 326, 147, 249, 342, 326, 298, 281, 132, 152]
exp_idx = [47, 43, 39, 18, 66, 65, 79, 44, 78, 50, 64, 105, 19, 211, 77, 23, 76, 99, 85, 12, 90, 23, 34, 76, 44, 36, 35, 56, 107, 69, 33, 96]
idx_offset = 79, self.episode_index = 378, len(self.buffer) = 300
ep_idx[i] = 229
buffer_index = 150 , len(sampled_ep) = 81, trace = from 40 to 48
ep_idx[i] = 259
buffer_index = 180 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 170
buffer

buffer_index = 235 , len(sampled_ep) = 81, trace = from 50 to 58
ep_idx[i] = 362
buffer_index = 283 , len(sampled_ep) = 104, trace = from 52 to 60
ep_idx[i] = 347
buffer_index = 268 , len(sampled_ep) = 81, trace = from 69 to 77
ep_idx[i] = 252
buffer_index = 173 , len(sampled_ep) = 81, trace = from 71 to 79
ep_idx[i] = 191
buffer_index = 112 , len(sampled_ep) = 131, trace = from 93 to 101
ep_idx[i] = 374
buffer_index = 295 , len(sampled_ep) = 174, trace = from 63 to 71
ep_idx[i] = 226
buffer_index = 147 , len(sampled_ep) = 120, trace = from 106 to 114
ep_idx[i] = 347
buffer_index = 268 , len(sampled_ep) = 81, trace = from 36 to 44
ep_idx[i] = 219
buffer_index = 140 , len(sampled_ep) = 110, trace = from 93 to 101
ep_idx[i] = 131
buffer_index = 52 , len(sampled_ep) = 81, trace = from 4 to 12
ep_idx[i] = 340
buffer_index = 261 , len(sampled_ep) = 142, trace = from 86 to 94
ep_idx[i] = 339
buffer_index = 260 , len(sampled_ep) = 78, trace = from 16 to 24
ep_idx[i] = 89
buffer_index = 10 , l

ep_idx = [100, 359, 262, 288, 293, 251, 157, 307, 92, 305, 338, 277, 214, 334, 373, 106, 109, 371, 362, 115, 278, 376, 331, 164, 354, 306, 176, 339, 107, 125, 144, 265]
exp_idx = [18, 31, 27, 16, 41, 38, 75, 17, 11, 15, 37, 79, 66, 111, 222, 65, 20, 158, 43, 267, 16, 9, 82, 48, 16, 89, 49, 32, 95, 56, 41, 59]
idx_offset = 79, self.episode_index = 378, len(self.buffer) = 300
ep_idx[i] = 100
buffer_index = 21 , len(sampled_ep) = 102, trace = from 11 to 19
ep_idx[i] = 359
buffer_index = 280 , len(sampled_ep) = 45, trace = from 24 to 32
ep_idx[i] = 262
buffer_index = 183 , len(sampled_ep) = 81, trace = from 20 to 28
ep_idx[i] = 288
buffer_index = 209 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 293
buffer_index = 214 , len(sampled_ep) = 81, trace = from 34 to 42
ep_idx[i] = 251
buffer_index = 172 , len(sampled_ep) = 133, trace = from 31 to 39
ep_idx[i] = 157
buffer_index = 78 , len(sampled_ep) = 103, trace = from 68 to 76
ep_idx[i] = 307
buffer_index = 228 , len(sampled_ep) = 4

buffer_index = 124 , len(sampled_ep) = 81, trace = from 7 to 15
ep_idx[i] = 139
buffer_index = 60 , len(sampled_ep) = 133, trace = from 41 to 49
ep_idx[i] = 103
buffer_index = 24 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 237
buffer_index = 158 , len(sampled_ep) = 116, trace = from 5 to 13
ep_idx[i] = 119
buffer_index = 40 , len(sampled_ep) = 183, trace = from 6 to 14
ep_idx[i] = 345
buffer_index = 266 , len(sampled_ep) = 108, trace = from 89 to 97
ep_idx[i] = 321
buffer_index = 242 , len(sampled_ep) = 81, trace = from 22 to 30
ep_idx[i] = 288
buffer_index = 209 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 66 to 74
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 102 to 110
ep_idx[i] = 275
buffer_index = 196 , len(sampled_ep) = 81, trace = from 62 to 70
ep_idx[i] = 126
buffer_index = 47 , len(sampled_ep) = 46, trace = from 24 to 32
ep_idx[i] = 368
buffer_index = 289 , len(sam

buffer_index = 255 , len(sampled_ep) = 113, trace = from 45 to 53
ep_idx[i] = 254
buffer_index = 175 , len(sampled_ep) = 116, trace = from 0 to 8
ep_idx[i] = 86
buffer_index = 7 , len(sampled_ep) = 77, trace = from 47 to 55
ep_idx[i] = 289
buffer_index = 210 , len(sampled_ep) = 83, trace = from 19 to 27
ep_idx[i] = 306
buffer_index = 227 , len(sampled_ep) = 117, trace = from 109 to 117
ep_idx[i] = 136
buffer_index = 57 , len(sampled_ep) = 108, trace = from 94 to 102
ep_idx[i] = 263
buffer_index = 184 , len(sampled_ep) = 51, trace = from 27 to 35
ep_idx[i] = 202
buffer_index = 123 , len(sampled_ep) = 143, trace = from 102 to 110
ep_idx[i] = 175
buffer_index = 96 , len(sampled_ep) = 102, trace = from 76 to 84
ep_idx[i] = 281
buffer_index = 202 , len(sampled_ep) = 178, trace = from 100 to 108
ep_idx[i] = 331
buffer_index = 252 , len(sampled_ep) = 84, trace = from 25 to 33
ep_idx[i] = 92
buffer_index = 13 , len(sampled_ep) = 281, trace = from 258 to 266
ep_idx[i] = 331
buffer_index = 252 ,

buffer_index = 54 , len(sampled_ep) = 81, trace = from 41 to 49
ep_idx[i] = 117
buffer_index = 38 , len(sampled_ep) = 81, trace = from 43 to 51
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 136
buffer_index = 57 , len(sampled_ep) = 108, trace = from 83 to 91
ep_idx[i] = 135
buffer_index = 56 , len(sampled_ep) = 211, trace = from 146 to 154
ep_idx[i] = 119
buffer_index = 40 , len(sampled_ep) = 183, trace = from 165 to 173
ep_idx[i] = 296
buffer_index = 217 , len(sampled_ep) = 108, trace = from 15 to 23
ep_idx[i] = 245
buffer_index = 166 , len(sampled_ep) = 75, trace = from 37 to 45
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 102 to 110
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9912235000000724
epsilon is = 0.9912220000000724
epsilon is = 0.9912205000000724
epsilon is = 0.9912190000000725
epsilon is = 0.9912175000000725
Target Set Success
ep_idx = [234, 141, 356, 370, 112, 175, 152, 334, 178, 210, 334, 2

ep_idx[i] = 258
buffer_index = 179 , len(sampled_ep) = 76, trace = from 31 to 39
ep_idx[i] = 309
buffer_index = 230 , len(sampled_ep) = 115, trace = from 45 to 53
ep_idx[i] = 339
buffer_index = 260 , len(sampled_ep) = 78, trace = from 6 to 14
ep_idx[i] = 135
buffer_index = 56 , len(sampled_ep) = 211, trace = from 159 to 167
ep_idx[i] = 158
buffer_index = 79 , len(sampled_ep) = 83, trace = from 51 to 59
ep_idx[i] = 136
buffer_index = 57 , len(sampled_ep) = 108, trace = from 31 to 39
ep_idx[i] = 113
buffer_index = 34 , len(sampled_ep) = 105, trace = from 24 to 32
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 101 to 109
ep_idx[i] = 193
buffer_index = 114 , len(sampled_ep) = 108, trace = from 15 to 23
ep_idx[i] = 158
buffer_index = 79 , len(sampled_ep) = 83, trace = from 44 to 52
ep_idx[i] = 182
buffer_index = 103 , len(sampled_ep) = 81, trace = from 67 to 75
ep_idx[i] = 243
buffer_index = 164 , len(sampled_ep) = 81, trace = from 39 to 47
ep_idx[i] = 280
buffer_i

buffer_index = 137 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 295
buffer_index = 216 , len(sampled_ep) = 81, trace = from 58 to 66
ep_idx[i] = 206
buffer_index = 127 , len(sampled_ep) = 81, trace = from 32 to 40
ep_idx[i] = 313
buffer_index = 234 , len(sampled_ep) = 81, trace = from 27 to 35
sampledTraces.shape = (32, 8, 6)
epsilon is = 0.9911860000000727
epsilon is = 0.9911845000000727
epsilon is = 0.9911830000000728
epsilon is = 0.9911815000000728
epsilon is = 0.9911800000000728
Target Set Success
ep_idx = [153, 260, 129, 315, 163, 348, 347, 291, 159, 354, 330, 292, 157, 352, 343, 157, 350, 208, 350, 115, 329, 303, 374, 360, 117, 127, 216, 234, 370, 199, 377, 316]
exp_idx = [61, 105, 13, 74, 19, 51, 21, 102, 47, 32, 86, 69, 46, 40, 35, 42, 49, 67, 12, 15, 20, 18, 25, 42, 20, 21, 17, 143, 155, 51, 18, 8]
idx_offset = 79, self.episode_index = 378, len(self.buffer) = 300
ep_idx[i] = 153
buffer_index = 74 , len(sampled_ep) = 81, trace = from 54 to 62
ep_idx[i] = 260
buffer

buffer_index = 252 , len(sampled_ep) = 84, trace = from 66 to 74
ep_idx[i] = 328
buffer_index = 249 , len(sampled_ep) = 75, trace = from 17 to 25
ep_idx[i] = 237
buffer_index = 158 , len(sampled_ep) = 116, trace = from 39 to 47
ep_idx[i] = 348
buffer_index = 269 , len(sampled_ep) = 77, trace = from 47 to 55
ep_idx[i] = 82
buffer_index = 3 , len(sampled_ep) = 73, trace = from 33 to 41
ep_idx[i] = 246
buffer_index = 167 , len(sampled_ep) = 47, trace = from 10 to 18
ep_idx[i] = 116
buffer_index = 37 , len(sampled_ep) = 162, trace = from 11 to 19
ep_idx[i] = 154
buffer_index = 75 , len(sampled_ep) = 153, trace = from 43 to 51
ep_idx[i] = 103
buffer_index = 24 , len(sampled_ep) = 81, trace = from 29 to 37
ep_idx[i] = 362
buffer_index = 283 , len(sampled_ep) = 104, trace = from 66 to 74
ep_idx[i] = 256
buffer_index = 177 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 87 to 95
ep_idx[i] = 163
buffer_index = 84 , len(sampl

epsilon is = 0.9911425000000731
Target Set Success
ep_idx = [332, 286, 115, 125, 332, 261, 110, 287, 162, 278, 135, 242, 140, 350, 200, 289, 363, 203, 134, 177, 125, 272, 323, 319, 84, 134, 85, 346, 225, 303, 339, 287]
exp_idx = [56, 32, 206, 50, 13, 19, 92, 99, 19, 24, 192, 49, 15, 171, 14, 11, 48, 64, 22, 19, 27, 9, 34, 57, 18, 113, 94, 44, 14, 22, 16, 143]
idx_offset = 79, self.episode_index = 378, len(self.buffer) = 300
ep_idx[i] = 332
buffer_index = 253 , len(sampled_ep) = 81, trace = from 49 to 57
ep_idx[i] = 286
buffer_index = 207 , len(sampled_ep) = 147, trace = from 25 to 33
ep_idx[i] = 115
buffer_index = 36 , len(sampled_ep) = 299, trace = from 199 to 207
ep_idx[i] = 125
buffer_index = 46 , len(sampled_ep) = 75, trace = from 43 to 51
ep_idx[i] = 332
buffer_index = 253 , len(sampled_ep) = 81, trace = from 6 to 14
ep_idx[i] = 261
buffer_index = 182 , len(sampled_ep) = 81, trace = from 12 to 20
ep_idx[i] = 110
buffer_index = 31 , len(sampled_ep) = 137, trace = from 85 to 93
ep_i

buffer_index = 148 , len(sampled_ep) = 112, trace = from 10 to 18
ep_idx[i] = 132
buffer_index = 53 , len(sampled_ep) = 81, trace = from 37 to 45
ep_idx[i] = 255
buffer_index = 176 , len(sampled_ep) = 81, trace = from 51 to 59
ep_idx[i] = 83
buffer_index = 4 , len(sampled_ep) = 81, trace = from 68 to 76
ep_idx[i] = 256
buffer_index = 177 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 144
buffer_index = 65 , len(sampled_ep) = 81, trace = from 72 to 80
ep_idx[i] = 246
buffer_index = 167 , len(sampled_ep) = 47, trace = from 33 to 41
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 92 to 100
ep_idx[i] = 311
buffer_index = 232 , len(sampled_ep) = 84, trace = from 68 to 76
ep_idx[i] = 318
buffer_index = 239 , len(sampled_ep) = 81, trace = from 57 to 65
ep_idx[i] = 106
buffer_index = 27 , len(sampled_ep) = 103, trace = from 73 to 81
ep_idx[i] = 169
buffer_index = 90 , len(sampled_ep) = 81, trace = from 14 to 22
ep_idx[i] = 290
buffer_index = 211 , len(sampled

buffer_index = 139 , len(sampled_ep) = 80, trace = from 12 to 20
ep_idx[i] = 229
buffer_index = 150 , len(sampled_ep) = 81, trace = from 13 to 21
ep_idx[i] = 195
buffer_index = 116 , len(sampled_ep) = 81, trace = from 10 to 18
ep_idx[i] = 246
buffer_index = 167 , len(sampled_ep) = 47, trace = from 17 to 25
ep_idx[i] = 306
buffer_index = 227 , len(sampled_ep) = 117, trace = from 2 to 10
ep_idx[i] = 172
buffer_index = 93 , len(sampled_ep) = 81, trace = from 11 to 19
ep_idx[i] = 334
buffer_index = 255 , len(sampled_ep) = 113, trace = from 79 to 87
ep_idx[i] = 213
buffer_index = 134 , len(sampled_ep) = 148, trace = from 10 to 18
ep_idx[i] = 171
buffer_index = 92 , len(sampled_ep) = 81, trace = from 9 to 17
ep_idx[i] = 253
buffer_index = 174 , len(sampled_ep) = 76, trace = from 2 to 10
ep_idx[i] = 371
buffer_index = 292 , len(sampled_ep) = 299, trace = from 52 to 60
ep_idx[i] = 231
buffer_index = 152 , len(sampled_ep) = 77, trace = from 6 to 14
ep_idx[i] = 305
buffer_index = 226 , len(sampl

In [7]:
# Sanity-check the replay buffer: wrapper type, container type, episode count.
print(type(myBuffer))
for describe in (type, len):
    print(describe(myBuffer.buffer))
# Disabled save/load round trip (presumably used to check that persisting the
# buffer and reloading it gives back the same contents).
#myBuffer.save(path2mdl)
#myBuffer = experience_buffer()
#myBuffer.load(path2mdl)
# Same report again, then the shape of the episode stored at index 98.
for describe in (type, len):
    print(describe(myBuffer.buffer))
print(myBuffer.buffer[98].shape)

<class '__main__.experience_buffer'>
<class 'list'>
100
<class 'list'>
100
(51, 6)


In [11]:
# Draw one training batch from the replay buffer through its PRsample method.
# NOTE(review): `i` is not assigned in this cell, so it holds whatever value an
# earlier cell left behind - confirm it is the intended argument.
# NOTE(review): the recorded run of this cell ended with a ValueError about
# operands that could not be broadcast together; the cause is not visible here.
trainBatch = myBuffer.PRsample(batch_size,trace_length, i, num_episodes)

ValueError: operands could not be broadcast together with shapes (270,1,1,6) (7056,) 

In [11]:
# Prototype of prioritized trace sampling, exercised on a single episode.
#
# Reads notebook globals set by earlier cells: td, episodeBuffer, trace_length,
# batch_size and myBuffer (its .alpha exponent and its .buffer episode list).
print(td)

# View the episode as a (steps, 6) table; column 5 is read as the TD error.
episode = episodeBuffer
episode = np.reshape(np.array(episode), [len(episode), 6])
print(episode.shape)
print(episode[:, 5].shape)
td_error = episode[:, 5]
# Proportional priority; the small constant keeps every priority above zero
# so that no experience ends up with a sampling probability of exactly 0.
priority = np.absolute(td_error) + 1e-9
print(priority.shape)

# Build the priority replay tuple list for this episode.
# Every candidate experience gets a tuple of the form:
# (episode_index, experience_index, priority)
# Candidates start at trace_length - 1: that is the first step which, together
# with the steps before it, makes up a full trace of trace_length experiences.
episode_index = 0

exp_prio_tuples = []
for experience_index in range(trace_length - 1, len(episode[:, 5])):
    tup = (episode_index, experience_index, float(priority[experience_index]))
    exp_prio_tuples.append(tup)


print(exp_prio_tuples[2])
print(exp_prio_tuples[0][0])

# Sampling probability of each candidate: priority**alpha / sum(priority**alpha).
priorities_poweralpha = np.power([tup[2] for tup in exp_prio_tuples], myBuffer.alpha)
print(myBuffer.alpha)
print(priorities_poweralpha.shape)
sum_priorities_poweralpha = np.sum(priorities_poweralpha)
print(sum_priorities_poweralpha)
sampling_probabilities = np.divide(priorities_poweralpha, sum_priorities_poweralpha)
print(sampling_probabilities.shape)
# Indexes into exp_prio_tuples, drawn with replacement (np.random.choice default).
sampled_indexes = np.random.choice(len(exp_prio_tuples), batch_size, p=sampling_probabilities)
print(sampled_indexes)

sampled_tuples = [exp_prio_tuples[idx] for idx in sampled_indexes]
ep_idx = [tup[0] for tup in sampled_tuples]
print(ep_idx)
exp_idx = [tup[1] for tup in sampled_tuples]
print(exp_idx)

# Cut one trace of trace_length consecutive experiences per sampled tuple.
# NOTE(review): the traces are read from myBuffer.buffer[ep_idx[i]] while the
# priorities above come from episodeBuffer - confirm both are the same episode.
sampledTraces = []
for i in range(0, batch_size):
    sampled_ep = myBuffer.buffer[ep_idx[i]]
    sampled_ep = np.reshape(np.array(sampled_ep), [len(sampled_ep), 6])
    # The window must END on the sampled experience (inclusive). The previous
    # slice [exp_idx - trace_length : exp_idx] left that experience out and,
    # for exp_idx == trace_length - 1, started at -1, which produced an empty
    # trace and broke the reshape below.
    sampled_exp = sampled_ep[exp_idx[i] - trace_length + 1:exp_idx[i] + 1]
    sampledTraces.append(sampled_exp)
sampledTraces = np.array(sampledTraces)
print(sampledTraces.shape)
# Flatten (batch, trace, 6) into (batch * trace, 6).
result = np.reshape(sampledTraces, [batch_size * trace_length, 6])

[ 0.00737692]
(21, 6)
(21,)
(21,)
(0, 9, 0.006785107310861349)
0
0.4997
(14,)
1.22897394469
(14,)
[7 2 1 8]
[0, 0, 0, 0]
[14, 9, 8, 15]
(4, 8, 6)


In [13]:
# Shape of the fourth sampled trace.
fourth_trace = sampledTraces[3]
print(fourth_trace.shape)
# Episode index stored in the first priority tuple.
first_tuple = exp_prio_tuples[0]
print(first_tuple[0])
# Drop the first tuple and show the list length before and after.
print(len(exp_prio_tuples))
del exp_prio_tuples[0]
print(len(exp_prio_tuples))

(8, 6)
0
14
13
