# Deep Recurrent Q-Network for VizDoom


In [1]:
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy.misc
import os
import csv
import itertools
import tensorflow.contrib.slim as slim
%matplotlib inline

from helper2 import *

### Load the game environment

In [2]:
from vizdoom import *
a_size = 3 # Agent can move Left, Right, or Fire
image_size = 84

use_other_buffers = False #Wheter we use the depth buffer and label buffer instead of the screen buffer
use_RGB = False #Whether we use GRB or black and white
if use_other_buffers == True:
    image_chls = 2
else:
    if use_RGB == True:
        image_chls = 3
    else:
        image_chls = 1

#The Below code is related to setting up the Doom environment
game = DoomGame()
game.set_doom_scenario_path("defend_the_center.wad")  #This corresponds to the simple task we will pose our agent
game.load_config("defend_the_center.cfg")
game.set_doom_map("map01")
game.set_screen_resolution(ScreenResolution.RES_160X120)

if use_RGB == True:
    game.set_screen_format(ScreenFormat.RGB8)
else:
    game.set_screen_format(ScreenFormat.GRAY8)
    
game.set_render_hud(False)
game.set_render_crosshair(False)
game.set_render_weapon(True)
game.set_render_decals(False)
game.set_render_particles(False)

#Enable other usefull buffers for test purpose
game.set_depth_buffer_enabled(True)
game.set_automap_buffer_enabled(True)
game.set_labels_buffer_enabled(True)

game.add_available_button(Button.TURN_LEFT)
game.add_available_button(Button.TURN_RIGHT)
game.add_available_button(Button.ATTACK)
actions_list = np.identity(a_size,dtype=bool).tolist()
print(actions_list)

game.add_available_game_variable(GameVariable.AMMO2)
game.add_available_game_variable(GameVariable.POSITION_X)
game.add_available_game_variable(GameVariable.POSITION_Y)
game.set_episode_timeout(300)
game.set_episode_start_time(0)
game.set_window_visible(False)
game.set_sound_enabled(False)
#game.set_living_reward(-1)
game.set_mode(Mode.PLAYER)
game.init()

#End Doom set-up

env = game

[[True, False, False], [False, True, False], [False, False, True]]


### Implementing the network itself

In [3]:
class Qnetwork():
    def __init__(self,h_size,rnn_cell,myScope):
        #The network recieves a frame from the game, flattened into an array.
        #It then resizes it and processes it through four convolutional layers.
        
            
        self.scalarInput =  tf.placeholder(shape=[None,image_size * image_size * image_chls],dtype=tf.float32)
        self.imageIn = tf.reshape(self.scalarInput,shape=[-1,image_size,image_size,image_chls])
        self.conv1 = slim.convolution2d( \
            inputs=self.imageIn,num_outputs=32,\
            kernel_size=[8,8],stride=[4,4],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv1')
        self.conv2 = slim.convolution2d( \
            inputs=self.conv1,num_outputs=64,\
            kernel_size=[4,4],stride=[2,2],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv2')
        self.conv3 = slim.convolution2d( \
            inputs=self.conv2,num_outputs=64,\
            kernel_size=[3,3],stride=[1,1],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv3')
        self.conv4 = slim.convolution2d( \
            inputs=self.conv3,num_outputs=h_size,\
            kernel_size=[7,7],stride=[1,1],padding='VALID', \
            biases_initializer=None,scope=myScope+'_conv4')
        
        self.trainLength = tf.placeholder(dtype=tf.int32)
        #We take the output from the final convolutional layer and send it to a recurrent layer.
        #The input must be reshaped into [batch x trace x units] for rnn processing, 
        #and then returned to [batch x units] when sent through the upper levles.
        self.batch_size = tf.placeholder(dtype=tf.int32,shape=[])
        self.convFlat = tf.reshape(slim.flatten(self.conv4),[self.batch_size,self.trainLength,h_size])
        self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
        self.rnn,self.rnn_state = tf.nn.dynamic_rnn(\
                inputs=self.convFlat,cell=rnn_cell,dtype=tf.float32,initial_state=self.state_in,scope=myScope+'_rnn')
        self.rnn = tf.reshape(self.rnn,shape=[-1,h_size])
        #The output from the recurrent player is then split into separate Value and Advantage streams
        self.streamA,self.streamV = tf.split(self.rnn,2,1)
        self.AW = tf.Variable(tf.random_normal([h_size//2,a_size]))
        self.VW = tf.Variable(tf.random_normal([h_size//2,1]))
        self.Advantage = tf.matmul(self.streamA,self.AW)
        self.Value = tf.matmul(self.streamV,self.VW)
        
        self.salience = tf.gradients(self.Advantage,self.imageIn)
        #Then combine them together to get our final Q-values.
        self.Qout = self.Value + tf.subtract(self.Advantage,tf.reduce_mean(self.Advantage,axis=1,keep_dims=True))
        self.predict = tf.argmax(self.Qout,1)
        
        #Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values.
        self.targetQ = tf.placeholder(shape=[None],dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None],dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions,a_size,dtype=tf.float32)
        
        self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
        
        self.td_error = tf.square(self.targetQ - self.Q)
        
        #In order to only propogate accurate gradients through the network, we will mask the first
        #half of the losses for each trace as per Lample & Chatlot 2016
        self.maskA = tf.zeros([self.batch_size,self.trainLength//2])
        self.maskB = tf.ones([self.batch_size,self.trainLength//2])
        self.mask = tf.concat([self.maskA,self.maskB],1)
        self.mask = tf.reshape(self.mask,[-1])
        self.loss = tf.reduce_mean(self.td_error * self.mask)
        
        #Learning rate is hard-coded to 0.0001
        self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.updateModel = self.trainer.minimize(self.loss)

### Experience Replay

These classes allow us to store experies and sample then randomly to train the network.
Episode buffer stores experiences for each individal episode.
Experience buffer stores entire episodes of experience, and sample() allows us to get training batches needed from the network.

In [21]:
class experience_buffer():
    def __init__(self, trace_length = 8, buffer_size = 1000):
        self.buffer = []
        self.buffer_size = buffer_size
        self.trace_length = trace_length

        #Initialize counters and buffers for prioritixed replay
        self.episode_index = 0
        self.alpha0 = 0.5 #Start-value of alpha, the prioritized replay probability exponent. Annealing is linear to 0.
        self.alpha = self.alpha0
        self.exp_prio_tuples = []
        
    def add(self,episode):
        #Compute the sampling priority of this episode in episode replay and update the sum of priorities
        episode = np.reshape(np.array(episode),[len(episode),6])
        self.td_error = episode[:, 5]
        self.priority = np.absolute(self.td_error) + 1e-18 #proportionnal priority
        #Append episode to the priority replay tuple list
        #Every experience in the episode has a tuple of the form:
        #(episode_index, experience_index, priority, len(episode))
        for experience_index in range(self.trace_length-1, len(episode)-1):
            tup = (self.episode_index, experience_index, float(self.priority[experience_index]), len(episode))
            self.exp_prio_tuples.append(tup)
        #Need something to avoid self.exp_prio_tuples to grow infinitly
        if len(self.buffer) + 1 > self.buffer_size:
            cntr = 0
            for k in range(0, self.exp_prio_tuples[0][3]-1):
                self.exp_prio_tuples.pop(0)
                cntr += 1
            
            print('length of poped element = ' + str(len(self.buffer[0]))+ ' ,cntr = ' + str(cntr))
            self.buffer.pop(0)
            
        self.episode_index += 1
        #self.episode_index = (self.episode_index + 1) % self.buffer_size
        self.buffer.append(episode)
        
        '''
        if len(self.buffer) + 1 >= self.buffer_size:
            self.buffer[0:(1+len(self.buffer))-self.buffer_size] = []
        self.buffer.append(episode)
        '''
            
    def sample(self,batch_size):
        #Ramdomly select a number of episodes egual to batch_size 
        sampled_episodes = random.sample(self.buffer,batch_size)
        #Within the selected episodes, randomly select an experience trace of length trace_length
        sampledTraces = []
        for episode in sampled_episodes:
            point = np.random.randint(0,len(episode)+1-self.trace_length)
            sampledTraces.append(episode[point:point+self.trace_length])
        sampledTraces = np.array(sampledTraces)
        
        return np.reshape(sampledTraces,[batch_size*self.trace_length,6])
    
    def PRsample(self,batch_size):
        #alpha annealing
        self.alpha = self.alpha0 - (self.episode_index * self.alpha0/num_episodes)
        #Compute the sampling probability distribution

        
        priorities_poweralpha = np.power([tup[2] for tup in self.exp_prio_tuples],myBuffer.alpha)
        sum_priorities_poweralpha = np.sum(priorities_poweralpha)
        sampling_probabilities = np.divide(priorities_poweralpha, sum_priorities_poweralpha)
        #Sample episodes using the computed distribution
        sampled_indexes = np.random.choice(len(self.exp_prio_tuples), batch_size, p = sampling_probabilities)
        sampled_tuples = [self.exp_prio_tuples[idx] for idx in sampled_indexes]
        ep_idx = [tup[0] for tup in sampled_tuples]
        print(ep_idx)
        exp_idx = [tup[1] for tup in sampled_tuples]
        print(exp_idx)
        sampledTraces = []
        if self.episode_index <= self.buffer_size:
            idx_offset = 0
        else:
            idx_offset = self.episode_index - self.buffer_size
        print('idx_offset = ' + str(idx_offset) + ', self.episode_index = ' + str(self.episode_index) + ', len(self.buffer) = ' + str(len(self.buffer)))
        for i in range(0,batch_size):
            sampled_ep = myBuffer.buffer[ep_idx[i] - idx_offset]#does This not work?
            print('len(sampled_ep) =' + str(len(sampled_ep)))
            sampled_ep = np.reshape(np.array(sampled_ep),[len(sampled_ep),6])
            sampled_trace = sampled_ep[exp_idx[i]+1-self.trace_length:exp_idx[i]+1]
            sampledTraces.append(sampled_trace)
        sampledTraces = np.array(sampledTraces)
        print(sampledTraces.shape)
        return np.reshape(sampledTraces,[batch_size*self.trace_length,6])
    
    def save(self, path2mdl):
        #Save only last 40 experiences in buffer otherwise ridiculously large file
        np.save(path2mdl + '/experienceBuffer.npy', self.buffer[-40:])
    
    def load(self, path2mdl):
        self.buffer = list(np.load(path2mdl + '/experienceBuffer.npy'))

### Training the network

In [22]:
#Setting the training parameters
batch_size = 4 #How many experience traces to use for each training step.
trace_length = 8 #How long each experience trace will be when training
update_freq = 5 #How often to perform a training step.
y = .99 #Discount factor on the target Q-values
startE = 1 #Starting chance of random action
endE = 0.1 #Final chance of random action

prioritized_replay = True
load_model = False #Whether to load a saved model.
if load_model == True:
    last_saved_ep = 3000 #This parameter has to be updated to the last checkpoint
else:
    last_saved_ep = 0
path2mdl = "../DeepRL-Agents-Results/drqn" #The path to save our model to.
path2center = "../DeepRL-Agents-Results/Center" #The path to save the Center information to
h_size = 512 #The size of the final convolutional layer before splitting it into Advantage and Value streams.
buffer_size = 10 #Size of the episode buffer in number of episodes
max_epLength = 300 #The max allowed length of our episode.
anneling_steps = max_epLength*100 #How many steps of training to reduce startE to endE.
num_episodes = 10000 #How many episodes of game environment to train network with.
pre_train_steps = max_epLength*1 #max_epLength*100 #How many steps of random actions before training begins. need to be a multiple of max_epLength
time_per_step =  0.025 #Length of each step used in gif creation
summaryLength = 100 #Number of epidoes to periodically save for analysis
tau = 0.001 #Rate at with the target network is update in regards to the main network

In [23]:
#


#We define the cells for the primary and target q-networks

tf.reset_default_graph()

cell = tf.contrib.rnn.BasicLSTMCell(num_units=h_size,state_is_tuple=True)
cellT = tf.contrib.rnn.BasicLSTMCell(num_units=h_size,state_is_tuple=True)
mainQN = Qnetwork(h_size,cell,'main')
targetQN = Qnetwork(h_size,cellT,'target')
trainables = tf.trainable_variables()
init = tf.global_variables_initializer()
targetOps = updateTargetGraph(trainables,tau)
saver = tf.train.Saver(max_to_keep=5)

#create lists to contain total rewards and steps per episode
jList = []
rList = []


#Set the rate of random action decrease. 
e = startE
stepDrop = (startE - endE)/anneling_steps

#Make a path for our model to be saved in.
if not os.path.exists(path2mdl):
    os.makedirs(path2mdl)

with tf.Session() as sess:
    if load_model == True:
        print ('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(path2mdl)
        saver.restore(sess,ckpt.model_checkpoint_path)
        #Rough (over)estimate of the total number of steps since the beginning of training
        total_steps = last_saved_ep*300/update_freq
        myBuffer = experience_buffer(trace_length, buffer_size)
        myBuffer.load(path2mdl)
    else:
        #INITIALIZE VARIABLES AND MODEL


        myBuffer = experience_buffer(trace_length,buffer_size)

        total_steps = 0
        
        sess.run(init)
        #Write the first line of the master log-file for the Control Center
        with open(path2center + '/log.csv', 'w') as myfile:
            wr = csv.writer(myfile, quoting=csv.QUOTE_ALL, lineterminator = '\n')
            wr.writerow(['Episode','Length','Reward','IMG','LOG','SAL'])   
        #Set the target network to be equal to the primary network.
        updateTarget(targetOps,sess)
    
    for i in range(last_saved_ep, num_episodes):
        #print(i)
        episodeBuffer = []
        
        #Reset environment and get first new observation
        env.new_episode()
        if use_other_buffers == True:
            st = game.get_state()
            dP = st.depth_buffer
            lP = st.labels_buffer
            sP = st.screen_buffer
            s = processBuffers(image_size, dP, lP, sP)
        else:
            sP = env.get_state().screen_buffer
            s = processImage(sP, image_size)
        d = False
        rAll = 0
        j = 0
        #Reset the recurrent layer's hidden state every episode
        state = (np.zeros([1,h_size]),np.zeros([1,h_size])) 
        #The Q-Network
        while j < max_epLength:
            
            if image_chls == 2:
                s_in = s[0:-image_size*image_size]
            else:
                s_in = s
            j+=1
            #Choose an action by greedily (with e chance of random action) from the Q-network
            if np.random.rand(1) < e or total_steps < pre_train_steps:
                #Only update the state of the RNN layer

                state1 = sess.run(mainQN.rnn_state,
                                  feed_dict={mainQN.scalarInput:[s_in/255.0],
                                             mainQN.trainLength:1, 
                                             mainQN.state_in:state,
                                             mainQN.batch_size:1})
                #Choose an action randomly
                a = np.random.randint(0,a_size)
                
            else:
                #Update the state of the RNN layer AND choose the best action
                a, state1 = sess.run([mainQN.predict,mainQN.rnn_state],
                                     feed_dict={mainQN.scalarInput:[s_in/255.0],
                                                mainQN.trainLength:1,
                                                mainQN.state_in:state,
                                                mainQN.batch_size:1})
                a = a[0]
                
            r = env.make_action(actions_list[a])
            d = env.is_episode_finished()
            if d == False:
                if use_other_buffers == True:
                    st1 = game.get_state()
                    d1P = st1.depth_buffer
                    l1P = st1.labels_buffer
                    s1P = st1.screen_buffer
                    s1 = processBuffers(image_size, d1P, l1P, s1P)
                else:
                    s1P = env.get_state().screen_buffer
                    s1 = processImage(s1P, image_size)
            else:
                break

            total_steps += 1
            
            #Compute the td error to use for prioritized replay
            if image_chls == 2:
                s1_in = s1[0:-image_size*image_size]
            else:
                s1_in = s1
            
            Q1 = sess.run(mainQN.predict,
                          feed_dict={mainQN.scalarInput:[s1_in/255.0],
                                     mainQN.trainLength:1,
                                     mainQN.state_in:state1,
                                     mainQN.batch_size:1})
                    
            Q2 = sess.run(targetQN.Qout,
                          feed_dict={targetQN.scalarInput:[s1_in/255.0],
                                     targetQN.trainLength:1,
                                     targetQN.state_in:state1,
                                     targetQN.batch_size:1})
            
            #print('Q1.shape = ' + str(Q1.shape))
            #print('Q2.shape = ' + str(Q2.shape))        
            end_multiplier = -(d - 1)
            doubleQ = Q2[0, Q1]
            #print('doubleQ.shape = ' + str(doubleQ.shape))
            targetQ = r + (y*doubleQ * end_multiplier)
            #print('targetQ.shape = ' + str(targetQ.shape))
            currentaction = np.array(a, ndmin=1)
            #print('currentaction.shape = ' + str(currentaction.shape))

            td = sess.run(mainQN.td_error,
                     feed_dict={mainQN.scalarInput:[s_in/255.0],
                                mainQN.targetQ:targetQ,
                                mainQN.actions:currentaction,
                                mainQN.trainLength:1,
                                mainQN.state_in:state,
                                mainQN.batch_size:1})
            

            
            episodeBuffer.append(np.reshape(np.array([s,a,r,s1,d,td]),[1,6]))
            
            if total_steps > pre_train_steps:
                if e > endE:
                    e -= stepDrop
                
                #Update the networks at a cetain frequency (every n experiences)
                if total_steps % (update_freq) == 0:
                    updateTarget(targetOps,sess)
                    #Reset the recurrent layer's hidden state
                    state_train = (np.zeros([batch_size,h_size]),np.zeros([batch_size,h_size])) 
                    #Get a random batch of experiences.
                    if prioritized_replay == True:
                        trainBatch = myBuffer.PRsample(batch_size)
                    else:
                        trainBatch = myBuffer.sample(batch_size)

                    train_s = list(zip(trainBatch[:, 0]))
                    train_s1 = list(zip(trainBatch[:, 3]))
                    train_s = np.vstack(train_s)
                    train_s1 = np.vstack(train_s1)

                    if image_chls == 2:
                        train_s = train_s[:,0:-image_size*image_size]
                        train_s1 = train_s1[:,0:-image_size*image_size]

                    #Below we perform the Double-DQN update to the target Q-values
                    Q1 = sess.run(mainQN.predict,
                                  feed_dict={mainQN.scalarInput:np.vstack(train_s1/255.0),
                                             mainQN.trainLength:trace_length,
                                             mainQN.state_in:state_train,
                                             mainQN.batch_size:batch_size})
                    
                    Q2 = sess.run(targetQN.Qout,
                                  feed_dict={targetQN.scalarInput:np.vstack(train_s1/255.0),
                                             targetQN.trainLength:trace_length,
                                             targetQN.state_in:state_train,
                                             targetQN.batch_size:batch_size})
                    
                    end_multiplier = -(trainBatch[:,4] - 1)
                    doubleQ = Q2[range(batch_size*trace_length),Q1]
                    targetQ = trainBatch[:,2] + (y*doubleQ * end_multiplier)
                    
                    #Update the network with our target values.
                    sess.run(mainQN.updateModel,
                             feed_dict={mainQN.scalarInput:np.vstack(train_s/255.0),
                                        mainQN.targetQ:targetQ,
                                        mainQN.actions:trainBatch[:,1],
                                        mainQN.trainLength:trace_length,
                                        mainQN.state_in:state_train,
                                        mainQN.batch_size:batch_size})
            rAll += r
            s = s1
            state = state1
            
            if use_other_buffers == True:
                lP = l1P
                dP = d1P
                sP = s1P
            else:
                sP = s1P
            

            if d == True:

                break

        #Add the episode to the experience buffer
        bufferArray = np.array(episodeBuffer)
        episodeBuffer = list(zip(bufferArray))
        myBuffer.add(episodeBuffer)
        jList.append(j)
        rList.append(rAll)

        #Periodically save the model. 
        if i % 1000 == 0 and i != last_saved_ep:
            saver.save(sess, path2mdl + '/model-'+str(i)+'.cptk', global_step = i)
            myBuffer.save(path2mdl)
            print ("Saved Model")
        if len(rList) % summaryLength == 0 and len(rList) != 0:
            #print (total_steps,np.mean(rList[-summaryLength:]), e)
            saveToCenter(i,rList,jList,
                         np.reshape(np.array(episodeBuffer),[len(episodeBuffer),6]),
                         summaryLength,
                         h_size,sess,mainQN,time_per_step,
                         image_size, image_chls, image_chls,
                         path2center)
    saver.save(sess,path2mdl + '/model-'+str(i)+'.cptk')

Target Set Success
Target Set Success
[6, 7, 0, 2]
[88, 222, 71, 251]
idx_offset = 0, self.episode_index = 10, len(self.buffer) = 10
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =268
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[9, 9, 7, 6]
[39, 31, 175, 161]
idx_offset = 0, self.episode_index = 10, len(self.buffer) = 10
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[0, 7, 9, 1]
[116, 292, 40, 240]
idx_offset = 0, self.episode_index = 10, len(self.buffer) = 10
len(sampled_ep) =268
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[1, 9, 8, 5]
[39, 20, 198, 250]
idx_offset = 0, self.episode_index = 10, len(self.buffer) = 10
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[3, 1, 0, 1]
[138, 102, 31, 11]
idx_offset = 0, self.episode_index = 10, len(self.buffer) = 10
len(sampled_ep) =270
len(sampled_e

Target Set Success
[2, 4, 3, 10]
[35, 35, 213, 234]
idx_offset = 0, self.episode_index = 11, len(self.buffer) = 11
len(sampled_ep) =294
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[6, 9, 3, 6]
[223, 60, 14, 51]
idx_offset = 0, self.episode_index = 11, len(self.buffer) = 11
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[8, 0, 10, 1]
[20, 204, 180, 67]
idx_offset = 0, self.episode_index = 11, len(self.buffer) = 11
len(sampled_ep) =270
len(sampled_ep) =268
len(sampled_ep) =282
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[2, 0, 3, 1]
[292, 170, 25, 133]
idx_offset = 0, self.episode_index = 11, len(self.buffer) = 11
len(sampled_ep) =294
len(sampled_ep) =268
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[3, 10, 3, 0]
[239, 183, 178, 40]
idx_offset = 0, self.episode_index = 11, len(self.buffer) = 11
len(sampled_ep) =270
len(sampled_ep) =282
len(samp

Target Set Success
[11, 8, 10, 6]
[222, 100, 35, 111]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =282
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[10, 7, 1, 7]
[150, 211, 215, 124]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[5, 4, 8, 10]
[139, 40, 254, 270]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[2, 5, 6, 6]
[32, 174, 25, 16]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =294
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[6, 4, 8, 0]
[38, 144, 255, 147]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =270
len(sampled_ep) =270
len(

Target Set Success
[7, 1, 10, 10]
[138, 252, 239, 196]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[5, 9, 5, 8]
[257, 166, 259, 240]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[2, 7, 3, 9]
[91, 196, 54, 51]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =294
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[2, 10, 1, 1]
[161, 241, 191, 278]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =294
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[4, 7, 7, 4]
[230, 197, 219, 80]
idx_offset = 0, self.episode_index = 12, len(self.buffer) = 12
len(sampled_ep) =270
len(sampled_ep) =299
len

Target Set Success
[8, 6, 2, 4]
[203, 102, 22, 223]
idx_offset = 0, self.episode_index = 13, len(self.buffer) = 13
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =294
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[10, 8, 2, 9]
[245, 14, 16, 222]
idx_offset = 0, self.episode_index = 13, len(self.buffer) = 13
len(sampled_ep) =282
len(sampled_ep) =270
len(sampled_ep) =294
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[8, 0, 4, 11]
[161, 197, 137, 228]
idx_offset = 0, self.episode_index = 13, len(self.buffer) = 13
len(sampled_ep) =270
len(sampled_ep) =268
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[4, 1, 1, 2]
[188, 226, 127, 200]
idx_offset = 0, self.episode_index = 13, len(self.buffer) = 13
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[3, 11, 1, 2]
[236, 30, 61, 227]
idx_offset = 0, self.episode_index = 13, len(self.buffer) = 13
len(sampled_ep) =270
len(sampled_ep) =299
len(

Target Set Success
[13, 8, 6, 2]
[64, 64, 197, 201]
idx_offset = 0, self.episode_index = 14, len(self.buffer) = 14
len(sampled_ep) =254
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[11, 8, 1, 11]
[119, 50, 107, 82]
idx_offset = 0, self.episode_index = 14, len(self.buffer) = 14
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[1, 9, 2, 0]
[91, 14, 57, 22]
idx_offset = 0, self.episode_index = 14, len(self.buffer) = 14
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =294
len(sampled_ep) =268
(4, 8, 6)
Target Set Success
[1, 7, 7, 6]
[266, 100, 248, 101]
idx_offset = 0, self.episode_index = 14, len(self.buffer) = 14
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[8, 5, 7, 3]
[204, 130, 257, 149]
idx_offset = 0, self.episode_index = 14, len(self.buffer) = 14
len(sampled_ep) =270
len(sampled_ep) =266
len(sam

Target Set Success
[5, 9, 0, 8]
[146, 56, 257, 184]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =268
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[6, 0, 9, 8]
[142, 126, 207, 202]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =270
len(sampled_ep) =268
len(sampled_ep) =266
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[7, 13, 2, 8]
[196, 85, 251, 177]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =299
len(sampled_ep) =254
len(sampled_ep) =294
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[1, 2, 2, 6]
[216, 159, 210, 108]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =299
len(sampled_ep) =294
len(sampled_ep) =294
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[8, 6, 4, 0]
[33, 117, 222, 29]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =270
len(sampled_ep) =270
len(s

Target Set Success
[1, 0, 13, 3]
[210, 74, 63, 190]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =299
len(sampled_ep) =268
len(sampled_ep) =254
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[7, 5, 8, 4]
[145, 232, 172, 127]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[9, 12, 2, 9]
[192, 186, 76, 31]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =294
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[2, 5, 2, 0]
[92, 232, 18, 41]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =294
len(sampled_ep) =266
len(sampled_ep) =294
len(sampled_ep) =268
(4, 8, 6)
Target Set Success
[10, 8, 6, 8]
[223, 188, 65, 19]
idx_offset = 0, self.episode_index = 15, len(self.buffer) = 15
len(sampled_ep) =282
len(sampled_ep) =270
len(samp

Target Set Success
[3, 8, 1, 15]
[83, 242, 119, 37]
idx_offset = 0, self.episode_index = 16, len(self.buffer) = 16
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[2, 2, 3, 2]
[273, 283, 210, 15]
idx_offset = 0, self.episode_index = 16, len(self.buffer) = 16
len(sampled_ep) =294
len(sampled_ep) =294
len(sampled_ep) =270
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[2, 13, 7, 10]
[229, 241, 279, 38]
idx_offset = 0, self.episode_index = 16, len(self.buffer) = 16
len(sampled_ep) =294
len(sampled_ep) =254
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[9, 1, 11, 14]
[85, 156, 140, 288]
idx_offset = 0, self.episode_index = 16, len(self.buffer) = 16
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[13, 9, 4, 2]
[88, 225, 165, 264]
idx_offset = 0, self.episode_index = 16, len(self.buffer) = 16
len(sampled_ep) =254
len(sampled_ep) =266
le

Target Set Success
[5, 8, 7, 2]
[31, 216, 57, 61]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[2, 6, 4, 13]
[275, 241, 224, 123]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =294
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =254
(4, 8, 6)
Target Set Success
[9, 9, 11, 6]
[153, 143, 78, 97]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[4, 0, 14, 13]
[107, 264, 53, 86]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =270
len(sampled_ep) =268
len(sampled_ep) =298
len(sampled_ep) =254
(4, 8, 6)
Target Set Success
[11, 16, 6, 0]
[17, 246, 207, 197]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =299
len(sampled_ep) =290
len(

Target Set Success
[13, 0, 2, 9]
[187, 75, 111, 96]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =254
len(sampled_ep) =268
len(sampled_ep) =294
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[5, 2, 4, 6]
[192, 169, 151, 37]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =266
len(sampled_ep) =294
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[5, 5, 6, 10]
[250, 128, 54, 39]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[9, 11, 0, 16]
[227, 108, 77, 12]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =268
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[15, 0, 16, 13]
[172, 210, 154, 48]
idx_offset = 0, self.episode_index = 17, len(self.buffer) = 17
len(sampled_ep) =299
len(sampled_ep) =268
len

Target Set Success
[7, 4, 2, 0]
[216, 230, 139, 70]
idx_offset = 0, self.episode_index = 18, len(self.buffer) = 18
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =294
len(sampled_ep) =268
(4, 8, 6)
Target Set Success
[2, 4, 0, 15]
[49, 74, 251, 167]
idx_offset = 0, self.episode_index = 18, len(self.buffer) = 18
len(sampled_ep) =294
len(sampled_ep) =270
len(sampled_ep) =268
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[4, 9, 13, 1]
[98, 159, 64, 23]
idx_offset = 0, self.episode_index = 18, len(self.buffer) = 18
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =254
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[2, 15, 4, 5]
[217, 233, 200, 137]
idx_offset = 0, self.episode_index = 18, len(self.buffer) = 18
len(sampled_ep) =294
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[8, 10, 10, 6]
[30, 126, 226, 88]
idx_offset = 0, self.episode_index = 18, len(self.buffer) = 18
len(sampled_ep) =270
len(sampled_ep) =282
len(s

Target Set Success
[16, 5, 2, 4]
[159, 174, 128, 247]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =290
len(sampled_ep) =266
len(sampled_ep) =294
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[6, 7, 9, 15]
[52, 132, 181, 280]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[18, 18, 11, 10]
[154, 44, 43, 264]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[11, 16, 18, 7]
[120, 34, 237, 165]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[8, 10, 9, 9]
[138, 245, 215, 22]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =270
len(sampled_ep) =2

Target Set Success
[1, 6, 10, 5]
[128, 94, 180, 10]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =282
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[1, 6, 3, 10]
[286, 20, 125, 236]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[1, 3, 1, 1]
[221, 258, 180, 132]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[11, 4, 11, 0]
[117, 138, 228, 42]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =268
(4, 8, 6)
Target Set Success
[3, 17, 18, 11]
[196, 172, 43, 126]
idx_offset = 0, self.episode_index = 19, len(self.buffer) = 19
len(sampled_ep) =270
len(sampled_ep) =299


Target Set Success
[19, 11, 11, 10]
[177, 18, 183, 88]
idx_offset = 0, self.episode_index = 20, len(self.buffer) = 20
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[6, 1, 14, 19]
[107, 107, 53, 266]
idx_offset = 0, self.episode_index = 20, len(self.buffer) = 20
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[17, 0, 9, 6]
[200, 122, 142, 116]
idx_offset = 0, self.episode_index = 20, len(self.buffer) = 20
len(sampled_ep) =299
len(sampled_ep) =268
len(sampled_ep) =266
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[1, 7, 3, 18]
[231, 147, 52, 203]
idx_offset = 0, self.episode_index = 20, len(self.buffer) = 20
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[17, 7, 13, 19]
[142, 96, 53, 168]
idx_offset = 0, self.episode_index = 20, len(self.buffer) = 20
len(sampled_ep) =299
len(sampled_ep) =2

Target Set Success
[13, 0, 7, 16]
[64, 160, 228, 182]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =254
len(sampled_ep) =268
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[0, 2, 2, 9]
[202, 253, 250, 39]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =268
len(sampled_ep) =294
len(sampled_ep) =294
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[0, 7, 19, 4]
[39, 242, 77, 265]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =268
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[2, 10, 7, 1]
[271, 86, 175, 184]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =294
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[4, 15, 10, 16]
[48, 32, 266, 246]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =270
len(sampled_ep) =299
le

Target Set Success
[3, 7, 20, 13]
[107, 291, 83, 56]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =254
(4, 8, 6)
Target Set Success
[16, 19, 20, 3]
[215, 295, 163, 45]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[3, 10, 7, 10]
[244, 245, 231, 221]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =270
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[3, 15, 7, 7]
[102, 67, 258, 275]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[16, 9, 19, 15]
[158, 39, 175, 74]
idx_offset = 0, self.episode_index = 21, len(self.buffer) = 21
len(sampled_ep) =290
len(sampled_ep) =2

Target Set Success
[13, 19, 8, 2]
[82, 49, 123, 260]
idx_offset = 0, self.episode_index = 22, len(self.buffer) = 22
len(sampled_ep) =254
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[5, 8, 11, 10]
[47, 162, 177, 35]
idx_offset = 0, self.episode_index = 22, len(self.buffer) = 22
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[9, 8, 12, 17]
[144, 227, 162, 188]
idx_offset = 0, self.episode_index = 22, len(self.buffer) = 22
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[1, 3, 1, 3]
[87, 157, 187, 262]
idx_offset = 0, self.episode_index = 22, len(self.buffer) = 22
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[10, 3, 3, 11]
[238, 197, 30, 109]
idx_offset = 0, self.episode_index = 22, len(self.buffer) = 22
len(sampled_ep) =282
len(sampled_ep) =270


Target Set Success
[22, 17, 9, 9]
[39, 172, 129, 44]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[4, 21, 2, 8]
[244, 38, 51, 165]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =294
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[8, 20, 6, 10]
[262, 162, 181, 228]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[13, 1, 1, 17]
[176, 231, 159, 54]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =254
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[9, 4, 11, 9]
[20, 59, 75, 158]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =266
len(sampled_ep) =270
le

Target Set Success
[19, 18, 1, 21]
[42, 234, 23, 230]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[11, 3, 6, 18]
[228, 227, 25, 34]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[11, 5, 16, 19]
[124, 36, 158, 197]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[21, 1, 10, 0]
[42, 272, 191, 79]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =268
(4, 8, 6)
Target Set Success
[3, 2, 18, 18]
[229, 140, 194, 284]
idx_offset = 0, self.episode_index = 23, len(self.buffer) = 23
len(sampled_ep) =270
len(sampled_ep) =2

Target Set Success
[8, 7, 22, 8]
[259, 166, 161, 234]
idx_offset = 0, self.episode_index = 24, len(self.buffer) = 24
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[0, 0, 4, 12]
[121, 68, 192, 30]
idx_offset = 0, self.episode_index = 24, len(self.buffer) = 24
len(sampled_ep) =268
len(sampled_ep) =268
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[20, 1, 6, 21]
[146, 251, 198, 169]
idx_offset = 0, self.episode_index = 24, len(self.buffer) = 24
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[2, 13, 6, 18]
[42, 123, 201, 290]
idx_offset = 0, self.episode_index = 24, len(self.buffer) = 24
len(sampled_ep) =294
len(sampled_ep) =254
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[12, 19, 17, 2]
[279, 279, 172, 117]
idx_offset = 0, self.episode_index = 24, len(self.buffer) = 24
len(sampled_ep) =299
len(sampled_ep) =

Target Set Success
[3, 1, 24, 0]
[27, 146, 167, 186]
idx_offset = 0, self.episode_index = 25, len(self.buffer) = 25
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =268
(4, 8, 6)
Target Set Success
[1, 10, 22, 16]
[180, 177, 228, 236]
idx_offset = 0, self.episode_index = 25, len(self.buffer) = 25
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[16, 3, 5, 21]
[23, 145, 112, 118]
idx_offset = 0, self.episode_index = 25, len(self.buffer) = 25
len(sampled_ep) =290
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[19, 11, 3, 18]
[278, 234, 92, 132]
idx_offset = 0, self.episode_index = 25, len(self.buffer) = 25
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[9, 9, 18, 14]
[119, 121, 129, 17]
idx_offset = 0, self.episode_index = 25, len(self.buffer) = 25
len(sampled_ep) =266
len(sampled_ep) 

Target Set Success
[9, 20, 4, 2]
[39, 131, 139, 35]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[16, 21, 0, 7]
[246, 107, 206, 123]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =268
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[12, 17, 16, 2]
[93, 166, 242, 192]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[21, 5, 22, 17]
[60, 107, 270, 78]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[5, 11, 7, 20]
[31, 274, 165, 205]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =266
len(sampled_ep) =2

Target Set Success
[8, 24, 7, 19]
[172, 231, 175, 214]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[0, 1, 6, 11]
[33, 54, 162, 224]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =268
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[9, 24, 6, 22]
[16, 8, 28, 286]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[1, 14, 2, 24]
[62, 129, 259, 230]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =294
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[2, 25, 14, 19]
[241, 108, 120, 248]
idx_offset = 0, self.episode_index = 26, len(self.buffer) = 26
len(sampled_ep) =294
len(sampled_ep) =292

Target Set Success
[6, 6, 11, 22]
[116, 16, 21, 192]
idx_offset = 0, self.episode_index = 27, len(self.buffer) = 27
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[0, 5, 18, 4]
[170, 193, 274, 146]
idx_offset = 0, self.episode_index = 27, len(self.buffer) = 27
len(sampled_ep) =268
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[3, 25, 2, 2]
[250, 105, 149, 61]
idx_offset = 0, self.episode_index = 27, len(self.buffer) = 27
len(sampled_ep) =270
len(sampled_ep) =292
len(sampled_ep) =294
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[10, 25, 11, 9]
[222, 30, 7, 241]
idx_offset = 0, self.episode_index = 27, len(self.buffer) = 27
len(sampled_ep) =282
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[23, 21, 11, 10]
[164, 139, 123, 177]
idx_offset = 0, self.episode_index = 27, len(self.buffer) = 27
len(sampled_ep) =274
len(sampled_ep) =2

Target Set Success
[11, 4, 0, 8]
[148, 43, 161, 81]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =268
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[24, 24, 7, 22]
[49, 18, 45, 130]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[0, 23, 19, 13]
[111, 102, 278, 62]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =268
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =254
(4, 8, 6)
Target Set Success
[9, 24, 7, 2]
[39, 171, 86, 44]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[9, 18, 20, 20]
[41, 245, 169, 144]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =266
len(sampled_ep) =299
l

Target Set Success
[9, 18, 19, 8]
[218, 243, 17, 178]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[8, 9, 24, 2]
[179, 83, 168, 10]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[2, 19, 10, 8]
[283, 295, 104, 61]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =294
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[15, 27, 22, 2]
[50, 199, 216, 170]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =294
(4, 8, 6)
Target Set Success
[26, 4, 15, 10]
[176, 204, 62, 214]
idx_offset = 0, self.episode_index = 28, len(self.buffer) = 28
len(sampled_ep) =299
len(sampled_ep) =2

Target Set Success
[18, 8, 2, 14]
[225, 252, 283, 13]
idx_offset = 0, self.episode_index = 29, len(self.buffer) = 29
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =294
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[20, 9, 7, 9]
[172, 217, 202, 123]
idx_offset = 0, self.episode_index = 29, len(self.buffer) = 29
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[23, 11, 19, 6]
[18, 228, 246, 53]
idx_offset = 0, self.episode_index = 29, len(self.buffer) = 29
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[27, 8, 27, 15]
[151, 183, 210, 161]
idx_offset = 0, self.episode_index = 29, len(self.buffer) = 29
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[16, 18, 9, 19]
[149, 234, 248, 277]
idx_offset = 0, self.episode_index = 29, len(self.buffer) = 29
len(sampled_ep) =290
len(sampled_ep

Target Set Success
[10, 2, 10, 25]
[161, 247, 50, 178]
idx_offset = 0, self.episode_index = 30, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_ep) =294
len(sampled_ep) =282
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[19, 10, 13, 19]
[276, 161, 241, 259]
idx_offset = 0, self.episode_index = 30, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =254
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[1, 20, 17, 10]
[291, 17, 241, 198]
idx_offset = 0, self.episode_index = 30, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[0, 13, 22, 8]
[169, 52, 123, 134]
idx_offset = 0, self.episode_index = 30, len(self.buffer) = 30
len(sampled_ep) =268
len(sampled_ep) =254
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[11, 22, 10, 13]
[242, 245, 162, 94]
idx_offset = 0, self.episode_index = 30, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled

Target Set Success
[26, 17, 4, 10]
[194, 85, 102, 29]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[20, 4, 17, 19]
[29, 161, 85, 170]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[6, 18, 9, 19]
[21, 262, 144, 287]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[27, 28, 14, 20]
[56, 167, 199, 146]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[23, 2, 1, 9]
[159, 287, 107, 264]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) 

Target Set Success
[1, 14, 15, 12]
[278, 53, 172, 147]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[15, 9, 10, 8]
[54, 37, 177, 153]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =282
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[7, 6, 7, 7]
[83, 156, 17, 282]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[13, 12, 7, 23]
[10, 270, 159, 16]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =254
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[7, 19, 18, 30]
[156, 32, 251, 152]
idx_offset = 1, self.episode_index = 31, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299

Target Set Success
[18, 30, 17, 24]
[113, 100, 38, 123]
idx_offset = 2, self.episode_index = 32, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[7, 27, 5, 5]
[114, 211, 239, 134]
idx_offset = 2, self.episode_index = 32, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[9, 4, 30, 9]
[195, 10, 176, 213]
idx_offset = 2, self.episode_index = 32, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[18, 20, 31, 3]
[71, 64, 156, 181]
idx_offset = 2, self.episode_index = 32, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[3, 3, 6, 25]
[257, 177, 147, 85]
idx_offset = 2, self.episode_index = 32, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =2

Target Set Success
[9, 8, 10, 14]
[39, 268, 263, 29]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =282
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[11, 12, 6, 3]
[82, 93, 262, 210]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[24, 20, 9, 16]
[145, 157, 101, 132]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[7, 4, 20, 8]
[286, 164, 163, 173]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[9, 7, 18, 16]
[88, 215, 164, 278]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =2

Target Set Success
[3, 25, 7, 23]
[139, 200, 117, 270]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[32, 21, 30, 21]
[173, 64, 68, 19]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[10, 20, 3, 7]
[32, 179, 79, 196]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[20, 28, 9, 4]
[150, 97, 222, 122]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[22, 15, 22, 24]
[216, 267, 254, 163]
idx_offset = 3, self.episode_index = 33, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep)

Target Set Success
[21, 21, 8, 23]
[38, 122, 34, 177]
idx_offset = 4, self.episode_index = 34, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[33, 4, 24, 11]
[78, 87, 147, 165]
idx_offset = 4, self.episode_index = 34, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[9, 28, 4, 24]
[92, 244, 130, 41]
idx_offset = 4, self.episode_index = 34, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[6, 15, 6, 26]
[123, 125, 246, 181]
idx_offset = 4, self.episode_index = 34, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[11, 14, 8, 23]
[123, 204, 10, 230]
idx_offset = 4, self.episode_index = 34, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =

Target Set Success
[27, 24, 14, 16]
[248, 159, 150, 147]
idx_offset = 5, self.episode_index = 35, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =298
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[12, 21, 13, 18]
[89, 234, 193, 16]
idx_offset = 5, self.episode_index = 35, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =254
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[30, 32, 22, 5]
[174, 17, 239, 211]
idx_offset = 5, self.episode_index = 35, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[7, 21, 29, 16]
[180, 278, 238, 150]
idx_offset = 5, self.episode_index = 35, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[30, 10, 22, 8]
[209, 258, 41, 30]
idx_offset = 5, self.episode_index = 35, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled

Target Set Success
[24, 14, 8, 34]
[256, 139, 146, 181]
idx_offset = 5, self.episode_index = 35, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =298
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
length of poped element = 270 ,cntr = 265
Target Set Success
[15, 32, 19, 35]
[214, 196, 183, 35]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[24, 8, 31, 10]
[163, 212, 200, 198]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[16, 20, 31, 13]
[268, 263, 194, 64]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =254
(4, 8, 6)
Target Set Success
[15, 10, 25, 19]
[172, 183, 40, 231]
idx_offset = 6, self.episode_index = 36, len(self.

Target Set Success
[6, 17, 30, 9]
[178, 259, 51, 47]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[14, 18, 21, 11]
[124, 197, 104, 101]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[11, 34, 20, 21]
[17, 186, 131, 97]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[16, 34, 20, 34]
[154, 158, 204, 97]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[18, 34, 21, 16]
[44, 297, 297, 136]
idx_offset = 6, self.episode_index = 36, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled

Target Set Success
[32, 33, 27, 10]
[124, 49, 258, 239]
idx_offset = 7, self.episode_index = 37, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[17, 24, 20, 12]
[149, 234, 103, 293]
idx_offset = 7, self.episode_index = 37, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[25, 8, 32, 22]
[35, 142, 253, 114]
idx_offset = 7, self.episode_index = 37, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[10, 8, 23, 36]
[115, 202, 216, 19]
idx_offset = 7, self.episode_index = 37, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_ep) =270
len(sampled_ep) =274
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[21, 19, 36, 27]
[140, 177, 89, 186]
idx_offset = 7, self.episode_index = 37, len(self.buffer) = 30
len(sampled_ep) =299
len(sampl

Target Set Success
[21, 8, 37, 18]
[57, 114, 153, 207]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[9, 21, 23, 14]
[39, 131, 203, 273]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[22, 22, 16, 30]
[250, 218, 272, 26]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[28, 18, 22, 31]
[92, 44, 256, 286]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[10, 22, 10, 10]
[82, 138, 268, 92]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_

Target Set Success
[31, 35, 16, 12]
[288, 53, 162, 56]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[10, 13, 22, 10]
[279, 97, 178, 43]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_ep) =254
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[36, 29, 21, 29]
[76, 223, 41, 220]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[19, 23, 10, 11]
[40, 32, 59, 127]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =282
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[8, 23, 36, 16]
[89, 194, 240, 232]
idx_offset = 8, self.episode_index = 38, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep

Target Set Success
[19, 14, 34, 21]
[22, 129, 292, 186]
idx_offset = 9, self.episode_index = 39, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[37, 23, 30, 9]
[223, 246, 140, 248]
idx_offset = 9, self.episode_index = 39, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =266
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[35, 19, 21, 24]
[248, 137, 153, 39]
idx_offset = 9, self.episode_index = 39, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[9, 21, 21, 24]
[141, 58, 277, 192]
idx_offset = 9, self.episode_index = 39, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[19, 23, 38, 33]
[132, 268, 242, 108]
idx_offset = 9, self.episode_index = 39, len(self.buffer) = 30
len(sampled_ep) =299
len(samp

Target Set Success
[16, 34, 34, 23]
[235, 139, 42, 141]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[21, 14, 36, 21]
[160, 142, 275, 212]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[19, 34, 22, 18]
[261, 161, 181, 209]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[26, 15, 25, 25]
[20, 297, 36, 77]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[28, 39, 37, 20]
[78, 128, 153, 222]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[33, 33, 10, 37]
[200, 200, 271, 47]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =282
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[27, 20, 26, 21]
[209, 116, 19, 288]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[15, 31, 26, 23]
[282, 139, 148, 71]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[35, 19, 11, 24]
[183, 162, 192, 128]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[20, 32, 20, 24]
[154, 151, 32, 194]
idx_offset = 10, self.episode_index = 40, len(self.buffer) = 30
len(sampled_ep) =270
le

Target Set Success
[12, 37, 38, 21]
[226, 40, 167, 233]
idx_offset = 11, self.episode_index = 41, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[19, 32, 37, 37]
[277, 17, 162, 55]
idx_offset = 11, self.episode_index = 41, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[40, 36, 26, 24]
[68, 131, 17, 29]
idx_offset = 11, self.episode_index = 41, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[21, 16, 16, 25]
[38, 60, 272, 250]
idx_offset = 11, self.episode_index = 41, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =290
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[18, 19, 28, 17]
[197, 101, 237, 173]
idx_offset = 11, self.episode_index = 41, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[20, 23, 19, 33]
[184, 178, 296, 49]
idx_offset = 12, self.episode_index = 42, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[25, 24, 18, 40]
[36, 161, 194, 22]
idx_offset = 12, self.episode_index = 42, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[14, 27, 32, 28]
[208, 53, 16, 55]
idx_offset = 12, self.episode_index = 42, len(self.buffer) = 30
len(sampled_ep) =298
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[39, 15, 34, 39]
[224, 78, 85, 238]
idx_offset = 12, self.episode_index = 42, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[29, 18, 37, 21]
[65, 141, 151, 111]
idx_offset = 12, self.episode_index = 42, len(self.buffer) = 30
len(sampled_ep) =270
len(sam

Target Set Success
[19, 34, 35, 33]
[291, 296, 71, 182]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[42, 25, 30, 28]
[182, 84, 190, 116]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 35, 22, 38]
[130, 86, 211, 128]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[19, 16, 36, 28]
[231, 171, 100, 150]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[19, 18, 21, 21]
[17, 89, 63, 133]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[23, 36, 19, 19]
[23, 274, 27, 295]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[16, 29, 26, 40]
[123, 246, 177, 166]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[14, 38, 17, 26]
[53, 259, 245, 113]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[25, 21, 19, 34]
[156, 52, 175, 161]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[24, 27, 26, 27]
[198, 33, 284, 131]
idx_offset = 13, self.episode_index = 43, len(self.buffer) = 30
len(sampled_ep) =270
len

Target Set Success
[25, 28, 28, 38]
[188, 179, 153, 207]
idx_offset = 14, self.episode_index = 44, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[30, 18, 36, 23]
[24, 44, 35, 267]
idx_offset = 14, self.episode_index = 44, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[31, 34, 34, 32]
[224, 256, 31, 142]
idx_offset = 14, self.episode_index = 44, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[16, 18, 35, 17]
[193, 225, 98, 166]
idx_offset = 14, self.episode_index = 44, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[22, 24, 26, 23]
[57, 116, 148, 237]
idx_offset = 14, self.episode_index = 44, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[16, 17, 34, 18]
[52, 166, 29, 152]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[44, 42, 41, 24]
[270, 182, 189, 157]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[24, 26, 24, 26]
[111, 16, 255, 147]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 24, 18, 26]
[77, 211, 253, 147]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[16, 23, 28, 40]
[143, 250, 91, 22]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =290
len(

Target Set Success
[35, 44, 23, 25]
[245, 213, 252, 124]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[19, 27, 29, 20]
[170, 11, 222, 154]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[16, 34, 42, 39]
[206, 73, 87, 239]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[43, 29, 36, 18]
[201, 101, 224, 252]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =270
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[39, 43, 19, 22]
[92, 62, 177, 38]
idx_offset = 15, self.episode_index = 45, len(self.buffer) = 30
len(sampled_ep) =270
len(

Target Set Success
[21, 42, 39, 41]
[100, 256, 48, 185]
idx_offset = 16, self.episode_index = 46, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[43, 17, 24, 25]
[202, 44, 220, 143]
idx_offset = 16, self.episode_index = 46, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[37, 25, 41, 25]
[55, 171, 152, 36]
idx_offset = 16, self.episode_index = 46, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =244
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[41, 26, 38, 22]
[188, 122, 178, 88]
idx_offset = 16, self.episode_index = 46, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[25, 21, 43, 24]
[152, 140, 29, 248]
idx_offset = 16, self.episode_index = 46, len(self.buffer) = 30
len(sampled_ep) =292
len(

Target Set Success
[24, 37, 36, 26]
[149, 123, 124, 255]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[31, 28, 40, 39]
[107, 119, 12, 103]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[28, 32, 18, 26]
[237, 17, 262, 164]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[26, 40, 41, 30]
[17, 260, 189, 16]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =244
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[31, 17, 22, 25]
[121, 284, 103, 42]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[37, 31, 18, 32]
[157, 37, 13, 176]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[45, 41, 23, 19]
[226, 223, 252, 295]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[45, 33, 38, 32]
[204, 51, 288, 62]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[34, 45, 43, 31]
[199, 65, 23, 122]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[34, 20, 20, 20]
[126, 165, 205, 218]
idx_offset = 17, self.episode_index = 47, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[43, 25, 42, 19]
[231, 164, 47, 244]
idx_offset = 18, self.episode_index = 48, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[44, 47, 38, 24]
[87, 158, 188, 214]
idx_offset = 18, self.episode_index = 48, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[32, 29, 42, 34]
[259, 148, 115, 42]
idx_offset = 18, self.episode_index = 48, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[35, 36, 44, 36]
[216, 18, 134, 252]
idx_offset = 18, self.episode_index = 48, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[44, 47, 21, 20]
[270, 112, 59, 145]
idx_offset = 18, self.episode_index = 48, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[22, 40, 43, 43]
[45, 22, 47, 75]
idx_offset = 19, self.episode_index = 49, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =244
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[21, 27, 32, 47]
[218, 262, 204, 81]
idx_offset = 19, self.episode_index = 49, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[19, 23, 32, 40]
[271, 13, 196, 139]
idx_offset = 19, self.episode_index = 49, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =270
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[34, 20, 19, 23]
[249, 25, 178, 212]
idx_offset = 19, self.episode_index = 49, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[32, 20, 39, 26]
[50, 91, 230, 139]
idx_offset = 19, self.episode_index = 49, len(self.buffer) = 30
len(sampled_ep) =270
len(sam

Target Set Success
[43, 38, 45, 29]
[64, 252, 175, 169]
idx_offset = 20, self.episode_index = 50, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[23, 27, 21, 22]
[24, 191, 266, 158]
idx_offset = 20, self.episode_index = 50, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[24, 35, 26, 33]
[13, 125, 280, 43]
idx_offset = 20, self.episode_index = 50, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[22, 20, 32, 41]
[185, 246, 95, 228]
idx_offset = 20, self.episode_index = 50, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[44, 35, 24, 43]
[119, 258, 171, 72]
idx_offset = 20, self.episode_index = 50, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[31, 42, 46, 37]
[200, 136, 294, 184]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 34, 21, 36]
[40, 190, 212, 35]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[43, 49, 39, 36]
[26, 260, 236, 184]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[26, 39, 22, 26]
[88, 242, 207, 178]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[23, 38, 48, 47]
[16, 226, 217, 250]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =274
len

Target Set Success
[43, 34, 41, 24]
[69, 130, 198, 156]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[37, 49, 25, 45]
[133, 79, 250, 99]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[25, 46, 49, 39]
[262, 203, 219, 234]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[37, 38, 27, 31]
[54, 252, 150, 203]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[28, 23, 44, 35]
[256, 179, 115, 163]
idx_offset = 21, self.episode_index = 51, len(self.buffer) = 30
len(sampled_ep) =299
le

Target Set Success
[25, 32, 28, 33]
[194, 262, 237, 79]
idx_offset = 22, self.episode_index = 52, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[30, 34, 37, 28]
[45, 42, 162, 153]
idx_offset = 22, self.episode_index = 52, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[39, 24, 51, 51]
[132, 93, 281, 228]
idx_offset = 22, self.episode_index = 52, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[26, 25, 49, 39]
[19, 104, 140, 223]
idx_offset = 22, self.episode_index = 52, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[31, 47, 26, 32]
[274, 158, 123, 70]
idx_offset = 22, self.episode_index = 52, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[32, 24, 42, 34]
[83, 248, 7, 128]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[25, 38, 24, 39]
[95, 251, 161, 84]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[43, 26, 36, 32]
[187, 162, 91, 13]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[24, 44, 35, 33]
[158, 173, 123, 184]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[39, 52, 47, 27]
[51, 87, 158, 248]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =270
len(sam

Target Set Success
[44, 43, 41, 44]
[121, 101, 232, 95]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[29, 28, 42, 45]
[197, 286, 179, 78]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[35, 33, 24, 24]
[57, 183, 95, 21]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[43, 26, 31, 24]
[228, 104, 234, 238]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[50, 42, 51, 41]
[102, 59, 276, 15]
idx_offset = 23, self.episode_index = 53, len(self.buffer) = 30
len(sampled_ep) =228
len(s

Target Set Success
[45, 34, 43, 48]
[242, 282, 176, 185]
idx_offset = 24, self.episode_index = 54, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[31, 28, 26, 41]
[277, 288, 129, 233]
idx_offset = 24, self.episode_index = 54, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[29, 34, 46, 27]
[147, 199, 131, 154]
idx_offset = 24, self.episode_index = 54, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[28, 52, 28, 30]
[261, 117, 140, 67]
idx_offset = 24, self.episode_index = 54, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[35, 43, 41, 47]
[60, 193, 89, 66]
idx_offset = 24, self.episode_index = 54, len(self.buffer) = 30
len(sampled_ep) =266
le

Target Set Success
[37, 52, 32, 32]
[255, 148, 50, 44]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[47, 28, 54, 43]
[158, 132, 37, 176]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =216
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[44, 26, 30, 38]
[45, 275, 182, 47]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[51, 42, 35, 38]
[109, 36, 129, 244]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[50, 47, 31, 38]
[94, 158, 255, 263]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =228
len(s

Target Set Success
[45, 34, 36, 47]
[109, 77, 61, 145]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[35, 33, 54, 32]
[20, 145, 37, 10]
idx_offset = 25, self.episode_index = 55, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =216
len(sampled_ep) =270
(4, 8, 6)
length of poped element = 299 ,cntr = 291
Target Set Success
[52, 34, 41, 30]
[19, 63, 62, 77]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[28, 53, 53, 52]
[187, 21, 259, 144]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[43, 50, 37, 52]
[176, 12, 179, 37]
idx_offset = 26, self.episode_index = 56, len(self.bu

Target Set Success
[26, 43, 49, 40]
[291, 76, 140, 47]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[41, 53, 36, 55]
[189, 65, 16, 161]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[33, 39, 47, 38]
[141, 94, 268, 87]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[28, 47, 42, 52]
[237, 158, 16, 280]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[33, 44, 42, 30]
[51, 166, 269, 43]
idx_offset = 26, self.episode_index = 56, len(self.buffer) = 30
len(sampled_ep) =270
len(sam

Target Set Success
[53, 50, 35, 48]
[294, 164, 162, 207]
idx_offset = 27, self.episode_index = 57, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =228
len(sampled_ep) =266
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[44, 38, 43, 47]
[270, 49, 14, 203]
idx_offset = 27, self.episode_index = 57, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[39, 34, 35, 54]
[89, 239, 108, 64]
idx_offset = 27, self.episode_index = 57, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =216
(4, 8, 6)
Target Set Success
[53, 46, 33, 40]
[77, 128, 187, 86]
idx_offset = 27, self.episode_index = 57, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[28, 35, 54, 39]
[270, 101, 165, 188]
idx_offset = 27, self.episode_index = 57, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[41, 37, 33, 51]
[170, 162, 251, 270]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[47, 35, 44, 52]
[22, 20, 200, 206]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[38, 29, 38, 39]
[180, 83, 114, 61]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[34, 44, 55, 42]
[35, 156, 272, 147]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[37, 43, 55, 52]
[234, 17, 110, 259]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[39, 51, 44, 43]
[148, 228, 25, 193]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[55, 45, 33, 31]
[227, 254, 51, 224]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[38, 47, 55, 57]
[70, 59, 66, 56]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[42, 48, 54, 38]
[226, 197, 68, 181]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =216
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[43, 36, 42, 40]
[107, 185, 147, 184]
idx_offset = 28, self.episode_index = 58, len(self.buffer) = 30
len(sampled_ep) =244
len(s

Target Set Success
[32, 43, 50, 41]
[73, 70, 180, 182]
idx_offset = 29, self.episode_index = 59, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =244
len(sampled_ep) =228
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[47, 58, 40, 49]
[14, 228, 279, 140]
idx_offset = 29, self.episode_index = 59, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =272
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 58, 47, 38]
[145, 163, 71, 10]
idx_offset = 29, self.episode_index = 59, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =272
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 34, 38, 47]
[133, 231, 68, 17]
idx_offset = 29, self.episode_index = 59, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[58, 50, 45, 42]
[8, 105, 162, 216]
idx_offset = 29, self.episode_index = 59, len(self.buffer) = 30
len(sampled_ep) =272
len(sam

Target Set Success
[42, 46, 44, 35]
[175, 71, 26, 62]
idx_offset = 30, self.episode_index = 60, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[36, 32, 36, 52]
[19, 160, 82, 266]
idx_offset = 30, self.episode_index = 60, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =270
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[49, 57, 43, 31]
[15, 182, 9, 41]
idx_offset = 30, self.episode_index = 60, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[57, 43, 36, 37]
[136, 27, 278, 236]
idx_offset = 30, self.episode_index = 60, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =244
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[38, 44, 44, 39]
[148, 153, 79, 65]
idx_offset = 30, self.episode_index = 60, len(self.buffer) = 30
len(sampled_ep) =299
len(sample

Target Set Success
[32, 46, 37, 46]
[225, 136, 68, 294]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[37, 46, 39, 47]
[254, 41, 209, 212]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[37, 32, 41, 53]
[158, 217, 140, 281]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[46, 33, 45, 43]
[203, 11, 177, 181]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[44, 57, 36, 40]
[123, 241, 155, 65]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
le

Target Set Success
[50, 47, 53, 41]
[178, 132, 101, 80]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =228
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[58, 36, 40, 32]
[244, 80, 110, 64]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =272
len(sampled_ep) =284
len(sampled_ep) =290
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[38, 46, 53, 59]
[50, 61, 91, 14]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[44, 39, 44, 56]
[84, 61, 108, 9]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 59, 51, 39]
[31, 211, 192, 36]
idx_offset = 31, self.episode_index = 61, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled

Target Set Success
[55, 46, 41, 54]
[294, 203, 17, 82]
idx_offset = 32, self.episode_index = 62, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =216
(4, 8, 6)
Target Set Success
[50, 49, 54, 47]
[193, 219, 129, 158]
idx_offset = 32, self.episode_index = 62, len(self.buffer) = 30
len(sampled_ep) =228
len(sampled_ep) =299
len(sampled_ep) =216
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[53, 59, 58, 50]
[193, 22, 96, 174]
idx_offset = 32, self.episode_index = 62, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =272
len(sampled_ep) =228
(4, 8, 6)
Target Set Success
[59, 58, 38, 38]
[211, 76, 44, 243]
idx_offset = 32, self.episode_index = 62, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =272
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[49, 57, 48, 59]
[131, 263, 184, 48]
idx_offset = 32, self.episode_index = 62, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[35, 51, 37, 34]
[206, 228, 210, 229]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[53, 41, 50, 36]
[136, 103, 126, 42]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =228
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[41, 46, 53, 44]
[193, 82, 19, 270]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[36, 62, 52, 48]
[237, 90, 291, 98]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[36, 54, 39, 60]
[102, 90, 147, 114]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =284
len(

Target Set Success
[49, 48, 51, 43]
[90, 85, 25, 172]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[44, 42, 56, 43]
[270, 286, 171, 176]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[53, 56, 37, 50]
[294, 88, 155, 113]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =228
(4, 8, 6)
Target Set Success
[47, 44, 42, 61]
[157, 123, 101, 53]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[62, 43, 57, 60]
[82, 68, 173, 170]
idx_offset = 33, self.episode_index = 63, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[54, 38, 40, 45]
[88, 252, 79, 126]
idx_offset = 34, self.episode_index = 64, len(self.buffer) = 30
len(sampled_ep) =216
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[52, 59, 37, 51]
[8, 90, 158, 191]
idx_offset = 34, self.episode_index = 64, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[63, 59, 56, 51]
[21, 37, 186, 228]
idx_offset = 34, self.episode_index = 64, len(self.buffer) = 30
len(sampled_ep) =296
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[43, 62, 39, 62]
[24, 116, 74, 15]
idx_offset = 34, self.episode_index = 64, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[38, 38, 53, 37]
[199, 167, 84, 24]
idx_offset = 34, self.episode_index = 64, len(self.buffer) = 30
len(sampled_ep) =299
len(sample

Target Set Success
[61, 39, 39, 53]
[222, 199, 233, 101]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[36, 44, 38, 53]
[103, 279, 218, 294]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[42, 41, 48, 58]
[280, 123, 27, 116]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =240
len(sampled_ep) =272
(4, 8, 6)
Target Set Success
[43, 40, 48, 52]
[20, 22, 61, 98]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =290
len(sampled_ep) =240
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[53, 40, 58, 64]
[280, 279, 179, 179]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[59, 46, 51, 57]
[183, 95, 276, 145]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[46, 56, 44, 37]
[251, 275, 49, 10]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[61, 38, 50, 62]
[224, 242, 108, 12]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =228
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[60, 51, 55, 44]
[235, 274, 289, 93]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[45, 59, 61, 55]
[14, 89, 27, 228]
idx_offset = 35, self.episode_index = 65, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[65, 58, 59, 65]
[146, 36, 143, 153]
idx_offset = 36, self.episode_index = 66, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =272
len(sampled_ep) =266
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[53, 50, 52, 51]
[65, 97, 184, 228]
idx_offset = 36, self.episode_index = 66, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =228
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[64, 59, 55, 51]
[49, 181, 291, 228]
idx_offset = 36, self.episode_index = 66, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[54, 63, 46, 59]
[16, 285, 141, 35]
idx_offset = 36, self.episode_index = 66, len(self.buffer) = 30
len(sampled_ep) =216
len(sampled_ep) =296
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[40, 48, 46, 51]
[60, 19, 43, 114]
idx_offset = 36, self.episode_index = 66, len(self.buffer) = 30
len(sampled_ep) =290
len(sam

Target Set Success
[62, 47, 65, 64]
[49, 262, 230, 24]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[66, 51, 59, 58]
[87, 90, 149, 207]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =272
(4, 8, 6)
Target Set Success
[66, 41, 53, 62]
[233, 163, 80, 182]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[51, 62, 50, 53]
[283, 123, 171, 280]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =228
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[52, 64, 56, 52]
[235, 247, 43, 253]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[58, 43, 43, 66]
[36, 172, 225, 245]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =272
len(sampled_ep) =244
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[49, 59, 62, 54]
[184, 256, 35, 105]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =216
(4, 8, 6)
Target Set Success
[57, 59, 43, 56]
[123, 73, 176, 37]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[49, 45, 66, 66]
[249, 264, 200, 233]
idx_offset = 37, self.episode_index = 67, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
length of poped element = 299 ,cntr = 298
Target Set Success
[55, 64, 54, 49]
[226, 47, 172, 140]
idx_offset = 38, self.episode_index = 68, len(

Target Set Success
[59, 45, 42, 58]
[14, 162, 147, 90]
idx_offset = 38, self.episode_index = 68, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =272
(4, 8, 6)
Target Set Success
[66, 49, 67, 60]
[100, 61, 167, 75]
idx_offset = 38, self.episode_index = 68, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[52, 46, 65, 67]
[21, 121, 186, 85]
idx_offset = 38, self.episode_index = 68, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[56, 59, 42, 43]
[161, 96, 282, 132]
idx_offset = 38, self.episode_index = 68, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =244
(4, 8, 6)
Target Set Success
[65, 46, 58, 61]
[131, 143, 267, 206]
idx_offset = 38, self.episode_index = 68, len(self.buffer) = 30
len(sampled_ep) =274
len(s

Target Set Success
[54, 54, 41, 62]
[213, 99, 171, 97]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =216
len(sampled_ep) =216
len(sampled_ep) =244
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[41, 66, 52, 40]
[200, 250, 21, 22]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[60, 62, 68, 55]
[46, 108, 130, 51]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[47, 65, 50, 68]
[208, 114, 197, 66]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =228
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 64, 48, 47]
[152, 292, 54, 197]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[68, 40, 67, 52]
[38, 243, 207, 289]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[44, 67, 63, 65]
[102, 142, 31, 267]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[46, 56, 49, 66]
[143, 256, 249, 106]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[45, 58, 64, 68]
[154, 218, 143, 148]
idx_offset = 39, self.episode_index = 69, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =272
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
length of poped element = 290 ,cntr = 289
Target Set Success
[53, 59, 65, 64]
[287, 149, 221, 95]
idx_offset = 40, self.episode_index = 70, le

Target Set Success
[41, 66, 46, 64]
[103, 200, 20, 48]
idx_offset = 40, self.episode_index = 70, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[59, 62, 51, 67]
[165, 14, 259, 171]
idx_offset = 40, self.episode_index = 70, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[45, 67, 65, 46]
[208, 240, 62, 55]
idx_offset = 40, self.episode_index = 70, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[48, 67, 59, 65]
[19, 295, 37, 69]
idx_offset = 40, self.episode_index = 70, len(self.buffer) = 30
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[59, 47, 63, 42]
[29, 155, 23, 150]
idx_offset = 40, self.episode_index = 70, len(self.buffer) = 30
len(sampled_ep) =266
len(samp

Target Set Success
[68, 66, 45, 69]
[64, 267, 156, 15]
idx_offset = 41, self.episode_index = 71, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[53, 44, 69, 51]
[294, 255, 265, 98]
idx_offset = 41, self.episode_index = 71, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 60, 64, 62]
[191, 89, 215, 253]
idx_offset = 41, self.episode_index = 71, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 66, 55, 57]
[29, 55, 173, 123]
idx_offset = 41, self.episode_index = 71, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[55, 60, 66, 67]
[138, 182, 295, 111]
idx_offset = 41, self.episode_index = 71, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[69, 64, 67, 68]
[265, 160, 248, 82]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 63, 68, 64]
[71, 58, 74, 62]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =296
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 54, 70, 59]
[183, 17, 133, 162]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =216
len(sampled_ep) =266
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[57, 68, 66, 49]
[184, 51, 38, 180]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[51, 66, 65, 65]
[295, 283, 195, 18]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =299
len(sam

Target Set Success
[70, 70, 62, 68]
[130, 108, 191, 138]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 57, 70, 64]
[178, 138, 95, 201]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[43, 69, 55, 44]
[51, 278, 177, 70]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =244
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[69, 53, 71, 43]
[237, 140, 24, 214]
idx_offset = 42, self.episode_index = 72, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =262
len(sampled_ep) =244
(4, 8, 6)
length of poped element = 244 ,cntr = 243
Target Set Success
[61, 52, 67, 63]
[126, 217, 111, 273]
idx_offset = 43, self.episode_index = 73, len

Target Set Success
[63, 64, 69, 69]
[269, 25, 29, 209]
idx_offset = 43, self.episode_index = 73, len(self.buffer) = 30
len(sampled_ep) =296
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[63, 67, 45, 69]
[180, 55, 25, 200]
idx_offset = 43, self.episode_index = 73, len(self.buffer) = 30
len(sampled_ep) =296
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[51, 52, 52, 69]
[117, 22, 265, 113]
idx_offset = 43, self.episode_index = 73, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[54, 69, 48, 71]
[15, 266, 206, 197]
idx_offset = 43, self.episode_index = 73, len(self.buffer) = 30
len(sampled_ep) =216
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[69, 69, 68, 64]
[236, 223, 263, 70]
idx_offset = 43, self.episode_index = 73, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[71, 68, 60, 64]
[213, 45, 74, 269]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =262
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[68, 66, 63, 73]
[69, 280, 163, 140]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[59, 56, 51, 55]
[167, 56, 58, 43]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[73, 49, 73, 68]
[178, 99, 107, 129]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[69, 72, 59, 63]
[281, 183, 23, 38]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =299
len(sam

Target Set Success
[70, 57, 55, 68]
[113, 175, 102, 27]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[51, 69, 63, 45]
[274, 176, 274, 84]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 65, 56, 63]
[223, 187, 212, 164]
idx_offset = 44, self.episode_index = 74, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =296
(4, 8, 6)
length of poped element = 299 ,cntr = 298
Target Set Success
[74, 65, 71, 73]
[296, 182, 19, 156]
idx_offset = 45, self.episode_index = 75, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =262
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[65, 46, 51, 54]
[44, 203, 292, 79]
idx_offset = 45, self.episode_index = 75, len(

Target Set Success
[65, 61, 53, 73]
[172, 120, 122, 203]
idx_offset = 45, self.episode_index = 75, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 52, 68, 71]
[161, 18, 97, 22]
idx_offset = 45, self.episode_index = 75, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[65, 47, 73, 74]
[132, 242, 49, 83]
idx_offset = 45, self.episode_index = 75, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 70, 72, 61]
[50, 148, 19, 231]
idx_offset = 45, self.episode_index = 75, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[74, 54, 72, 65]
[243, 89, 110, 177]
idx_offset = 45, self.episode_index = 75, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[58, 73, 65, 69]
[128, 47, 261, 132]
idx_offset = 46, self.episode_index = 76, len(self.buffer) = 30
len(sampled_ep) =272
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[68, 65, 55, 72]
[211, 18, 180, 22]
idx_offset = 46, self.episode_index = 76, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[56, 54, 74, 65]
[162, 168, 265, 196]
idx_offset = 46, self.episode_index = 76, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =216
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[74, 48, 75, 69]
[68, 116, 62, 236]
idx_offset = 46, self.episode_index = 76, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 51, 68, 72]
[25, 156, 185, 129]
idx_offset = 46, self.episode_index = 76, len(self.buffer) = 30
len(sampled_ep) =266
len(

Target Set Success
[55, 69, 60, 53]
[247, 238, 252, 275]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[56, 69, 67, 71]
[290, 165, 77, 104]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[68, 75, 72, 67]
[16, 152, 122, 120]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[71, 57, 66, 66]
[133, 162, 135, 37]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =262
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[63, 64, 61, 65]
[101, 132, 116, 140]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =296
l

Target Set Success
[68, 69, 51, 69]
[227, 140, 272, 22]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[74, 76, 64, 64]
[138, 178, 89, 15]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[60, 75, 66, 69]
[148, 84, 147, 201]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[68, 72, 69, 76]
[130, 55, 203, 53]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[73, 67, 73, 49]
[223, 45, 200, 110]
idx_offset = 47, self.episode_index = 77, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[70, 76, 60, 74]
[114, 254, 255, 263]
idx_offset = 48, self.episode_index = 78, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[77, 60, 76, 61]
[232, 237, 122, 194]
idx_offset = 48, self.episode_index = 78, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[67, 75, 64, 69]
[111, 190, 196, 215]
idx_offset = 48, self.episode_index = 78, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[63, 68, 72, 69]
[227, 132, 68, 86]
idx_offset = 48, self.episode_index = 78, len(self.buffer) = 30
len(sampled_ep) =296
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[61, 73, 77, 76]
[124, 57, 72, 121]
idx_offset = 48, self.episode_index = 78, len(self.buffer) = 30
len(sampled_ep) =270
le

Target Set Success
[68, 77, 53, 53]
[293, 104, 51, 195]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[69, 77, 76, 68]
[225, 234, 10, 34]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[72, 62, 75, 66]
[33, 248, 95, 152]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[68, 53, 67, 68]
[176, 138, 107, 211]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[74, 73, 73, 60]
[87, 100, 259, 90]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[56, 72, 77, 77]
[162, 49, 161, 250]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[77, 76, 74, 63]
[53, 118, 156, 164]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =296
(4, 8, 6)
Target Set Success
[75, 72, 72, 69]
[92, 222, 53, 289]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[65, 52, 53, 65]
[99, 135, 280, 260]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[62, 78, 66, 51]
[118, 115, 143, 275]
idx_offset = 49, self.episode_index = 79, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[62, 65, 55, 77]
[207, 188, 89, 232]
idx_offset = 50, self.episode_index = 80, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[75, 64, 75, 65]
[85, 88, 75, 151]
idx_offset = 50, self.episode_index = 80, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[52, 66, 62, 67]
[146, 227, 102, 169]
idx_offset = 50, self.episode_index = 80, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[64, 62, 51, 67]
[236, 24, 275, 285]
idx_offset = 50, self.episode_index = 80, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[51, 63, 65, 66]
[216, 286, 53, 73]
idx_offset = 50, self.episode_index = 80, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[67, 77, 72, 67]
[208, 222, 157, 232]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[53, 67, 72, 74]
[253, 179, 184, 284]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 70, 80, 65]
[187, 169, 121, 59]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[80, 65, 69, 52]
[224, 185, 171, 171]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 80, 77, 79]
[88, 159, 144, 172]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =266


idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =262
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 77, 55, 61]
[62, 36, 199, 48]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[69, 71, 69, 52]
[78, 156, 105, 274]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =262
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 76, 62, 71]
[288, 32, 258, 21]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[73, 55, 65, 73]
[82, 94, 20, 46]
idx_offset = 51, self.episode_index = 81, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4

Target Set Success
[57, 71, 69, 80]
[127, 253, 230, 140]
idx_offset = 52, self.episode_index = 82, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =262
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 67, 65, 80]
[34, 207, 64, 218]
idx_offset = 52, self.episode_index = 82, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[59, 79, 67, 68]
[115, 286, 72, 211]
idx_offset = 52, self.episode_index = 82, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[74, 81, 81, 54]
[285, 258, 147, 82]
idx_offset = 52, self.episode_index = 82, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =216
(4, 8, 6)
Target Set Success
[53, 78, 81, 78]
[196, 77, 274, 259]
idx_offset = 52, self.episode_index = 82, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[64, 55, 57, 81]
[15, 227, 43, 52]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[69, 78, 80, 73]
[272, 118, 199, 210]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 65, 76, 64]
[224, 171, 156, 31]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 73, 68, 75]
[158, 81, 146, 93]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[73, 68, 81, 74]
[260, 51, 170, 24]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[78, 80, 81, 63]
[221, 255, 166, 284]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
(4, 8, 6)
Target Set Success
[80, 75, 69, 79]
[225, 63, 44, 70]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[69, 66, 80, 69]
[259, 135, 256, 279]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[71, 82, 74, 82]
[156, 236, 74, 178]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =262
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 80, 63, 77]
[83, 95, 75, 16]
idx_offset = 53, self.episode_index = 83, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[81, 83, 76, 71]
[267, 188, 172, 109]
idx_offset = 54, self.episode_index = 84, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[67, 68, 77, 73]
[187, 209, 129, 76]
idx_offset = 54, self.episode_index = 84, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[65, 74, 69, 59]
[234, 110, 249, 95]
idx_offset = 54, self.episode_index = 84, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[67, 72, 83, 80]
[49, 192, 276, 217]
idx_offset = 54, self.episode_index = 84, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 68, 67, 79]
[159, 227, 167, 291]
idx_offset = 54, self.episode_index = 84, len(self.buffer) = 30
len(sampled_ep) =299
l

Target Set Success
[73, 65, 79, 78]
[295, 186, 272, 10]
idx_offset = 55, self.episode_index = 85, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 66, 78, 67]
[213, 211, 115, 149]
idx_offset = 55, self.episode_index = 85, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[73, 80, 81, 69]
[153, 132, 267, 232]
idx_offset = 55, self.episode_index = 85, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 62, 80, 71]
[239, 109, 257, 109]
idx_offset = 55, self.episode_index = 85, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[66, 74, 76, 80]
[118, 65, 245, 233]
idx_offset = 55, self.episode_index = 85, len(self.buffer) = 30
len(sampled_ep) =299


Target Set Success
[69, 73, 78, 67]
[267, 297, 31, 100]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 81, 80, 67]
[289, 190, 32, 87]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 81, 65, 76]
[52, 70, 246, 100]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[80, 81, 67, 62]
[250, 158, 37, 13]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[64, 67, 73, 60]
[26, 60, 122, 99]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(samp

Target Set Success
[78, 78, 79, 73]
[288, 61, 185, 88]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[65, 73, 81, 74]
[253, 19, 80, 21]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 78, 74, 66]
[182, 260, 256, 293]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[77, 70, 82, 63]
[48, 229, 283, 131]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =296
(4, 8, 6)
Target Set Success
[82, 81, 69, 80]
[154, 266, 110, 153]
idx_offset = 56, self.episode_index = 86, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[80, 66, 80, 76]
[214, 284, 89, 38]
idx_offset = 57, self.episode_index = 87, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[65, 80, 86, 86]
[262, 291, 211, 116]
idx_offset = 57, self.episode_index = 87, len(self.buffer) = 30
len(sampled_ep) =274
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[61, 73, 74, 80]
[181, 8, 263, 232]
idx_offset = 57, self.episode_index = 87, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 75, 73, 75]
[255, 35, 127, 218]
idx_offset = 57, self.episode_index = 87, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[66, 68, 80, 59]
[250, 69, 76, 24]
idx_offset = 57, self.episode_index = 87, len(self.buffer) = 30
len(sampled_ep) =299
len(sa

Target Set Success
[80, 66, 62, 83]
[214, 134, 208, 173]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[82, 73, 81, 66]
[200, 209, 276, 40]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 86, 77, 62]
[136, 182, 49, 166]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 61, 80, 82]
[25, 179, 136, 116]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 86, 78, 64]
[198, 20, 137, 237]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
le

Target Set Success
[79, 75, 80, 66]
[281, 274, 173, 237]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[74, 69, 87, 73]
[145, 55, 21, 82]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 62, 65, 83]
[233, 101, 140, 161]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[84, 82, 78, 80]
[153, 114, 252, 228]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[85, 73, 63, 81]
[191, 244, 286, 227]
idx_offset = 58, self.episode_index = 88, len(self.buffer) = 30
len(sampled_ep) =270
l

Target Set Success
[68, 82, 79, 86]
[211, 255, 279, 260]
idx_offset = 59, self.episode_index = 89, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[73, 60, 77, 81]
[89, 240, 56, 271]
idx_offset = 59, self.episode_index = 89, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 60, 87, 74]
[54, 244, 211, 41]
idx_offset = 59, self.episode_index = 89, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[69, 79, 83, 79]
[232, 282, 199, 268]
idx_offset = 59, self.episode_index = 89, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[67, 80, 85, 86]
[186, 227, 56, 109]
idx_offset = 59, self.episode_index = 89, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[68, 78, 79, 75]
[127, 20, 290, 95]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 86, 63, 81]
[111, 257, 286, 286]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 64, 81, 83]
[161, 175, 199, 232]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 89, 78, 63]
[247, 230, 269, 129]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
(4, 8, 6)
Target Set Success
[63, 85, 86, 62]
[207, 104, 147, 68]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =296
l

Target Set Success
[67, 77, 86, 76]
[152, 160, 237, 142]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[81, 81, 69, 65]
[247, 92, 43, 141]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[63, 80, 62, 76]
[188, 212, 104, 171]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =296
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[82, 77, 64, 69]
[237, 233, 276, 183]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[82, 74, 68, 78]
[216, 74, 211, 19]
idx_offset = 60, self.episode_index = 90, len(self.buffer) = 30
len(sampled_ep) =299
le

Target Set Success
[64, 72, 83, 69]
[118, 185, 100, 105]
idx_offset = 61, self.episode_index = 91, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 75, 80, 84]
[43, 230, 274, 8]
idx_offset = 61, self.episode_index = 91, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[76, 69, 68, 63]
[230, 37, 246, 102]
idx_offset = 61, self.episode_index = 91, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =296
(4, 8, 6)
Target Set Success
[81, 80, 80, 75]
[44, 266, 99, 221]
idx_offset = 61, self.episode_index = 91, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[82, 80, 85, 73]
[269, 99, 169, 184]
idx_offset = 61, self.episode_index = 91, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[83, 90, 81, 74]
[217, 45, 256, 97]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 69, 81, 70]
[120, 86, 215, 196]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[83, 81, 66, 85]
[157, 173, 281, 155]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[76, 81, 75, 74]
[261, 151, 94, 246]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 74, 91, 80]
[261, 101, 119, 108]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
le

Target Set Success
[88, 75, 82, 80]
[138, 205, 273, 214]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 72, 80, 77]
[109, 226, 277, 38]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[66, 84, 68, 71]
[236, 87, 277, 160]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[80, 85, 72, 65]
[274, 34, 39, 40]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =274
(4, 8, 6)
Target Set Success
[77, 81, 78, 72]
[223, 247, 134, 21]
idx_offset = 62, self.episode_index = 92, len(self.buffer) = 30
len(sampled_ep) =270
len(

Target Set Success
[92, 85, 83, 81]
[294, 197, 220, 227]
idx_offset = 63, self.episode_index = 93, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[82, 67, 80, 75]
[136, 85, 274, 192]
idx_offset = 63, self.episode_index = 93, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 75, 67, 71]
[249, 19, 258, 47]
idx_offset = 63, self.episode_index = 93, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =262
(4, 8, 6)
Target Set Success
[87, 69, 73, 67]
[151, 118, 187, 186]
idx_offset = 63, self.episode_index = 93, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[90, 74, 79, 89]
[102, 92, 143, 124]
idx_offset = 63, self.episode_index = 93, len(self.buffer) = 30
len(sampled_ep) =299
le

Target Set Success
[69, 78, 80, 83]
[73, 95, 213, 141]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[77, 68, 77, 79]
[208, 198, 105, 209]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 79, 91, 79]
[93, 291, 177, 262]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[73, 84, 78, 87]
[227, 79, 58, 97]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =286
(4, 8, 6)
Target Set Success
[77, 87, 80, 83]
[93, 231, 210, 261]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =270
len(s

Target Set Success
[69, 83, 79, 85]
[62, 122, 260, 137]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[70, 72, 84, 75]
[42, 62, 254, 290]
idx_offset = 64, self.episode_index = 94, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
length of poped element = 274 ,cntr = 273
Target Set Success
[80, 75, 79, 78]
[19, 119, 275, 34]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 91, 83, 83]
[231, 255, 126, 169]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 91, 79, 81]
[233, 17, 224, 74]
idx_offset = 65, self.episode_index = 95, len(se

Target Set Success
[79, 85, 74, 73]
[126, 159, 241, 186]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 78, 69, 68]
[161, 250, 48, 131]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 75, 81, 77]
[74, 121, 226, 168]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[73, 83, 87, 75]
[206, 224, 210, 226]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[74, 82, 67, 83]
[95, 206, 150, 132]
idx_offset = 65, self.episode_index = 95, len(self.buffer) = 30
len(sampled_ep) =299
l

Target Set Success
[87, 74, 76, 84]
[220, 262, 254, 61]
idx_offset = 66, self.episode_index = 96, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[68, 70, 75, 73]
[69, 246, 249, 23]
idx_offset = 66, self.episode_index = 96, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 72, 79, 85]
[256, 29, 235, 202]
idx_offset = 66, self.episode_index = 96, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[80, 95, 85, 80]
[267, 57, 54, 89]
idx_offset = 66, self.episode_index = 96, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 87, 78, 68]
[63, 268, 15, 184]
idx_offset = 66, self.episode_index = 96, len(self.buffer) = 30
len(sampled_ep) =299
len(sam

Target Set Success
[81, 78, 89, 79]
[67, 195, 190, 293]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[88, 81, 84, 96]
[135, 121, 193, 161]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 73, 92, 88]
[164, 195, 19, 133]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[74, 83, 90, 86]
[86, 269, 253, 179]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 78, 81, 80]
[187, 281, 235, 290]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
l

Target Set Success
[79, 73, 79, 95]
[252, 98, 267, 101]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[76, 80, 73, 80]
[82, 268, 164, 113]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[86, 94, 88, 93]
[169, 17, 62, 144]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =282
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[93, 75, 86, 78]
[199, 94, 159, 82]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[91, 81, 91, 74]
[74, 52, 65, 83]
idx_offset = 67, self.episode_index = 97, len(self.buffer) = 30
len(sampled_ep) =286
len(samp

Target Set Success
[79, 85, 78, 81]
[231, 151, 177, 112]
idx_offset = 68, self.episode_index = 98, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[94, 86, 81, 74]
[185, 254, 90, 32]
idx_offset = 68, self.episode_index = 98, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[95, 94, 75, 82]
[42, 243, 66, 285]
idx_offset = 68, self.episode_index = 98, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[70, 94, 79, 78]
[123, 121, 141, 274]
idx_offset = 68, self.episode_index = 98, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 69, 80, 86]
[55, 279, 265, 154]
idx_offset = 68, self.episode_index = 98, len(self.buffer) = 30
len(sampled_ep) =299
len

Target Set Success
[79, 78, 75, 74]
[269, 280, 87, 103]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[76, 83, 81, 73]
[123, 200, 245, 125]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 90, 79, 73]
[139, 35, 281, 66]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 75, 77, 81]
[81, 130, 249, 234]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 93, 75, 93]
[46, 241, 68, 275]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(

Target Set Success
[81, 81, 81, 78]
[89, 35, 293, 287]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[87, 81, 94, 84]
[268, 275, 160, 189]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[80, 80, 81, 79]
[73, 94, 52, 260]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[94, 73, 93, 90]
[191, 78, 128, 206]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[92, 82, 74, 94]
[277, 294, 33, 123]
idx_offset = 69, self.episode_index = 99, len(self.buffer) = 30
len(sampled_ep) =299
len(s

Target Set Success
[91, 95, 74, 76]
[64, 130, 55, 17]
idx_offset = 70, self.episode_index = 100, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[86, 90, 82, 95]
[210, 63, 38, 198]
idx_offset = 70, self.episode_index = 100, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[83, 80, 82, 81]
[257, 203, 268, 272]
idx_offset = 70, self.episode_index = 100, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[86, 80, 75, 80]
[199, 156, 223, 152]
idx_offset = 70, self.episode_index = 100, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 73, 76, 79]
[34, 202, 104, 292]
idx_offset = 70, self.episode_index = 100, len(self.buffer) = 30
len(sampled_ep) =299

Target Set Success
[83, 74, 100, 79]
[271, 74, 285, 277]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 97, 83, 97]
[251, 144, 73, 284]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[100, 79, 80, 86]
[291, 27, 256, 240]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[77, 75, 86, 81]
[248, 106, 175, 90]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 83, 95, 76]
[49, 177, 157, 212]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =

Target Set Success
[77, 82, 93, 80]
[104, 257, 19, 289]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[75, 77, 73, 95]
[216, 63, 95, 12]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[92, 74, 99, 79]
[75, 214, 85, 79]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[76, 80, 86, 82]
[101, 276, 263, 118]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 75, 79, 73]
[205, 48, 289, 127]
idx_offset = 71, self.episode_index = 101, len(self.buffer) = 30
len(sampled_ep) =299
l

Target Set Success
[86, 83, 86, 95]
[174, 225, 235, 84]
idx_offset = 72, self.episode_index = 102, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[90, 79, 74, 81]
[173, 229, 233, 29]
idx_offset = 72, self.episode_index = 102, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 101, 77, 81]
[155, 57, 52, 178]
idx_offset = 72, self.episode_index = 102, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[74, 95, 87, 77]
[215, 83, 15, 229]
idx_offset = 72, self.episode_index = 102, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =286
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[80, 81, 78, 95]
[212, 10, 21, 39]
idx_offset = 72, self.episode_index = 102, len(self.buffer) = 30
len(sampled_ep) =299
l

Target Set Success
[100, 92, 86, 81]
[277, 264, 55, 237]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 81, 84, 77]
[228, 173, 34, 84]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[81, 96, 101, 84]
[195, 230, 114, 137]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[80, 90, 86, 81]
[141, 297, 124, 111]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[78, 95, 83, 75]
[106, 63, 250, 41]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =

Target Set Success
[75, 81, 86, 96]
[131, 153, 276, 105]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[94, 91, 88, 89]
[212, 69, 210, 291]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =286
len(sampled_ep) =282
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[76, 87, 78, 99]
[95, 246, 287, 85]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[93, 77, 83, 91]
[98, 254, 19, 47]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =286
(4, 8, 6)
Target Set Success
[98, 78, 80, 76]
[60, 288, 99, 156]
idx_offset = 73, self.episode_index = 103, len(self.buffer) = 30
len(sampled_ep) =266
l

Target Set Success
[79, 97, 76, 92]
[296, 202, 108, 12]
idx_offset = 74, self.episode_index = 104, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 102, 83, 87]
[45, 201, 107, 116]
idx_offset = 74, self.episode_index = 104, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
(4, 8, 6)
Target Set Success
[93, 90, 96, 99]
[254, 47, 167, 88]
idx_offset = 74, self.episode_index = 104, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[93, 93, 94, 76]
[146, 259, 208, 162]
idx_offset = 74, self.episode_index = 104, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[81, 79, 94, 92]
[39, 267, 225, 123]
idx_offset = 74, self.episode_index = 104, len(self.buffer) = 30
len(sampled_ep) =2

Target Set Success
[79, 102, 95, 99]
[76, 169, 136, 31]
idx_offset = 75, self.episode_index = 105, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 83, 84, 102]
[19, 115, 192, 98]
idx_offset = 75, self.episode_index = 105, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 81, 81, 78]
[44, 277, 228, 277]
idx_offset = 75, self.episode_index = 105, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 97, 95, 90]
[167, 123, 251, 209]
idx_offset = 75, self.episode_index = 105, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 95, 86, 93]
[116, 83, 199, 246]
idx_offset = 75, self.episode_index = 105, len(self.buffer) = 30
len(sampled_ep) =

Target Set Success
[82, 85, 96, 81]
[121, 76, 233, 252]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[86, 91, 97, 83]
[72, 163, 186, 286]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =292
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[83, 98, 94, 78]
[179, 57, 72, 153]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[96, 103, 90, 96]
[157, 116, 101, 214]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[88, 87, 80, 80]
[35, 126, 138, 170]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =2

Target Set Success
[96, 87, 80, 103]
[208, 15, 243, 102]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[98, 97, 83, 96]
[233, 78, 250, 33]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[79, 96, 105, 102]
[260, 135, 71, 129]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[103, 93, 89, 105]
[45, 19, 170, 34]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[81, 94, 80, 91]
[150, 120, 179, 56]
idx_offset = 76, self.episode_index = 106, len(self.buffer) = 30
len(sampled_ep) =

Target Set Success
[97, 103, 105, 98]
[196, 280, 212, 230]
idx_offset = 77, self.episode_index = 107, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[103, 106, 81, 79]
[38, 85, 256, 282]
idx_offset = 77, self.episode_index = 107, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[91, 86, 82, 88]
[282, 136, 277, 192]
idx_offset = 77, self.episode_index = 107, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[99, 82, 87, 102]
[87, 188, 121, 265]
idx_offset = 77, self.episode_index = 107, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[94, 79, 98, 79]
[134, 135, 223, 144]
idx_offset = 77, self.episode_index = 107, len(self.buffer) = 30
len(sampled_

Target Set Success
[83, 102, 86, 101]
[236, 199, 239, 33]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[82, 88, 96, 83]
[135, 181, 179, 149]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[80, 83, 97, 104]
[170, 155, 225, 87]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =222
(4, 8, 6)
Target Set Success
[81, 83, 84, 83]
[18, 249, 239, 34]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[86, 82, 106, 86]
[125, 275, 210, 126]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep

Target Set Success
[99, 103, 96, 80]
[72, 289, 61, 228]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 82, 86, 95]
[151, 128, 167, 208]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[92, 99, 81, 88]
[16, 106, 293, 65]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[86, 103, 81, 81]
[220, 38, 247, 182]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[81, 83, 86, 83]
[197, 134, 167, 217]
idx_offset = 78, self.episode_index = 108, len(self.buffer) = 30
len(sampled_ep) =

Target Set Success
[81, 83, 107, 103]
[216, 158, 136, 72]
idx_offset = 79, self.episode_index = 109, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 89, 103, 81]
[79, 37, 63, 91]
idx_offset = 79, self.episode_index = 109, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[95, 91, 81, 94]
[59, 47, 45, 231]
idx_offset = 79, self.episode_index = 109, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[85, 98, 106, 92]
[82, 242, 218, 117]
idx_offset = 79, self.episode_index = 109, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[106, 107, 94, 95]
[133, 127, 93, 37]
idx_offset = 79, self.episode_index = 109, len(self.buffer) = 30
len(sampled_ep) =29

Target Set Success
[92, 98, 98, 82]
[199, 188, 255, 252]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[103, 102, 101, 101]
[55, 27, 265, 29]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[101, 99, 82, 87]
[264, 250, 80, 259]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
(4, 8, 6)
Target Set Success
[101, 83, 107, 89]
[19, 224, 167, 272]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[95, 84, 92, 86]
[18, 264, 22, 61]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep)

Target Set Success
[104, 105, 91, 83]
[109, 54, 274, 257]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =222
len(sampled_ep) =240
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[82, 106, 106, 84]
[273, 54, 218, 103]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[82, 101, 88, 84]
[260, 105, 126, 135]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[99, 101, 86, 98]
[21, 58, 182, 234]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[109, 91, 94, 94]
[57, 210, 196, 33]
idx_offset = 80, self.episode_index = 110, len(self.buffer) = 30
len(sampled_e

Target Set Success
[87, 108, 107, 109]
[195, 197, 132, 222]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[108, 96, 110, 100]
[187, 233, 103, 7]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[103, 89, 98, 101]
[15, 157, 193, 67]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[106, 103, 103, 101]
[161, 283, 119, 55]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 90, 85, 107]
[245, 7, 82, 249]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampl

Target Set Success
[105, 95, 87, 84]
[191, 178, 256, 231]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampled_ep) =240
len(sampled_ep) =290
len(sampled_ep) =286
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[102, 85, 95, 86]
[193, 34, 277, 166]
idx_offset = 81, self.episode_index = 111, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
length of poped element = 299 ,cntr = 298
Target Set Success
[89, 96, 89, 85]
[172, 281, 101, 52]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[104, 86, 85, 109]
[45, 171, 144, 229]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_ep) =222
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[88, 95, 91, 88]
[230, 20, 193, 97]
idx_offset = 82, self.episode_index = 

Target Set Success
[110, 87, 108, 99]
[245, 230, 224, 113]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 92, 103, 107]
[187, 34, 163, 143]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[89, 89, 102, 106]
[101, 285, 71, 181]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[97, 92, 93, 111]
[59, 68, 23, 102]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 111, 99, 104]
[98, 216, 238, 51]
idx_offset = 82, self.episode_index = 112, len(self.buffer) = 30
len(sampled_

Target Set Success
[109, 90, 103, 94]
[55, 207, 214, 83]
idx_offset = 83, self.episode_index = 113, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[93, 105, 111, 95]
[138, 43, 146, 125]
idx_offset = 83, self.episode_index = 113, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =290
(4, 8, 6)
Target Set Success
[90, 90, 91, 101]
[236, 203, 255, 19]
idx_offset = 83, self.episode_index = 113, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 94, 110, 103]
[273, 235, 218, 133]
idx_offset = 83, self.episode_index = 113, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[95, 89, 110, 89]
[192, 125, 196, 165]
idx_offset = 83, self.episode_index = 113, len(self.buffer) = 30
len(sampl

Target Set Success
[112, 91, 98, 90]
[169, 194, 105, 222]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =266
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[91, 110, 111, 86]
[235, 208, 204, 227]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[86, 99, 109, 88]
[182, 87, 164, 112]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
(4, 8, 6)
Target Set Success
[113, 99, 102, 89]
[236, 195, 184, 7]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[91, 102, 110, 104]
[85, 284, 182, 217]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampl

Target Set Success
[101, 100, 110, 92]
[85, 212, 229, 271]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[110, 109, 91, 90]
[167, 245, 47, 56]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[101, 89, 88, 94]
[244, 71, 106, 193]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =282
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[86, 95, 110, 111]
[85, 122, 276, 76]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[108, 100, 90, 95]
[108, 84, 256, 39]
idx_offset = 84, self.episode_index = 114, len(self.buffer) = 30
len(sampled_

Target Set Success
[99, 111, 114, 99]
[99, 170, 141, 291]
idx_offset = 85, self.episode_index = 115, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[113, 103, 114, 94]
[226, 51, 234, 198]
idx_offset = 85, self.episode_index = 115, len(self.buffer) = 30
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[99, 102, 87, 110]
[148, 236, 269, 260]
idx_offset = 85, self.episode_index = 115, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[89, 108, 110, 110]
[239, 106, 268, 164]
idx_offset = 85, self.episode_index = 115, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[89, 112, 113, 114]
[170, 18, 177, 275]
idx_offset = 85, self.episode_index = 115, len(self.buffer) = 30
len(

Target Set Success
[110, 114, 114, 102]
[256, 205, 236, 87]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 114, 102, 94]
[166, 103, 220, 199]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[107, 108, 106, 114]
[190, 193, 65, 171]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[100, 101, 97, 98]
[155, 139, 47, 262]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[99, 112, 107, 104]
[66, 178, 131, 156]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
le

Target Set Success
[89, 101, 115, 97]
[140, 171, 208, 257]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =292
(4, 8, 6)
Target Set Success
[92, 99, 114, 107]
[10, 51, 278, 255]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[90, 112, 113, 91]
[127, 150, 203, 210]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =286
(4, 8, 6)
Target Set Success
[110, 99, 103, 114]
[202, 82, 90, 87]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[91, 110, 111, 102]
[189, 267, 205, 209]
idx_offset = 86, self.episode_index = 116, len(self.buffer) = 30
len(sam

Target Set Success
[116, 100, 100, 96]
[83, 203, 85, 99]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[93, 90, 106, 115]
[50, 81, 83, 252]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[103, 99, 108, 107]
[104, 285, 166, 259]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[101, 113, 110, 114]
[266, 233, 215, 125]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[110, 96, 91, 95]
[184, 165, 283, 128]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sam

Target Set Success
[98, 99, 114, 110]
[60, 195, 128, 63]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[98, 112, 114, 114]
[51, 202, 259, 135]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[110, 115, 114, 109]
[269, 95, 252, 282]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[109, 98, 106, 91]
[107, 70, 82, 127]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =286
(4, 8, 6)
Target Set Success
[111, 89, 116, 91]
[287, 108, 281, 174]
idx_offset = 87, self.episode_index = 117, len(self.buffer) = 30
len(sam

Target Set Success
[110, 114, 96, 101]
[246, 86, 232, 98]
idx_offset = 88, self.episode_index = 118, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[93, 108, 105, 108]
[212, 221, 182, 236]
idx_offset = 88, self.episode_index = 118, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =240
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[114, 114, 117, 114]
[83, 30, 19, 228]
idx_offset = 88, self.episode_index = 118, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[114, 110, 108, 114]
[247, 220, 133, 272]
idx_offset = 88, self.episode_index = 118, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[109, 91, 98, 113]
[20, 45, 163, 33]
idx_offset = 88, self.episode_index = 118, len(self.buffer) = 30
len(sa

Target Set Success
[115, 104, 97, 105]
[102, 148, 247, 54]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =222
len(sampled_ep) =292
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[95, 99, 111, 115]
[58, 154, 237, 126]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =290
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 102, 109, 116]
[229, 95, 211, 172]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[111, 91, 103, 118]
[218, 271, 98, 90]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =286
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 110, 114, 107]
[199, 272, 127, 214]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(

Target Set Success
[94, 92, 112, 116]
[83, 71, 253, 14]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[117, 110, 113, 118]
[209, 177, 16, 57]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[110, 113, 107, 93]
[72, 207, 254, 142]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[117, 111, 106, 113]
[97, 231, 81, 93]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
(4, 8, 6)
Target Set Success
[111, 98, 114, 117]
[237, 169, 177, 224]
idx_offset = 89, self.episode_index = 119, len(self.buffer) = 30
len(sam

Target Set Success
[105, 110, 112, 115]
[114, 257, 279, 117]
idx_offset = 90, self.episode_index = 120, len(self.buffer) = 30
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[116, 114, 118, 98]
[274, 132, 180, 59]
idx_offset = 90, self.episode_index = 120, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[99, 103, 115, 115]
[58, 91, 295, 126]
idx_offset = 90, self.episode_index = 120, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 102, 102, 119]
[235, 218, 292, 141]
idx_offset = 90, self.episode_index = 120, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[110, 119, 117, 117]
[273, 129, 176, 174]
idx_offset = 90, self.episode_index = 120, len(self.buffer) = 30

Target Set Success
[118, 98, 98, 119]
[184, 68, 69, 54]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[105, 106, 107, 101]
[24, 218, 193, 29]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[111, 115, 112, 118]
[39, 225, 147, 83]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[119, 97, 118, 115]
[186, 198, 269, 124]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =298
len(sampled_ep) =292
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[111, 117, 110, 103]
[65, 124, 71, 213]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sa

Target Set Success
[114, 112, 116, 110]
[269, 118, 78, 264]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 95, 108, 102]
[265, 233, 113, 192]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =290
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[101, 97, 97, 114]
[263, 39, 81, 187]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =292
len(sampled_ep) =292
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[107, 115, 114, 115]
[268, 94, 280, 269]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 99, 114, 120]
[139, 238, 270, 183]
idx_offset = 91, self.episode_index = 121, len(self.buffer) = 30
le

Target Set Success
[117, 116, 118, 114]
[151, 103, 187, 78]
idx_offset = 92, self.episode_index = 122, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[111, 118, 115, 107]
[281, 75, 293, 242]
idx_offset = 92, self.episode_index = 122, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[120, 119, 116, 115]
[85, 21, 186, 226]
idx_offset = 92, self.episode_index = 122, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 113, 114, 119]
[271, 214, 151, 104]
idx_offset = 92, self.episode_index = 122, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =284
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[112, 115, 99, 118]
[259, 118, 123, 55]
idx_offset = 92, self.episode_index = 122, len(self.buffer) = 30
l

Target Set Success
[96, 115, 122, 114]
[181, 94, 19, 48]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[102, 122, 95, 118]
[255, 205, 220, 144]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 99, 95, 104]
[92, 133, 265, 79]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =290
len(sampled_ep) =222
(4, 8, 6)
Target Set Success
[114, 111, 99, 107]
[226, 217, 142, 91]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 110, 103, 103]
[96, 247, 70, 213]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sam

Target Set Success
[113, 112, 100, 116]
[232, 243, 191, 56]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[98, 98, 115, 96]
[206, 168, 289, 16]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =266
len(sampled_ep) =266
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[106, 118, 99, 103]
[138, 278, 168, 261]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 113, 105, 111]
[19, 13, 44, 182]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =240
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[119, 113, 115, 119]
[143, 237, 289, 141]
idx_offset = 93, self.episode_index = 123, len(self.buffer) = 30
len

Target Set Success
[107, 105, 100, 102]
[252, 208, 16, 286]
idx_offset = 94, self.episode_index = 124, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 112, 115, 96]
[126, 155, 254, 180]
idx_offset = 94, self.episode_index = 124, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[116, 117, 119, 111]
[80, 115, 226, 105]
idx_offset = 94, self.episode_index = 124, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =298
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[108, 119, 111, 113]
[191, 283, 212, 219]
idx_offset = 94, self.episode_index = 124, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =242
(4, 8, 6)
Target Set Success
[98, 111, 109, 109]
[59, 144, 180, 37]
idx_offset = 94, self.episode_index = 124, len(self.buffer) = 30


Target Set Success
[118, 103, 111, 124]
[264, 188, 260, 261]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[117, 101, 114, 124]
[135, 60, 280, 162]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 113, 110, 105]
[187, 181, 223, 186]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[117, 124, 108, 120]
[238, 192, 142, 274]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[118, 119, 101, 110]
[93, 158, 132, 269]
idx_offset = 95, self.episode_index = 125, len(self.buffer) =

Target Set Success
[100, 110, 103, 110]
[121, 290, 162, 210]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 102, 110, 98]
[108, 297, 273, 163]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =266
(4, 8, 6)
Target Set Success
[124, 120, 105, 119]
[93, 40, 139, 296]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[103, 103, 99, 105]
[190, 192, 242, 155]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[112, 111, 111, 123]
[109, 63, 273, 100]
idx_offset = 95, self.episode_index = 125, len(self.buffer) = 30


Target Set Success
[124, 100, 113, 119]
[262, 103, 91, 114]
idx_offset = 96, self.episode_index = 126, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[125, 99, 120, 99]
[118, 113, 290, 187]
idx_offset = 96, self.episode_index = 126, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 114, 106, 116]
[269, 196, 268, 28]
idx_offset = 96, self.episode_index = 126, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[99, 108, 109, 121]
[21, 158, 271, 91]
idx_offset = 96, self.episode_index = 126, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =280
(4, 8, 6)
Target Set Success
[99, 103, 114, 125]
[290, 19, 278, 271]
idx_offset = 96, self.episode_index = 126, len(self.buffer) = 30
len

Target Set Success
[102, 99, 122, 110]
[209, 188, 176, 185]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 101, 102, 119]
[290, 230, 185, 121]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[115, 113, 124, 106]
[14, 145, 168, 163]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[111, 105, 107, 114]
[47, 24, 163, 187]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[118, 120, 102, 114]
[189, 226, 199, 262]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 3

Target Set Success
[110, 114, 119, 117]
[220, 246, 21, 183]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =298
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 114, 104, 113]
[94, 60, 92, 230]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =284
len(sampled_ep) =222
len(sampled_ep) =242
(4, 8, 6)
Target Set Success
[125, 112, 99, 110]
[187, 109, 290, 295]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[126, 110, 126, 116]
[282, 76, 66, 60]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[112, 114, 110, 117]
[18, 108, 253, 249]
idx_offset = 97, self.episode_index = 127, len(self.buffer) = 30
len

Target Set Success
[115, 117, 123, 108]
[147, 163, 12, 210]
idx_offset = 98, self.episode_index = 128, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =252
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[116, 101, 110, 119]
[185, 246, 255, 283]
idx_offset = 98, self.episode_index = 128, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[107, 117, 115, 119]
[127, 104, 202, 252]
idx_offset = 98, self.episode_index = 128, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[116, 110, 115, 102]
[37, 225, 291, 202]
idx_offset = 98, self.episode_index = 128, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 120, 101, 115]
[286, 143, 206, 53]
idx_offset = 98, self.episode_index = 128, len(self.buffer) = 

Target Set Success
[110, 105, 101, 107]
[200, 186, 272, 185]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[102, 114, 102, 104]
[192, 130, 171, 69]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =222
(4, 8, 6)
Target Set Success
[113, 119, 106, 105]
[176, 238, 55, 117]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =242
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =240
(4, 8, 6)
Target Set Success
[117, 117, 105, 117]
[283, 113, 144, 49]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =240
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[125, 114, 119, 104]
[116, 272, 125, 150]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 

Target Set Success
[116, 118, 103, 111]
[91, 127, 193, 193]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[115, 113, 106, 108]
[138, 215, 191, 62]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[119, 102, 110, 111]
[247, 273, 232, 14]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 114, 107, 117]
[149, 270, 159, 245]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[117, 122, 128, 118]
[85, 124, 210, 155]
idx_offset = 99, self.episode_index = 129, len(self.buffer) = 3

Target Set Success
[110, 116, 112, 124]
[212, 183, 287, 17]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[117, 116, 126, 106]
[67, 188, 247, 172]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[110, 115, 113, 114]
[214, 175, 232, 177]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[118, 107, 102, 115]
[121, 164, 185, 102]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 109, 106, 109]
[162, 43, 156, 19]
idx_offset = 100, self.episode_index = 130, len(self.buffer

Target Set Success
[125, 129, 116, 123]
[81, 144, 292, 31]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =252
(4, 8, 6)
Target Set Success
[121, 120, 117, 112]
[31, 137, 65, 121]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =280
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[117, 110, 113, 115]
[150, 283, 222, 250]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[127, 110, 115, 116]
[293, 291, 237, 186]
idx_offset = 100, self.episode_index = 130, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[110, 102, 107, 126]
[184, 262, 17, 245]
idx_offset = 100, self.episode_index = 130, len(self.buffer)

Target Set Success
[110, 122, 108, 111]
[274, 154, 237, 267]
idx_offset = 101, self.episode_index = 131, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[113, 120, 108, 109]
[23, 28, 80, 88]
idx_offset = 101, self.episode_index = 131, len(self.buffer) = 30
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[127, 107, 112, 116]
[63, 243, 154, 290]
idx_offset = 101, self.episode_index = 131, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[127, 111, 113, 107]
[173, 243, 234, 111]
idx_offset = 101, self.episode_index = 131, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[110, 126, 124, 120]
[182, 80, 81, 70]
idx_offset = 101, self.episode_index = 131, len(self.buffer) = 

Target Set Success
[115, 118, 114, 122]
[117, 187, 269, 27]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[113, 110, 106, 115]
[230, 209, 176, 183]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[121, 121, 117, 120]
[187, 150, 256, 17]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =280
len(sampled_ep) =280
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[121, 109, 125, 121]
[222, 12, 285, 166]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =280
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =280
(4, 8, 6)
Target Set Success
[114, 117, 126, 117]
[271, 169, 229, 183]
idx_offset = 102, self.episode_index = 132, len(self.buffe

Target Set Success
[115, 117, 117, 127]
[164, 278, 283, 234]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[127, 107, 128, 114]
[200, 226, 29, 226]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =280
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[114, 119, 106, 114]
[136, 136, 153, 193]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =284
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[107, 110, 128, 117]
[149, 88, 22, 105]
idx_offset = 102, self.episode_index = 132, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =280
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[116, 118, 118, 116]
[273, 131, 271, 288]
idx_offset = 102, self.episode_index = 132, len(self.buffe

Target Set Success
[115, 127, 114, 119]
[296, 250, 95, 142]
idx_offset = 103, self.episode_index = 133, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[110, 127, 108, 117]
[253, 195, 95, 119]
idx_offset = 103, self.episode_index = 133, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[128, 119, 116, 118]
[68, 111, 110, 111]
idx_offset = 103, self.episode_index = 133, len(self.buffer) = 30
len(sampled_ep) =280
len(sampled_ep) =298
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[118, 114, 128, 114]
[104, 170, 111, 213]
idx_offset = 103, self.episode_index = 133, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =280
len(sampled_ep) =284
(4, 8, 6)
Target Set Success
[124, 115, 126, 122]
[280, 172, 66, 124]
idx_offset = 103, self.episode_index = 133, len(self.buffer

Target Set Success
[111, 127, 109, 115]
[295, 284, 235, 266]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[108, 113, 107, 115]
[41, 202, 178, 14]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[132, 112, 110, 113]
[199, 210, 276, 227]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =242
(4, 8, 6)
Target Set Success
[112, 114, 115, 117]
[188, 226, 218, 176]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =284
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[123, 131, 110, 114]
[73, 80, 249, 77]
idx_offset = 104, self.episode_index = 134, len(self.buffer)

Target Set Success
[117, 108, 107, 108]
[211, 81, 204, 228]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =270
len(sampled_ep) =299
len(sampled_ep) =270
(4, 8, 6)
Target Set Success
[132, 110, 127, 131]
[18, 267, 159, 204]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[131, 118, 115, 132]
[277, 250, 289, 197]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[108, 113, 125, 129]
[129, 31, 14, 276]
idx_offset = 104, self.episode_index = 134, len(self.buffer) = 30
len(sampled_ep) =270
len(sampled_ep) =242
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[111, 117, 114, 126]
[86, 213, 227, 293]
idx_offset = 104, self.episode_index = 134, len(self.buffer)

Target Set Success
[126, 110, 127, 127]
[131, 250, 255, 208]
idx_offset = 105, self.episode_index = 135, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[111, 128, 109, 115]
[196, 28, 36, 280]
idx_offset = 105, self.episode_index = 135, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =280
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[109, 125, 121, 119]
[115, 164, 20, 236]
idx_offset = 105, self.episode_index = 135, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =280
len(sampled_ep) =298
(4, 8, 6)
Target Set Success
[127, 117, 130, 129]
[256, 137, 267, 144]
idx_offset = 105, self.episode_index = 135, len(self.buffer) = 30
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
len(sampled_ep) =299
(4, 8, 6)
Target Set Success
[114, 112, 134, 131]
[93, 277, 167, 238]
idx_offset = 105, self.episode_index = 135, len(self.buffer

KeyboardInterrupt: 

In [64]:
print(type(myBuffer))
print(type(myBuffer.buffer))
print(len(myBuffer.buffer))
#myBuffer.save(path2mdl)
#myBuffer = experience_buffer()
#myBuffer.load(path2mdl)
print(type(myBuffer.buffer))
print(len(myBuffer.buffer))
print(myBuffer.buffer[98].shape)

<class '__main__.experience_buffer'>
<class 'list'>
100
<class 'list'>
100


AttributeError: 'list' object has no attribute 'shape'

In [11]:
trainBatch = myBuffer.PRsample(batch_size,trace_length, i, num_episodes)

ValueError: operands could not be broadcast together with shapes (270,1,1,6) (7056,) 

In [11]:
print(td)
episode = episodeBuffer
episode = np.reshape(np.array(episode),[len(episode),6])
print(episode.shape)
print(episode[:, 5].shape)
td_error = episode[:, 5]
priority = np.absolute(td_error) + 1e-9 #proportionnal priority
print(priority.shape)
#Append episode to the priority replay tuple list
#Every experience in the episode has a tuple of the form:
#(episode_index, experience_index, priority)
episode_index = 0

exp_prio_tuples = []
for experience_index in range(trace_length-1, len(episode[:, 5])):
    tup = (episode_index, experience_index, float(priority[experience_index]))
    exp_prio_tuples.append(tup)


print(exp_prio_tuples[2])
print(exp_prio_tuples[0][0])

priorities_poweralpha = np.power([tup[2] for tup in exp_prio_tuples],myBuffer.alpha)
print(myBuffer.alpha)
print(priorities_poweralpha.shape)
sum_priorities_poweralpha = np.sum(priorities_poweralpha)
print(sum_priorities_poweralpha)
sampling_probabilities = np.divide(priorities_poweralpha, sum_priorities_poweralpha)
print(sampling_probabilities.shape)
sampled_indexes = np.random.choice(len(exp_prio_tuples), batch_size, p = sampling_probabilities)
print(sampled_indexes)

sampled_tuples = [exp_prio_tuples[idx] for idx in sampled_indexes]
ep_idx = [tup[0] for tup in sampled_tuples]
print(ep_idx)
exp_idx = [tup[1] for tup in sampled_tuples]
print(exp_idx)
sampledTraces = []
for i in range(0,batch_size):
    sampled_ep = myBuffer.buffer[ep_idx[i]]
    sampled_ep = np.reshape(np.array(sampled_ep),[len(sampled_ep),6])
    sampled_exp = sampled_ep[exp_idx[i]-(trace_length):exp_idx[i]] 
    sampledTraces.append(sampled_exp)
sampledTraces = np.array(sampledTraces)
print(sampledTraces.shape)
result = np.reshape(sampledTraces,[batch_size*trace_length,6])

[ 0.00737692]
(21, 6)
(21,)
(21,)
(0, 9, 0.006785107310861349)
0
0.4997
(14,)
1.22897394469
(14,)
[7 2 1 8]
[0, 0, 0, 0]
[14, 9, 8, 15]
(4, 8, 6)


In [13]:
print(sampledTraces[3].shape)
print(exp_prio_tuples[0][0])
print(len(exp_prio_tuples))
exp_prio_tuples.pop(0)
print(len(exp_prio_tuples))

(8, 6)
0
14
13
