In [85]:
! pip install python_tsp



In [86]:
import numpy as np
from collections import deque
import random
import copy
import math
import tensorflow as tf
import keras
from python_tsp.exact import solve_tsp_dynamic_programming
import numpy as np

# Modeling
def QNet(input_shape, output_shape):
  optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
  criterion = tf.losses.MeanSquaredError()
  Qnet = tf.keras.Sequential([
         tf.keras.layers.Dense(units=256, activation='relu', input_shape=(input_shape,)),
         tf.keras.layers.Dense(units=256, activation='relu'),
         tf.keras.layers.Dense(units=output_shape, activation='linear')
        ])
  Qnet.compile(optimizer=optimizer, loss=criterion)

  return Qnet

# 예시 데이터

input_shape = 10  # 예시 입력 차원
output_shape = 10  # 예시 출력 차원
input_data = tf.constant([[1.0] * input_shape], dtype=tf.float32)

# 학습 예시
model = QNet(input_shape, output_shape)
model.fit(input_data, input_data, batch_size=1)

# 예측 예시
output_data = model.predict(input_data)
print(output_data)

[[1.2476184  1.18904    1.2200441  1.7473732  1.5378778  1.2865838
  0.94348234 1.2788025  0.799048   1.0779693 ]]


In [87]:
def cal_dist(point1, point2):
    return math.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)

def TSPDistcost(points):
    num_points = len(points)
    distance_matrix = [[0] * num_points for _ in range(num_points)]

    for i in range(num_points):
        for j in range(i + 1, num_points):
            distance = cal_dist(points[i], points[j])
            distance_matrix[i][j] = distance
            distance_matrix[j][i] = distance
    return distance_matrix

def randcoord(numnodes, maxvalue):
    nodes = np.random.rand(numnodes, 2) * maxvalue
    return nodes

def Findoptsol(distance_matrix):
    permutation, distance = solve_tsp_dynamic_programming(np.array(distance_matrix))
    return permutation, distance

In [88]:
class Env():
    def __init__(self, points):
        self.nodes = points
        self.n_stops = len(points)
        self.dist_matrix = TSPDistcost(points)
        self.start = 0
        self.togo = [x for x in range(1, len(points))]
        self.visited = [0]
        self.done = False

    def reset(self):
        self.start = 0
        self.togo = [x for x in range(1, len(self.nodes))]
        self.visited = [0]
        self.done = False
        return self.start

    def step(self, s, a):
        s_next = a
        re = -self.dist_matrix[s][s_next]
        if s_next in self.togo:
            k = self.togo.index(s_next)
            self.togo.pop(k)
            self.visited.append(s_next)
        if len(self.togo) == 0:
            self.done = True
        return s_next, re, self.done

In [89]:
def create_one_hot_list(length, indices):
    one_hot_list = [0] * length

    for index in indices:
        if 0 <= index < length:
            one_hot_list[index] = 1

    return one_hot_list

def trans(s, visit, length):
    currentnode = np.identity(length)[s]
    visitednodes = create_one_hot_list(length, visit)
    input = np.concatenate([currentnode, visitednodes], axis=0)
    return input

In [90]:
class Agent():
    def __init__(self, states_size, actions_size, buffer_limit=20, batch_size=10, \
                 epsilon_min=0.01, epsilon_decay=0.99, gamma=0.95, lr=0.01, C=10):
        self.states_size = states_size
        self.actions_size = actions_size
        self.batch_size = batch_size
        self.buffer_limit = buffer_limit
        self.epsilon = 1
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.gamma = gamma
        self.lr = lr
        self.C = C
        self.Qnet = QNet(states_size, actions_size)
        self.Tnet = copy.deepcopy(self.Qnet)
        self.buffer = deque(maxlen=buffer_limit)

    def reset_memory(self):
        self.buffer = deque(maxlen=self.buffer_limit)

    def act(self, input):
        input = input.reshape(1,-1)
        q = self.Qnet.predict(input, verbose=0)
        if np.random.rand() > self.epsilon:
            a = np.argmax(q)
        else:
            a = np.random.randint(self.actions_size)
        return a

    def put(self, s, visit, a, r, s_next):
        self.buffer.append((s, visit, a, r, s_next))

    def sample(self):
        mini_batch = random.sample(self.buffer, self.batch_size)
        input_lst, a_lst, r_lst, input_next_lst = [], [], [], []

        for transition in mini_batch:
            s, visit, a, r, s_next = transition

            # s input setting for Q(s,a)
            input = trans(s, visit, self.actions_size)
            input_lst.append(input)

            # s' input setting for max(Q(s_next,a'))
            visit.append(s_next)
            input_next = trans(s_next, visit, self.actions_size)
            input_next_lst.append(input_next)

            a_lst.append(a)
            r_lst.append(r)

        return np.array(input_lst), np.array(a_lst), np.array(r_lst), np.array(input_next_lst)

    def train(self):
        input_lst, a_lst, r_lst, input_next_lst = self.sample()
        print(input_lst.shape, input_next_lst.shape)
        y_hat = np.max(self.Qnet.predict(input_lst.T, verbose=0), axis=1)
        y = r_lst + self.gamma * np.max(self.Tnet.predict(input_next_lst.T, verbose=0), axis=1)
        for i in range(10):
          self.Qnet.fit(y_hat[i], y[i], verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def netsynch(self):
        self.Tnet = copy.deepcopy(self.Qnet)

In [91]:
def run_episode(env, agent, num_episodes, verbose=1):
    s = env.reset()
    agent.reset_memory()
    max_step = env.n_stops
    episode_reward = 0

    i = 0
    while i < max_step:
        # Choose an Action
        input_data = trans(s, env.visited, max_step)
        a = agent.act(input_data)  # e-greedy policy

        # Take the action, and get the reward from environment
        if a in env.togo:
            s_next, r, done = env.step(s, a)

            if verbose:
                print(s_next, r, done)

            # Update Model
            # Experience Replay
            # put sample
            agent.put(s, env.visited, a, r, s_next)
            # train Qnet(PredictedNetwork) using a batch of samples in the buffer
            if len(agent.buffer) > agent.batch_size:
               agent.train()

            # TargetNetwork synchronizing with PredictionNetwork
            if num_episodes % agent.C == 0 and num_episodes != 0:
                agent.netsynch()

            # Update the caches
            episode_reward += r
            s = s_next
        else:
            r = -500
            agent.put(s, env.visited, a, r, a)
            episode_reward += r
            done = False
        # one step is terminated, check if the episode is terminated
        i += 1
        if done:
            env.visited.append(0)
            r = 500
            agent.put(s, env.visited, a, r, 0)
            episode_reward += r
            break

    return env, agent, episode_reward

In [92]:
# TSP 환경 생성
numnodes, maxvalue = 10,10
points = randcoord(numnodes, maxvalue)  # 간단한 예시 좌표
tsp_env = Env(points)

# DQN 에이전트 생성
agent = Agent(states_size=2*tsp_env.n_stops, actions_size=len(points))

# DQN 학습
num_episodes = 800
history = []
for episode in range(num_episodes):
    tsp_env, agent, total_reward = run_episode(tsp_env, agent, episode, verbose=1)
    history.append(total_reward)
    print(f"Episode {episode + 1}, Total Reward: {total_reward}")

# 학습된 에이전트로 최적 경로 예측
def predict_optimal_path(env, agent):
    s = env.reset()
    state = trans(s, [], agent.actions_size)
    path = [s]

    while len(path) < env.n_stops:
        input_data = trans(s, path, agent.actions_size)
        a = agent.act(input_data)
        s_next, _, _ = env.step(s, a)
        path.append(s_next)
        s = s_next
    path.append(s)
    return path

# 최적 경로 및 길이 출력
optimal_path = predict_optimal_path(tsp_env, agent)
optimal_length = sum([tsp_env.dist_matrix[optimal_path[i - 1]][optimal_path[i]] for i in range(1, len(optimal_path))])

print(f"Optimal Path: {optimal_path}")
print(f"Optimal Path Length: {optimal_length}")

permutation, distance = Findoptsol(np.array(tsp_env.dist_matrix))
print(f"real_Optimal Path: {permutation}")
print(f"real_Optimal Path Length: {distance}")

# 1000번까지 에피소드를 진행했으나 적합이 잘 되지 않음



5 -1.9895625089960105 False
3 -4.905091251117594 False
6 -6.75623719662853 False
2 -8.093492715402329 False
1 -7.620784652302808 False
7 -8.63933616755769 False
Episode 1, Total Reward: -2038.004504492005
4 -10.843880223714587 False
3 -6.980965374268762 False
1 -8.062794569850828 False
7 -8.63933616755769 False
6 -9.189377103453513 False
2 -8.093492715402329 False
Episode 2, Total Reward: -2051.8098461542477
5 -1.9895625089960105 False
6 -2.5146136553162997 False
8 -5.004577401674917 False
7 -9.21539248558825 False
1 -8.63933616755769 False
3 -8.062794569850828 False
Episode 3, Total Reward: -2035.426276788984
9 -8.579293986144062 False
3 -5.895412073873364 False
4 -6.980965374268762 False
8 -9.351745191179539 False
7 -9.21539248558825 False
5 -6.890753875802023 False
1 -3.2013328857401957 False
Episode 4, Total Reward: -1550.114895872596
7 -8.756186467218008 False
4 -4.250813147599771 False
8 -9.351745191179539 False
1 -6.742121402603406 False
Episode 5, Total Reward: -3029.1008662086



7 -9.21539248558825 False




3 -7.705775893101598 False
4 -6.980965374268762 False




2 -4.585675628012074 False




5 -5.77799343429354 False
Episode 11, Total Reward: -2038.834847802043
1 -2.2601094764485308 False
4 -11.509995892926003 False
8 -9.351745191179539 False
6 -5.004577401674917 False
7 -9.189377103453513 False
3 -7.705775893101598 False
Episode 12, Total Reward: -2045.0215809587842
3 -6.226340755216833 False
8 -2.6150299035155595 False
2 -8.192416453410834 False
6 -8.093492715402329 False
7 -9.189377103453513 False
9 -2.3673957673885857 False
Episode 13, Total Reward: -2036.6840526983879
8 -4.569044986778715 False
1 -6.742121402603406 False
4 -11.509995892926003 False
3 -6.980965374268762 False
2 -6.841918956903541 False
9 -2.2550833802954076 False
5 -6.591803321334266 False
Episode 14, Total Reward: -1545.4909333151102
5 -1.9895625089960105 False
7 -6.890753875802023 False
3 -7.705775893101598 False
9 -5.895412073873364 False
Episode 15, Total Reward: -3022.481504351773
1 -2.2601094764485308 False
2 -7.620784652302808 False
9 -2.2550833802954076 False
8 -7.853704773177191 False
Episode 



7 -9.189377103453513 False
Episode 20, Total Reward: -2052.7631604137196
6 -0.557563640416601 False




7 -9.189377103453513 False
5 -6.890753875802023 False




3 -4.905091251117594 False
1 -8.062794569850828 False




8 -6.742121402603406 False
Episode 21, Total Reward: -2036.347701843244
7 -8.756186467218008 False
9 -2.3673957673885857 False
4 -2.4969092474766588 False
6 -11.374743052916678 False
Episode 22, Total Reward: -3024.995234535
7 -8.756186467218008 False
3 -7.705775893101598 False
1 -8.062794569850828 False
6 -2.0415606574273 False
9 -9.089599829761081 False
4 -2.4969092474766588 False
Episode 23, Total Reward: -2038.1528266648354
9 -8.579293986144062 False
6 -9.089599829761081 False
3 -6.75623719662853 False
5 -4.905091251117594 False
7 -6.890753875802023 False
2 -1.1138456925079876 False
8 -8.192416453410834 False
Episode 24, Total Reward: -1545.5272382853723
7 -8.756186467218008 False
9 -2.3673957673885857 False
3 -5.895412073873364 False
1 -8.062794569850828 False
6 -2.0415606574273 False
2 -8.093492715402329 False
4 -4.585675628012074 False
Episode 25, Total Reward: -1539.8025178791725
1 -2.2601094764485308 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
2 -6.84191895690



2 -4.585675628012074 False
7 -1.1138456925079876 False




3 -7.705775893101598 False
5 -4.905091251117594 False




8 -4.071101157390693 False




9 -7.853704773177191 False
Episode 31, Total Reward: -1541.0790746190216
1 -2.2601094764485308 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
3 -5.895412073873364 False
Episode 32, Total Reward: -3019.1622534852686
4 -10.843880223714587 False
1 -11.509995892926003 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
3 -5.895412073873364 False
6 -6.75623719662853 False
Episode 33, Total Reward: -2046.0122573220888
7 -8.756186467218008 False
6 -9.189377103453513 False
4 -11.374743052916678 False
1 -11.509995892926003 False
3 -8.062794569850828 False
2 -6.841918956903541 False
9 -2.2550833802954076 False
8 -7.853704773177191 False
Episode 34, Total Reward: -1065.8438041967413
6 -0.557563640416601 False
7 -9.189377103453513 False
4 -4.250813147599771 False
2 -4.585675628012074 False
3 -6.841918956903541 False
8 -2.6150299035155595 False
Episode 35, Total Reward: -2028.040378379901
8 -4.569044986778715 False
6 -5.004577401674917 False
5 -2.5146136553162997 False
7 -6.890



5 -6.591803321334266 False
7 -6.890753875802023 False




6 -9.189377103453513 False




1 -2.0415606574273 False
3 -8.062794569850828 False




Episode 41, Total Reward: -2041.3555835140119
1 -2.2601094764485308 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
8 -7.853704773177191 False
5 -4.071101157390693 False
3 -4.905091251117594 False
Episode 42, Total Reward: -2030.0967385930803
7 -8.756186467218008 False
9 -2.3673957673885857 False
4 -2.4969092474766588 False
2 -4.585675628012074 False
8 -8.192416453410834 False
Episode 43, Total Reward: -2526.3985835635062
2 -7.653020649610424 False
3 -6.841918956903541 False
4 -6.980965374268762 False
6 -11.374743052916678 False
Episode 44, Total Reward: -3032.850648033699
5 -1.9895625089960105 False
4 -8.86012987256621 False
9 -2.4969092474766588 False
3 -5.895412073873364 False
6 -6.75623719662853 False
7 -9.189377103453513 False
Episode 45, Total Reward: -2035.1876280029942
1 -2.2601094764485308 False
5 -3.2013328857401957 False
4 -8.86012987256621 False
6 -11.374743052916678 False
Episode 46, Total Reward: -3025.6963152876715
4 -10.843880223714587 False
7 -4.25081314759



6 -9.189377103453513 False




1 -2.0415606574273 False




5 -3.2013328857401957 False




3 -4.905091251117594 False




8 -2.6150299035155595 False




9 -7.853704773177191 False
Episode 51, Total Reward: -1538.5622830416492
8 -4.569044986778715 False
4 -9.351745191179539 False
6 -11.374743052916678 False
2 -8.093492715402329 False
7 -1.1138456925079876 False
5 -6.890753875802023 False
1 -3.2013328857401957 False
Episode 52, Total Reward: -1544.5949584003274
1 -2.2601094764485308 False
8 -6.742121402603406 False
3 -2.6150299035155595 False
2 -6.841918956903541 False
Episode 53, Total Reward: -3018.459179739471
1 -2.2601094764485308 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
2 -2.2550833802954076 False
6 -8.093492715402329 False
5 -2.5146136553162997 False
Episode 54, Total Reward: -2026.1300311624086
3 -6.226340755216833 False
6 -6.75623719662853 False
2 -8.093492715402329 False
7 -1.1138456925079876 False
1 -8.63933616755769 False
9 -9.070180154761154 False
8 -7.853704773177191 False
Episode 55, Total Reward: -1547.7531374552516
3 -6.226340755216833 False
9 -5.895412073873364 False
6 -9.089599829761081 False
7 -9.189



4 -10.843880223714587 False
2 -4.585675628012074 False




5 -5.77799343429354 False
7 -6.890753875802023 False




3 -7.705775893101598 False




9 -5.895412073873364 False
Episode 61, Total Reward: -2041.6994911287973
1 -2.2601094764485308 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
2 -2.2550833802954076 False
4 -4.585675628012074 False
5 -8.86012987256621 False
8 -4.071101157390693 False
Episode 62, Total Reward: -1533.0388314496593
8 -4.569044986778715 False
7 -9.21539248558825 False
1 -8.63933616755769 False
9 -9.070180154761154 False
2 -2.2550833802954076 False
6 -8.093492715402329 False
5 -2.5146136553162997 False
4 -8.86012987256621 False
Episode 63, Total Reward: -1053.217273418266
6 -0.557563640416601 False
8 -5.004577401674917 False
7 -9.21539248558825 False
9 -2.3673957673885857 False
4 -2.4969092474766588 False
1 -11.509995892926003 False
Episode 64, Total Reward: -2031.1518344354708
8 -4.569044986778715 False
9 -7.853704773177191 False
2 -2.2550833802954076 False
7 -1.1138456925079876 False
1 -8.63933616755769 False
Episode 65, Total Reward: -2524.431015000317
9 -8.579293986144062 False
1 -9.07018015



3 -8.062794569850828 False




4 -6.980965374268762 False




5 -8.86012987256621 False




6 -2.5146136553162997 False




2 -8.093492715402329 False
Episode 71, Total Reward: -2036.772105663853
9 -8.579293986144062 False
4 -2.4969092474766588 False
7 -4.250813147599771 False
6 -9.189377103453513 False
3 -6.75623719662853 False
8 -2.6150299035155595 False
Episode 72, Total Reward: -2033.887660584818
2 -7.653020649610424 False
5 -5.77799343429354 False
3 -4.905091251117594 False
4 -6.980965374268762 False
8 -9.351745191179539 False
6 -5.004577401674917 False
1 -2.0415606574273 False
9 -9.070180154761154 False
Episode 73, Total Reward: -1050.7851341143332
9 -8.579293986144062 False
8 -7.853704773177191 False
2 -8.192416453410834 False
4 -4.585675628012074 False
Episode 74, Total Reward: -3029.2110908407444
2 -7.653020649610424 False
3 -6.841918956903541 False
9 -5.895412073873364 False
1 -9.070180154761154 False
6 -2.0415606574273 False
8 -5.004577401674917 False
5 -4.071101157390693 False
Episode 75, Total Reward: -1540.5777710516413
6 -0.557563640416601 False
3 -6.75623719662853 False
7 -7.705775893101598 



9 -6.591803321334266 False
Episode 80, Total Reward: -2036.578763704962
4 -10.843880223714587 False




1 -11.509995892926003 False
2 -7.620784652302808 False




5 -5.77799343429354 False




3 -4.905091251117594 False
9 -5.895412073873364 False




Episode 81, Total Reward: -2046.5531575282278
6 -0.557563640416601 False
3 -6.75623719662853 False
7 -7.705775893101598 False
2 -1.1138456925079876 False
4 -4.585675628012074 False
Episode 82, Total Reward: -2520.7190980506666
4 -10.843880223714587 False
9 -2.4969092474766588 False
3 -5.895412073873364 False
2 -6.841918956903541 False
5 -5.77799343429354 False
Episode 83, Total Reward: -2531.856113936262
5 -1.9895625089960105 False
3 -4.905091251117594 False
6 -6.75623719662853 False
2 -8.093492715402329 False
9 -2.2550833802954076 False
7 -2.3673957673885857 False
Episode 84, Total Reward: -2026.3668628198286
3 -6.226340755216833 False
4 -6.980965374268762 False
8 -9.351745191179539 False
2 -8.192416453410834 False
5 -5.77799343429354 False
Episode 85, Total Reward: -2536.5294612083694
2 -7.653020649610424 False
5 -5.77799343429354 False
3 -4.905091251117594 False
1 -8.062794569850828 False
6 -2.0415606574273 False
Episode 86, Total Reward: -2528.4404605622994
8 -4.569044986778715 Fal



6 -5.004577401674917 False
Episode 90, Total Reward: -2038.0933715009937
3 -6.226340755216833 False




9 -5.895412073873364 False




6 -9.089599829761081 False




1 -2.0415606574273 False




4 -11.509995892926003 False




5 -8.86012987256621 False
Episode 91, Total Reward: -2043.6230390817707
2 -7.653020649610424 False
4 -4.585675628012074 False
6 -11.374743052916678 False
8 -5.004577401674917 False
7 -9.21539248558825 False
3 -7.705775893101598 False
Episode 92, Total Reward: -2045.5391851109039
8 -4.569044986778715 False
7 -9.21539248558825 False
6 -9.189377103453513 False
5 -2.5146136553162997 False
4 -8.86012987256621 False
2 -4.585675628012074 False
Episode 93, Total Reward: -2038.934233731715
6 -0.557563640416601 False
1 -2.0415606574273 False
3 -8.062794569850828 False
2 -6.841918956903541 False
Episode 94, Total Reward: -3017.503837824598
5 -1.9895625089960105 False
2 -5.77799343429354 False
9 -2.2550833802954076 False
4 -2.4969092474766588 False
7 -4.250813147599771 False
6 -9.189377103453513 False
Episode 95, Total Reward: -2025.9597388221148
1 -2.2601094764485308 False
8 -6.742121402603406 False
9 -7.853704773177191 False
4 -2.4969092474766588 False
3 -6.980965374268762 False
Episode 96, Tota



8 -4.569044986778715 False
1 -6.742121402603406 False




9 -9.070180154761154 False
5 -6.591803321334266 False




6 -2.5146136553162997 False
Episode 101, Total Reward: -2529.487763520794
3 -6.226340755216833 False
5 -4.905091251117594 False
6 -2.5146136553162997 False
8 -5.004577401674917 False
2 -8.192416453410834 False
4 -4.585675628012074 False
Episode 102, Total Reward: -2031.4287151447484
3 -6.226340755216833 False
9 -5.895412073873364 False
2 -2.2550833802954076 False
Episode 103, Total Reward: -3514.3768362093856
4 -10.843880223714587 False
5 -8.86012987256621 False
9 -6.591803321334266 False
2 -2.2550833802954076 False
7 -1.1138456925079876 False
3 -7.705775893101598 False
Episode 104, Total Reward: -2037.37051838352
6 -0.557563640416601 False
3 -6.75623719662853 False
4 -6.980965374268762 False
8 -9.351745191179539 False
1 -6.742121402603406 False
5 -3.2013328857401957 False
7 -6.890753875802023 False
Episode 105, Total Reward: -1540.480719566639
7 -8.756186467218008 False
3 -7.705775893101598 False
2 -6.841918956903541 False
4 -4.585675628012074 False
1 -11.509995892926003 False
9 -9.07



6 -2.0415606574273 False




7 -9.189377103453513 False




5 -6.890753875802023 False




8 -4.071101157390693 False
Episode 111, Total Reward: -2524.452902270522
6 -0.557563640416601 False
3 -6.75623719662853 False
2 -6.841918956903541 False
7 -1.1138456925079876 False
4 -4.250813147599771 False
1 -11.509995892926003 False
5 -3.2013328857401957 False
Episode 112, Total Reward: -1534.2317074127227
1 -2.2601094764485308 False
7 -8.63933616755769 False
5 -6.890753875802023 False
2 -5.77799343429354 False
4 -4.585675628012074 False
6 -11.374743052916678 False
Episode 113, Total Reward: -2039.5286116350305
9 -8.579293986144062 False
7 -2.3673957673885857 False
3 -7.705775893101598 False
1 -8.062794569850828 False
5 -3.2013328857401957 False
6 -2.5146136553162997 False
2 -8.093492715402329 False
Episode 114, Total Reward: -1540.524699472944
9 -8.579293986144062 False
2 -2.2550833802954076 False
5 -5.77799343429354 False
3 -4.905091251117594 False
Episode 115, Total Reward: -3021.5174620518505
6 -0.557563640416601 False
3 -6.75623719662853 False
5 -4.905091251117594 False
4 -8.86



6 -11.374743052916678 False
Episode 120, Total Reward: -2543.5737833102658
3 -6.226340755216833 False




1 -8.062794569850828 False
5 -3.2013328857401957 False




4 -8.86012987256621 False
Episode 121, Total Reward: -3026.3505980833743
4 -10.843880223714587 False
9 -2.4969092474766588 False
6 -9.089599829761081 False
7 -9.189377103453513 False
2 -1.1138456925079876 False
8 -8.192416453410834 False
5 -4.071101157390693 False
Episode 122, Total Reward: -1544.9971297077154
9 -8.579293986144062 False
2 -2.2550833802954076 False
3 -6.841918956903541 False
1 -8.062794569850828 False
7 -8.63933616755769 False
4 -4.250813147599771 False
8 -9.351745191179539 False
Episode 123, Total Reward: -1547.9809853995307
6 -0.557563640416601 False
1 -2.0415606574273 False
7 -8.63933616755769 False
3 -7.705775893101598 False
5 -4.905091251117594 False
Episode 124, Total Reward: -2523.8493276096206
6 -0.557563640416601 False
9 -9.089599829761081 False
5 -6.591803321334266 False
4 -8.86012987256621 False
3 -6.980965374268762 False
1 -8.062794569850828 False
2 -7.620784652302808 False
Episode 125, Total Reward: -1547.7636412605004
9 -8.579293986144062 False
1 -9.070180



4 -10.843880223714587 False




2 -4.585675628012074 False




7 -1.1138456925079876 False




9 -2.3673957673885857 False




6 -9.089599829761081 False




5 -2.5146136553162997 False




3 -4.905091251117594 False
Episode 131, Total Reward: -1535.420102047818
6 -0.557563640416601 False
7 -9.189377103453513 False
8 -9.21539248558825 False
1 -6.742121402603406 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
2 -6.841918956903541 False
Episode 132, Total Reward: -1540.6527977258231
7 -8.756186467218008 False
5 -6.890753875802023 False
2 -5.77799343429354 False
8 -8.192416453410834 False
1 -6.742121402603406 False
Episode 133, Total Reward: -2536.359471633328
2 -7.653020649610424 False
6 -8.093492715402329 False
4 -11.374743052916678 False
5 -8.86012987256621 False
Episode 134, Total Reward: -3035.9813862904957
8 -4.569044986778715 False
7 -9.21539248558825 False
9 -2.3673957673885857 False
1 -9.070180154761154 False
3 -8.062794569850828 False
Episode 135, Total Reward: -2533.2848079643672
5 -1.9895625089960105 False
2 -5.77799343429354 False
8 -8.192416453410834 False
1 -6.742121402603406 False
6 -2.0415606574273 False
Episode 136, Total Reward: -2524.74365445



7 -2.3673957673885857 False
Episode 140, Total Reward: -1537.9685677807956
2 -7.653020649610424 False




1 -7.620784652302808 False




4 -11.509995892926003 False
3 -6.980965374268762 False




7 -7.705775893101598 False
5 -6.890753875802023 False




9 -6.591803321334266 False
Episode 141, Total Reward: -1554.9530996593462
4 -10.843880223714587 False
6 -11.374743052916678 False
8 -5.004577401674917 False
7 -9.21539248558825 False
3 -7.705775893101598 False
2 -6.841918956903541 False
Episode 142, Total Reward: -2050.9862880138994
2 -7.653020649610424 False
8 -8.192416453410834 False
5 -4.071101157390693 False
4 -8.86012987256621 False
6 -11.374743052916678 False
Episode 143, Total Reward: -2540.1514111858946
9 -8.579293986144062 False
4 -2.4969092474766588 False
3 -6.980965374268762 False
5 -4.905091251117594 False
8 -4.071101157390693 False
Episode 144, Total Reward: -2527.033361016398
5 -1.9895625089960105 False
4 -8.86012987256621 False
8 -9.351745191179539 False
3 -2.6150299035155595 False
1 -8.062794569850828 False
9 -9.070180154761154 False
2 -2.2550833802954076 False
Episode 145, Total Reward: -1542.2045255811647
8 -4.569044986778715 False
2 -8.192416453410834 False
9 -2.2550833802954076 False
3 -5.895412073873364 False
6 -6.



2 -8.093492715402329 False
7 -1.1138456925079876 False




3 -7.705775893101598 False




5 -4.905091251117594 False
4 -8.86012987256621 False




1 -11.509995892926003 False
Episode 151, Total Reward: -1542.7458949580384
9 -8.579293986144062 False
5 -6.591803321334266 False
8 -4.071101157390693 False
6 -5.004577401674917 False
4 -11.374743052916678 False
2 -4.585675628012074 False
7 -1.1138456925079876 False
Episode 152, Total Reward: -1541.3210402399807
5 -1.9895625089960105 False
8 -4.071101157390693 False
2 -8.192416453410834 False
3 -6.841918956903541 False
6 -6.75623719662853 False
7 -9.189377103453513 False
Episode 153, Total Reward: -2037.0406133767833
4 -10.843880223714587 False
7 -4.250813147599771 False
5 -6.890753875802023 False
2 -5.77799343429354 False
1 -7.620784652302808 False
3 -8.062794569850828 False
Episode 154, Total Reward: -2043.4470199035636
6 -0.557563640416601 False
7 -9.189377103453513 False
4 -4.250813147599771 False
3 -6.980965374268762 False
8 -2.6150299035155595 False
1 -6.742121402603406 False
Episode 155, Total Reward: -2030.3358705718576
2 -7.653020649610424 False
6 -8.093492715402329 False
8 -5.



6 -11.374743052916678 False
5 -2.5146136553162997 False




1 -3.2013328857401957 False
9 -9.070180154761154 False




7 -2.3673957673885857 False
3 -7.705775893101598 False




Episode 161, Total Reward: -1547.0779216329393
5 -1.9895625089960105 False
8 -4.071101157390693 False
6 -5.004577401674917 False
4 -11.374743052916678 False
3 -6.980965374268762 False
2 -6.841918956903541 False
9 -2.2550833802954076 False
Episode 162, Total Reward: -1538.5179518324462
7 -8.756186467218008 False
6 -9.189377103453513 False
3 -6.75623719662853 False
2 -6.841918956903541 False
8 -8.192416453410834 False
Episode 163, Total Reward: -2539.7361361776143
5 -1.9895625089960105 False
4 -8.86012987256621 False
7 -4.250813147599771 False
2 -1.1138456925079876 False
3 -6.841918956903541 False
8 -2.6150299035155595 False
6 -5.004577401674917 False
Episode 164, Total Reward: -1530.6758774837638
7 -8.756186467218008 False
9 -2.3673957673885857 False
3 -5.895412073873364 False
2 -6.841918956903541 False
8 -8.192416453410834 False
4 -9.351745191179539 False
Episode 165, Total Reward: -2041.405074909974
2 -7.653020649610424 False
1 -7.620784652302808 False
8 -6.742121402603406 False
7 -9.



6 -5.004577401674917 False
Episode 170, Total Reward: -2033.2004314620744
7 -8.756186467218008 False




5 -6.890753875802023 False
8 -4.071101157390693 False




2 -8.192416453410834 False
Episode 171, Total Reward: -3027.9104579538216
6 -0.557563640416601 False
1 -2.0415606574273 False
3 -8.062794569850828 False
9 -5.895412073873364 False
8 -7.853704773177191 False
7 -9.21539248558825 False
Episode 172, Total Reward: -2033.6264282003335
4 -10.843880223714587 False
6 -11.374743052916678 False
5 -2.5146136553162997 False
8 -4.071101157390693 False
1 -6.742121402603406 False
Episode 173, Total Reward: -2535.5464594919417
4 -10.843880223714587 False
9 -2.4969092474766588 False
3 -5.895412073873364 False
2 -6.841918956903541 False
6 -8.093492715402329 False
Episode 174, Total Reward: -2534.17161321737
3 -6.226340755216833 False
7 -7.705775893101598 False
8 -9.21539248558825 False
2 -8.192416453410834 False
6 -8.093492715402329 False
Episode 175, Total Reward: -2539.43341830272
9 -8.579293986144062 False
1 -9.070180154761154 False
7 -8.63933616755769 False
2 -1.1138456925079876 False
4 -4.585675628012074 False
3 -6.980965374268762 False
5 -4.9050912



6 -0.557563640416601 False




8 -5.004577401674917 False
5 -4.071101157390693 False




3 -4.905091251117594 False




7 -7.705775893101598 False




2 -1.1138456925079876 False
Episode 181, Total Reward: -2023.3579550362094
3 -6.226340755216833 False
8 -2.6150299035155595 False
5 -4.071101157390693 False
9 -6.591803321334266 False
1 -9.070180154761154 False
6 -2.0415606574273 False
Episode 182, Total Reward: -2030.6160159496458
6 -0.557563640416601 False
9 -9.089599829761081 False
1 -9.070180154761154 False
7 -8.63933616755769 False
4 -4.250813147599771 False
3 -6.980965374268762 False
Episode 183, Total Reward: -2038.5884583143652
2 -7.653020649610424 False
9 -2.2550833802954076 False
5 -6.591803321334266 False
1 -3.2013328857401957 False
3 -8.062794569850828 False
8 -2.6150299035155595 False
Episode 184, Total Reward: -2030.3790647103467
4 -10.843880223714587 False
1 -11.509995892926003 False
9 -9.070180154761154 False
8 -7.853704773177191 False
6 -5.004577401674917 False
2 -8.093492715402329 False
5 -5.77799343429354 False
Episode 185, Total Reward: -1558.15382459595
6 -0.557563640416601 False
4 -11.374743052916678 False
7 -4.25



9 -8.579293986144062 False
5 -6.591803321334266 False




1 -3.2013328857401957 False




3 -8.062794569850828 False




7 -7.705775893101598 False
6 -9.189377103453513 False




Episode 191, Total Reward: -2043.3303777596245
3 -6.226340755216833 False
4 -6.980965374268762 False
9 -2.4969092474766588 False
1 -9.070180154761154 False
Episode 192, Total Reward: -3024.7743955317237
5 -1.9895625089960105 False
9 -6.591803321334266 False
3 -5.895412073873364 False
8 -2.6150299035155595 False
2 -8.192416453410834 False
1 -7.620784652302808 False
Episode 193, Total Reward: -2032.9050089134328
1 -2.2601094764485308 False
2 -7.620784652302808 False
6 -8.093492715402329 False
8 -5.004577401674917 False
3 -2.6150299035155595 False
Episode 194, Total Reward: -2525.5939941493443
6 -0.557563640416601 False
7 -9.189377103453513 False
1 -8.63933616755769 False
8 -6.742121402603406 False
2 -8.192416453410834 False
5 -5.77799343429354 False
Episode 195, Total Reward: -2039.0988082017357
3 -6.226340755216833 False
9 -5.895412073873364 False
8 -7.853704773177191 False
1 -6.742121402603406 False
2 -7.620784652302808 False
4 -4.585675628012074 False
Episode 196, Total Reward: -2038.



7 -9.21539248558825 False




2 -1.1138456925079876 False
4 -4.585675628012074 False




3 -6.980965374268762 False




9 -5.895412073873364 False




5 -6.591803321334266 False
Episode 201, Total Reward: -1538.9521395623633
8 -4.569044986778715 False
2 -8.192416453410834 False
6 -8.093492715402329 False
4 -11.374743052916678 False
Episode 202, Total Reward: -3032.2296972085087
9 -8.579293986144062 False
6 -9.089599829761081 False
4 -11.374743052916678 False
2 -4.585675628012074 False
1 -7.620784652302808 False
3 -8.062794569850828 False
7 -7.705775893101598 False
Episode 203, Total Reward: -1557.018667612089
6 -0.557563640416601 False
8 -5.004577401674917 False
9 -7.853704773177191 False
3 -5.895412073873364 False
5 -4.905091251117594 False
4 -8.86012987256621 False
7 -4.250813147599771 False
1 -8.63933616755769 False
Episode 204, Total Reward: -1045.9666283279835
8 -4.569044986778715 False
1 -6.742121402603406 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
7 -7.705775893101598 False
2 -1.1138456925079876 False
Episode 205, Total Reward: -2028.2372121118494
9 -8.579293986144062 False
1 -9.070180154761154 False
7 -8.639



7 -9.21539248558825 False
Episode 210, Total Reward: -2037.9479445114814
7 -8.756186467218008 False




9 -2.3673957673885857 False
5 -6.591803321334266 False




1 -3.2013328857401957 False




3 -8.062794569850828 False
6 -6.75623719662853 False




Episode 211, Total Reward: -2035.7357502081604
7 -8.756186467218008 False
4 -4.250813147599771 False
8 -9.351745191179539 False
9 -7.853704773177191 False
1 -9.070180154761154 False
2 -7.620784652302808 False
Episode 212, Total Reward: -2046.9034143862384
3 -6.226340755216833 False
6 -6.75623719662853 False
9 -9.089599829761081 False
2 -2.2550833802954076 False
8 -8.192416453410834 False
Episode 213, Total Reward: -2532.5196776153125
5 -1.9895625089960105 False
3 -4.905091251117594 False
1 -8.062794569850828 False
2 -7.620784652302808 False
4 -4.585675628012074 False
7 -4.250813147599771 False
Episode 214, Total Reward: -2031.414721757879
7 -8.756186467218008 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
1 -2.0415606574273 False
4 -11.509995892926003 False
Episode 215, Total Reward: -2533.764738614721
5 -1.9895625089960105 False
8 -4.071101157390693 False
7 -9.21539248558825 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
Episode 216, Total Reward: -2526.733



3 -6.226340755216833 False




4 -6.980965374268762 False
8 -9.351745191179539 False




9 -7.853704773177191 False
5 -6.591803321334266 False




6 -2.5146136553162997 False
Episode 221, Total Reward: -2039.5191730704928
6 -0.557563640416601 False
2 -8.093492715402329 False
3 -6.841918956903541 False
7 -7.705775893101598 False
5 -6.890753875802023 False
9 -6.591803321334266 False
8 -7.853704773177191 False
Episode 222, Total Reward: -1544.5350131761377
9 -8.579293986144062 False
3 -5.895412073873364 False
4 -6.980965374268762 False
1 -11.509995892926003 False
8 -6.742121402603406 False
7 -9.21539248558825 False
6 -9.189377103453513 False
Episode 223, Total Reward: -1558.1125583188575
8 -4.569044986778715 False
5 -4.071101157390693 False
7 -6.890753875802023 False
4 -4.250813147599771 False
2 -4.585675628012074 False
6 -8.093492715402329 False
Episode 224, Total Reward: -2032.4608815109855
3 -6.226340755216833 False
6 -6.75623719662853 False
5 -2.5146136553162997 False
9 -6.591803321334266 False
Episode 225, Total Reward: -3022.088994928496
6 -0.557563640416601 False
9 -9.089599829761081 False
1 -9.070180154761154 False
3 -8.0627



8 -4.569044986778715 False




4 -9.351745191179539 False




9 -2.4969092474766588 False
6 -9.089599829761081 False




7 -9.189377103453513 False




2 -1.1138456925079876 False
Episode 231, Total Reward: -2035.8105220511575
8 -4.569044986778715 False
4 -9.351745191179539 False
2 -4.585675628012074 False
7 -1.1138456925079876 False
1 -8.63933616755769 False
9 -9.070180154761154 False
Episode 232, Total Reward: -2037.3298278207972
4 -10.843880223714587 False
2 -4.585675628012074 False
7 -1.1138456925079876 False
5 -6.890753875802023 False
1 -3.2013328857401957 False
8 -6.742121402603406 False
6 -5.004577401674917 False
Episode 233, Total Reward: -1538.3821871100552
6 -0.557563640416601 False
4 -11.374743052916678 False
2 -4.585675628012074 False
5 -5.77799343429354 False
9 -6.591803321334266 False
7 -2.3673957673885857 False
8 -9.21539248558825 False
Episode 234, Total Reward: -1540.47056732995
9 -8.579293986144062 False
1 -9.070180154761154 False
5 -3.2013328857401957 False
2 -5.77799343429354 False
3 -6.841918956903541 False
8 -2.6150299035155595 False
7 -9.21539248558825 False
Episode 235, Total Reward: -1545.3011418069461
9 -8.57



9 -8.579293986144062 False




5 -6.591803321334266 False




7 -6.890753875802023 False




3 -7.705775893101598 False
Episode 241, Total Reward: -3029.7676270763823
6 -0.557563640416601 False
1 -2.0415606574273 False
8 -6.742121402603406 False
5 -4.071101157390693 False
9 -6.591803321334266 False
Episode 242, Total Reward: -2520.0041501791725
8 -4.569044986778715 False
1 -6.742121402603406 False
2 -7.620784652302808 False
5 -5.77799343429354 False
6 -2.5146136553162997 False
3 -6.75623719662853 False
Episode 243, Total Reward: -2033.9807953279233
6 -0.557563640416601 False
4 -11.374743052916678 False
7 -4.250813147599771 False
3 -7.705775893101598 False
8 -2.6150299035155595 False
Episode 244, Total Reward: -2526.5039256375503
2 -7.653020649610424 False
6 -8.093492715402329 False
7 -9.189377103453513 False
3 -7.705775893101598 False
4 -6.980965374268762 False
1 -11.509995892926003 False
9 -9.070180154761154 False
5 -6.591803321334266 False
Episode 245, Total Reward: -1066.7946111048582
4 -10.843880223714587 False
2 -4.585675628012074 False
1 -7.620784652302808 False
3 -8.062



7 -8.756186467218008 False
6 -9.189377103453513 False




9 -9.089599829761081 False
4 -2.4969092474766588 False




8 -9.351745191179539 False
1 -6.742121402603406 False




3 -8.062794569850828 False
Episode 251, Total Reward: -1553.6887338115432
9 -8.579293986144062 False
1 -9.070180154761154 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
6 -6.75623719662853 False
2 -8.093492715402329 False
Episode 252, Total Reward: -2040.6056281897938
8 -4.569044986778715 False
2 -8.192416453410834 False
9 -2.2550833802954076 False
5 -6.591803321334266 False
7 -6.890753875802023 False
Episode 253, Total Reward: -2528.499102017621
9 -8.579293986144062 False
4 -2.4969092474766588 False
8 -9.351745191179539 False
3 -2.6150299035155595 False
7 -7.705775893101598 False
Episode 254, Total Reward: -2530.7487542214176
2 -7.653020649610424 False
8 -8.192416453410834 False
6 -5.004577401674917 False
3 -6.75623719662853 False
9 -5.895412073873364 False
5 -6.591803321334266 False
7 -6.890753875802023 False
Episode 255, Total Reward: -1546.9842209723342
2 -7.653020649610424 False
4 -4.585675628012074 False
7 -4.250813147599771 False
9 -2.3673957673885857 False
8 -7.85



7 -1.1138456925079876 False
Episode 260, Total Reward: -2519.0731444497656
1 -2.2601094764485308 False




9 -9.070180154761154 False




5 -6.591803321334266 False




2 -5.77799343429354 False




3 -6.841918956903541 False
6 -6.75623719662853 False




7 -9.189377103453513 False
Episode 261, Total Reward: -1546.487619643823
2 -7.653020649610424 False
5 -5.77799343429354 False
4 -8.86012987256621 False
1 -11.509995892926003 False
6 -2.0415606574273 False
9 -9.089599829761081 False
Episode 262, Total Reward: -2044.9323003365844
1 -2.2601094764485308 False
6 -2.0415606574273 False
3 -6.75623719662853 False
7 -7.705775893101598 False
5 -6.890753875802023 False
4 -8.86012987256621 False
Episode 263, Total Reward: -2034.514566971974
6 -0.557563640416601 False
7 -9.189377103453513 False
9 -2.3673957673885857 False
4 -2.4969092474766588 False
2 -4.585675628012074 False
Episode 264, Total Reward: -2519.1969213867474
7 -8.756186467218008 False
6 -9.189377103453513 False
9 -9.089599829761081 False
8 -7.853704773177191 False
3 -2.6150299035155595 False
4 -6.980965374268762 False
2 -4.585675628012074 False
Episode 265, Total Reward: -1549.0705390794062
5 -1.9895625089960105 False
2 -5.77799343429354 False
1 -7.620784652302808 False
7 -8.639336167



1 -9.070180154761154 False
7 -8.63933616755769 False




5 -6.890753875802023 False




4 -8.86012987256621 False
6 -11.374743052916678 False




3 -6.75623719662853 False
Episode 271, Total Reward: -1560.1706743063764
1 -2.2601094764485308 False
4 -11.509995892926003 False
3 -6.980965374268762 False
7 -7.705775893101598 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
Episode 272, Total Reward: -2039.9138422338945
5 -1.9895625089960105 False
8 -4.071101157390693 False
1 -6.742121402603406 False
6 -2.0415606574273 False
4 -11.374743052916678 False
2 -4.585675628012074 False
7 -1.1138456925079876 False
3 -7.705775893101598 False
Episode 273, Total Reward: -1039.6243859929557
8 -4.569044986778715 False
6 -5.004577401674917 False
5 -2.5146136553162997 False
7 -6.890753875802023 False
1 -8.63933616755769 False
3 -8.062794569850828 False
Episode 274, Total Reward: -2035.6811206569805
3 -6.226340755216833 False
1 -8.062794569850828 False
9 -9.070180154761154 False
4 -2.4969092474766588 False
2 -4.585675628012074 False
5 -5.77799343429354 False
Episode 275, Total Reward: -2036.219893789611
6 -0.557563640416601 False
7 -9.18



3 -4.905091251117594 False




4 -6.980965374268762 False
1 -11.509995892926003 False




8 -6.742121402603406 False
2 -8.192416453410834 False




Episode 281, Total Reward: -2040.3201528833229
5 -1.9895625089960105 False
6 -2.5146136553162997 False
4 -11.374743052916678 False
3 -6.980965374268762 False
1 -8.062794569850828 False
7 -8.63933616755769 False
Episode 282, Total Reward: -2039.5620153289062
1 -2.2601094764485308 False
9 -9.070180154761154 False
4 -2.4969092474766588 False
8 -9.351745191179539 False
2 -8.192416453410834 False
5 -5.77799343429354 False
Episode 283, Total Reward: -2037.1493539575704
2 -7.653020649610424 False
8 -8.192416453410834 False
1 -6.742121402603406 False
6 -2.0415606574273 False
4 -11.374743052916678 False
Episode 284, Total Reward: -2536.0038622159686
1 -2.2601094764485308 False
9 -9.070180154761154 False
7 -2.3673957673885857 False
5 -6.890753875802023 False
2 -5.77799343429354 False
Episode 285, Total Reward: -2526.3664327086935
1 -2.2601094764485308 False
9 -9.070180154761154 False
3 -5.895412073873364 False
5 -4.905091251117594 False
6 -2.5146136553162997 False
Episode 286, Total Reward: -252



Episode 290, Total Reward: -3028.7796886553465
3 -6.226340755216833 False




1 -8.062794569850828 False
2 -7.620784652302808 False




9 -2.2550833802954076 False
5 -6.591803321334266 False




8 -4.071101157390693 False
Episode 291, Total Reward: -2034.827907836391
8 -4.569044986778715 False
6 -5.004577401674917 False
5 -2.5146136553162997 False
2 -5.77799343429354 False
3 -6.841918956903541 False
7 -7.705775893101598 False
Episode 292, Total Reward: -2032.4139243280688
6 -0.557563640416601 False
7 -9.189377103453513 False
3 -7.705775893101598 False
8 -2.6150299035155595 False
Episode 293, Total Reward: -3020.0677465404874
2 -7.653020649610424 False
8 -8.192416453410834 False
7 -9.21539248558825 False
1 -8.63933616755769 False
5 -3.2013328857401957 False
9 -6.591803321334266 False
Episode 294, Total Reward: -2043.4933019632417
4 -10.843880223714587 False
8 -9.351745191179539 False
3 -2.6150299035155595 False
7 -7.705775893101598 False
2 -1.1138456925079876 False
1 -7.620784652302808 False
5 -3.2013328857401957 False
Episode 295, Total Reward: -1542.4523944420623
4 -10.843880223714587 False
8 -9.351745191179539 False
9 -7.853704773177191 False
7 -2.3673957673885857 False
1 -8



1 -2.2601094764485308 False
3 -8.062794569850828 False




4 -6.980965374268762 False




6 -11.374743052916678 False
9 -9.089599829761081 False




Episode 301, Total Reward: -2537.7682123032455
1 -2.2601094764485308 False
2 -7.620784652302808 False
7 -1.1138456925079876 False
6 -9.189377103453513 False
3 -6.75623719662853 False
9 -5.895412073873364 False
Episode 302, Total Reward: -2032.8357661952148
9 -8.579293986144062 False
6 -9.089599829761081 False
8 -5.004577401674917 False
3 -2.6150299035155595 False
7 -7.705775893101598 False
Episode 303, Total Reward: -2532.994277014197
1 -2.2601094764485308 False
2 -7.620784652302808 False
8 -8.192416453410834 False
4 -9.351745191179539 False
6 -11.374743052916678 False
5 -2.5146136553162997 False
Episode 304, Total Reward: -2041.314412481575
3 -6.226340755216833 False
7 -7.705775893101598 False
1 -8.63933616755769 False
5 -3.2013328857401957 False
4 -8.86012987256621 False
Episode 305, Total Reward: -2534.6329155741823
3 -6.226340755216833 False
6 -6.75623719662853 False
2 -8.093492715402329 False
9 -2.2550833802954076 False
7 -2.3673957673885857 False
8 -9.21539248558825 False
1 -6.74



6 -2.5146136553162997 False
Episode 310, Total Reward: -2030.3931677916246
6 -0.557563640416601 False




2 -8.093492715402329 False
8 -8.192416453410834 False




7 -9.21539248558825 False




4 -4.250813147599771 False




1 -11.509995892926003 False
Episode 311, Total Reward: -2041.8196743353437
3 -6.226340755216833 False
1 -8.062794569850828 False
4 -11.509995892926003 False
7 -4.250813147599771 False
6 -9.189377103453513 False
Episode 312, Total Reward: -2539.239321469047
4 -10.843880223714587 False
5 -8.86012987256621 False
3 -4.905091251117594 False
6 -6.75623719662853 False
7 -9.189377103453513 False
9 -2.3673957673885857 False
Episode 313, Total Reward: -2042.922111414869
5 -1.9895625089960105 False
4 -8.86012987256621 False
7 -4.250813147599771 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
1 -2.0415606574273 False
2 -7.620784652302808 False
Episode 314, Total Reward: -1536.2198464360417
6 -0.557563640416601 False
8 -5.004577401674917 False
9 -7.853704773177191 False
1 -9.070180154761154 False
7 -8.63933616755769 False
2 -1.1138456925079876 False
Episode 315, Total Reward: -2032.2392078300957
5 -1.9895625089960105 False
1 -3.2013328857401957 False
8 -6.742121402603406 False
2 -8.192



8 -6.742121402603406 False
Episode 320, Total Reward: -3022.130461998196
4 -10.843880223714587 False




3 -6.980965374268762 False
5 -4.905091251117594 False




9 -6.591803321334266 False




6 -9.089599829761081 False
8 -5.004577401674917 False




Episode 321, Total Reward: -2043.415917401871
6 -0.557563640416601 False
2 -8.093492715402329 False
5 -5.77799343429354 False
4 -8.86012987256621 False
8 -9.351745191179539 False
9 -7.853704773177191 False
3 -5.895412073873364 False
Episode 322, Total Reward: -1546.3900417009088
9 -8.579293986144062 False
2 -2.2550833802954076 False
7 -1.1138456925079876 False
3 -7.705775893101598 False
1 -8.062794569850828 False
8 -6.742121402603406 False
Episode 323, Total Reward: -2034.4589149245035
9 -8.579293986144062 False
7 -2.3673957673885857 False
8 -9.21539248558825 False
2 -8.192416453410834 False
4 -4.585675628012074 False
6 -11.374743052916678 False
5 -2.5146136553162997 False
Episode 324, Total Reward: -1546.8295310287767
7 -8.756186467218008 False
1 -8.63933616755769 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
6 -6.75623719662853 False
2 -8.093492715402329 False
8 -8.192416453410834 False
9 -7.853704773177191 False
Episode 325, Total Reward: -1056.3977979102524
1 -2.2601



5 -5.77799343429354 False
8 -4.071101157390693 False




7 -9.21539248558825 False
4 -4.250813147599771 False




1 -11.509995892926003 False
Episode 331, Total Reward: -2042.4783167674088
3 -6.226340755216833 False
8 -2.6150299035155595 False
4 -9.351745191179539 False
2 -4.585675628012074 False
6 -8.093492715402329 False
Episode 332, Total Reward: -2530.872284193326
3 -6.226340755216833 False
6 -6.75623719662853 False
8 -5.004577401674917 False
7 -9.21539248558825 False
2 -1.1138456925079876 False
5 -5.77799343429354 False
Episode 333, Total Reward: -2034.0943869659102
9 -8.579293986144062 False
4 -2.4969092474766588 False
1 -11.509995892926003 False
3 -8.062794569850828 False
2 -6.841918956903541 False
Episode 334, Total Reward: -2537.490912653301
8 -4.569044986778715 False
9 -7.853704773177191 False
2 -2.2550833802954076 False
4 -4.585675628012074 False
5 -8.86012987256621 False
3 -4.905091251117594 False
Episode 335, Total Reward: -2033.028729891947
2 -7.653020649610424 False
8 -8.192416453410834 False
4 -9.351745191179539 False
9 -2.4969092474766588 False
7 -2.3673957673885857 False
5 -6.890



8 -6.742121402603406 False
5 -4.071101157390693 False




4 -8.86012987256621 False
3 -6.980965374268762 False




6 -6.75623719662853 False
9 -9.089599829761081 False




Episode 341, Total Reward: -1544.760264309667
4 -10.843880223714587 False
5 -8.86012987256621 False
1 -3.2013328857401957 False
7 -8.63933616755769 False
3 -7.705775893101598 False
Episode 342, Total Reward: -2539.25045504268
1 -2.2601094764485308 False
7 -8.63933616755769 False
5 -6.890753875802023 False
6 -2.5146136553162997 False
2 -8.093492715402329 False
4 -4.585675628012074 False
3 -6.980965374268762 False
Episode 343, Total Reward: -1539.9649468928078
7 -8.756186467218008 False
8 -9.21539248558825 False
9 -7.853704773177191 False
3 -5.895412073873364 False
6 -6.75623719662853 False
Episode 344, Total Reward: -2538.4769329964856
2 -7.653020649610424 False
3 -6.841918956903541 False
5 -4.905091251117594 False
7 -6.890753875802023 False
8 -9.21539248558825 False
1 -6.742121402603406 False
Episode 345, Total Reward: -2042.2482986216253
2 -7.653020649610424 False
7 -1.1138456925079876 False
3 -7.705775893101598 False
8 -2.6150299035155595 False
1 -6.742121402603406 False
4 -11.509995



Episode 350, Total Reward: -3523.523048567129
4 -10.843880223714587 False
1 -11.509995892926003 False




2 -7.620784652302808 False




6 -8.093492715402329 False
Episode 351, Total Reward: -3038.068153484346
9 -8.579293986144062 False
1 -9.070180154761154 False
6 -2.0415606574273 False
3 -6.75623719662853 False
7 -7.705775893101598 False
5 -6.890753875802023 False
Episode 352, Total Reward: -2041.0438017638646
8 -4.569044986778715 False
6 -5.004577401674917 False
2 -8.093492715402329 False
7 -1.1138456925079876 False
Episode 353, Total Reward: -3018.780960796364
6 -0.557563640416601 False
3 -6.75623719662853 False
9 -5.895412073873364 False
4 -2.4969092474766588 False
Episode 354, Total Reward: -3015.7061221583954
8 -4.569044986778715 False
2 -8.192416453410834 False
4 -4.585675628012074 False
3 -6.980965374268762 False
1 -8.062794569850828 False
Episode 355, Total Reward: -2532.390897012321
1 -2.2601094764485308 False
5 -3.2013328857401957 False
4 -8.86012987256621 False
3 -6.980965374268762 False
8 -2.6150299035155595 False
2 -8.192416453410834 False
Episode 356, Total Reward: -2032.1099839659503
3 -6.22634075521683



8 -2.6150299035155595 False
4 -9.351745191179539 False




1 -11.509995892926003 False
7 -8.63933616755769 False




Episode 361, Total Reward: -2538.3424479103955
2 -7.653020649610424 False
4 -4.585675628012074 False
6 -11.374743052916678 False
3 -6.75623719662853 False
8 -2.6150299035155595 False
9 -7.853704773177191 False
Episode 362, Total Reward: -2040.8384112038605
7 -8.756186467218008 False
4 -4.250813147599771 False
9 -2.4969092474766588 False
1 -9.070180154761154 False
5 -3.2013328857401957 False
Episode 363, Total Reward: -2527.7754219027956
9 -8.579293986144062 False
3 -5.895412073873364 False
5 -4.905091251117594 False
2 -5.77799343429354 False
Episode 364, Total Reward: -3025.1577907454284
7 -8.756186467218008 False
5 -6.890753875802023 False
8 -4.071101157390693 False
9 -7.853704773177191 False
2 -2.2550833802954076 False
4 -4.585675628012074 False
Episode 365, Total Reward: -2034.4125052818954
2 -7.653020649610424 False
6 -8.093492715402329 False
7 -9.189377103453513 False
5 -6.890753875802023 False
3 -4.905091251117594 False
9 -5.895412073873364 False
4 -2.4969092474766588 False
Episo



5 -5.77799343429354 False




8 -4.071101157390693 False




4 -9.351745191179539 False




1 -11.509995892926003 False




6 -2.0415606574273 False
Episode 371, Total Reward: -2040.4054169828275
7 -8.756186467218008 False
9 -2.3673957673885857 False
8 -7.853704773177191 False
1 -6.742121402603406 False
5 -3.2013328857401957 False
6 -2.5146136553162997 False
4 -11.374743052916678 False
Episode 372, Total Reward: -1542.8100980043605
9 -8.579293986144062 False
8 -7.853704773177191 False
3 -2.6150299035155595 False
6 -6.75623719662853 False
2 -8.093492715402329 False
5 -5.77799343429354 False
Episode 373, Total Reward: -2039.6757520091612
9 -8.579293986144062 False
1 -9.070180154761154 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
4 -6.980965374268762 False
7 -4.250813147599771 False
8 -9.21539248558825 False
2 -8.192416453410834 False
Episode 374, Total Reward: -1054.3954857386307
2 -7.653020649610424 False
5 -5.77799343429354 False
4 -8.86012987256621 False
1 -11.509995892926003 False
8 -6.742121402603406 False
3 -2.6150299035155595 False
Episode 375, Total Reward: -2043.1582911555151
6 -0.557



2 -5.77799343429354 False




9 -2.2550833802954076 False
6 -9.089599829761081 False




4 -11.374743052916678 False
7 -4.250813147599771 False




Episode 381, Total Reward: -2034.7377953538626
8 -4.569044986778715 False
4 -9.351745191179539 False
5 -8.86012987256621 False
9 -6.591803321334266 False
2 -2.2550833802954076 False
6 -8.093492715402329 False
1 -2.0415606574273 False
Episode 382, Total Reward: -1541.7628601249837
2 -7.653020649610424 False
6 -8.093492715402329 False
3 -6.75623719662853 False
9 -5.895412073873364 False
1 -9.070180154761154 False
4 -11.509995892926003 False
8 -9.351745191179539 False
5 -4.071101157390693 False
Episode 383, Total Reward: -1062.401185031772
3 -6.226340755216833 False
9 -5.895412073873364 False
8 -7.853704773177191 False
6 -5.004577401674917 False
7 -9.189377103453513 False
Episode 384, Total Reward: -2534.169412107396
3 -6.226340755216833 False
6 -6.75623719662853 False
5 -2.5146136553162997 False
9 -6.591803321334266 False
8 -7.853704773177191 False
2 -8.192416453410834 False
1 -7.620784652302808 False
4 -11.509995892926003 False
Episode 385, Total Reward: -1057.2658967003129
3 -6.2263407



4 -10.843880223714587 False
8 -9.351745191179539 False




7 -9.21539248558825 False
5 -6.890753875802023 False




9 -6.591803321334266 False
6 -9.089599829761081 False




Episode 391, Total Reward: -2051.98317492738
6 -0.557563640416601 False
2 -8.093492715402329 False
9 -2.2550833802954076 False
5 -6.591803321334266 False
1 -3.2013328857401957 False
4 -11.509995892926003 False
Episode 392, Total Reward: -2032.2092718361148
4 -10.843880223714587 False
3 -6.980965374268762 False
9 -5.895412073873364 False
6 -9.089599829761081 False
5 -2.5146136553162997 False
1 -3.2013328857401957 False
Episode 393, Total Reward: -2038.5258040426743
4 -10.843880223714587 False
9 -2.4969092474766588 False
1 -9.070180154761154 False
5 -3.2013328857401957 False
3 -4.905091251117594 False
8 -2.6150299035155595 False
2 -8.192416453410834 False
Episode 394, Total Reward: -1541.3248401197366
2 -7.653020649610424 False
5 -5.77799343429354 False
4 -8.86012987256621 False
1 -11.509995892926003 False
7 -8.63933616755769 False
8 -9.21539248558825 False
Episode 395, Total Reward: -2051.655868502542
7 -8.756186467218008 False
6 -9.189377103453513 False
3 -6.75623719662853 False
9 -5.8



Episode 400, Total Reward: -3012.474992330442
8 -4.569044986778715 False




5 -4.071101157390693 False
9 -6.591803321334266 False




4 -2.4969092474766588 False
7 -4.250813147599771



 False
Episode 401, Total Reward: -2521.9796718605803
6 -0.557563640416601 False
2 -8.093492715402329 False
3 -6.841918956903541 False
4 -6.980965374268762 False
1 -11.509995892926003 False
Episode 402, Total Reward: -2533.983936579917
2 -7.653020649610424 False
9 -2.2550833802954076 False
3 -5.895412073873364 False
4 -6.980965374268762 False
6 -11.374743052916678 False
8 -5.004577401674917 False
5 -4.071101157390693 False
7 -6.890753875802023 False
Episode 403, Total Reward: -1050.1256569658321
7 -8.756186467218008 False
9 -2.3673957673885857 False
3 -5.895412073873364 False
1 -8.062794569850828 False
2 -7.620784652302808 False
4 -4.585675628012074 False
5 -8.86012987256621 False
Episode 404, Total Reward: -1546.1483790312118
5 -1.9895625089960105 False
2 -5.77799343429354 False
3 -6.841918956903541 False
9 -5.895412073873364 False
1 -9.070180154761154 False
4 -11.509995892926003 False
Episode 405, Total Reward: -2041.0850630217537
4 -10.843880223714587 False
7 -4.250813147599771 Fals



1 -2.2601094764485308 False
9 -9.070180154761154 False




4 -2.4969092474766588 False
5 -8.86012987256621 False




3 -4.905091251117594 False
2 -6.841918956903541 False




8 -8.192416453410834 False
Episode 411, Total Reward: -1542.6267554126846
4 -10.843880223714587 False
1 -11.509995892926003 False
2 -7.620784652302808 False
3 -6.841918956903541 False
9 -5.895412073873364 False
Episode 412, Total Reward: -2542.71199179972
6 -0.557563640416601 False
4 -11.374743052916678 False
2 -4.585675628012074 False
8 -8.192416453410834 False
5 -4.071101157390693 False
9 -6.591803321334266 False
Episode 413, Total Reward: -2035.373303253481
1 -2.2601094764485308 False
5 -3.2013328857401957 False
8 -4.071101157390693 False
7 -9.21539248558825 False
3 -7.705775893101598 False
4 -6.980965374268762 False
Episode 414, Total Reward: -2033.434677272538
9 -8.579293986144062 False
8 -7.853704773177191 False
5 -4.071101157390693 False
1 -3.2013328857401957 False
6 -2.0415606574273 False
Episode 415, Total Reward: -2525.7469934598794
9 -8.579293986144062 False
6 -9.089599829761081 False
8 -5.004577401674917 False
1 -6.742121402603406 False
2 -7.620784652302808 False
7 -1.11384



9 -6.591803321334266 False
1 -9.070180154761154 False




8 -6.742121402603406 False
2 -8.192416453410834 False




6 -8.093492715402329 False




7 -9.189377103453513 False
Episode 421, Total Reward: -1549.8689536599616
3 -6.226340755216833 False
6 -6.75623719662853 False
5 -2.5146136553162997 False
9 -6.591803321334266 False
4 -2.4969092474766588 False
Episode 422, Total Reward: -2524.5859041759727
4 -10.843880223714587 False
2 -4.585675628012074 False
8 -8.192416453410834 False
1 -6.742121402603406 False
3 -8.062794569850828 False
Episode 423, Total Reward: -2538.426888277592
7 -8.756186467218008 False
4 -4.250813147599771 False
2 -4.585675628012074 False
8 -8.192416453410834 False
3 -2.6150299035155595 False
Episode 424, Total Reward: -2528.400121599756
7 -8.756186467218008 False
1 -8.63933616755769 False
4 -11.509995892926003 False
5 -8.86012987256621 False
3 -4.905091251117594 False
Episode 425, Total Reward: -2542.6707396513852
3 -6.226340755216833 False
5 -4.905091251117594 False
8 -4.071101157390693 False
4 -9.351745191179539 False
1 -11.509995892926003 False
7 -8.63933616755769 False
Episode 426, Total Reward: -2044.703



9 -8.579293986144062 False
5 -6.591803321334266 False




8 -4.071101157390693 False
6 -5.004577401674917 False




7 -9.189377103453513 False
4 -4.250813147599771 False




Episode 431, Total Reward: -2037.6869661175972
3 -6.226340755216833 False
4 -6.980965374268762 False
5 -8.86012987256621 False
6 -2.5146136553162997 False
1 -2.0415606574273 False
2 -7.620784652302808 False
Episode 432, Total Reward: -2034.2443949670983
8 -4.569044986778715 False
1 -6.742121402603406 False
9 -9.070180154761154 False
4 -2.4969092474766588 False
6 -11.374743052916678 False
Episode 433, Total Reward: -2534.2529988445367
4 -10.843880223714587 False
2 -4.585675628012074 False
7 -1.1138456925079876 False
6 -9.189377103453513 False
5 -2.5146136553162997 False
Episode 434, Total Reward: -2528.2473923030043
5 -1.9895625089960105 False
3 -4.905091251117594 False
8 -2.6150299035155595 False
7 -9.21539248558825 False
6 -9.189377103453513 False
Episode 435, Total Reward: -2527.9144532526707
8 -4.569044986778715 False
1 -6.742121402603406 False
3 -8.062794569850828 False
2 -6.841918956903541 False
9 -2.2550833802954076 False
7 -2.3673957673885857 False
Episode 436, Total Reward: -20



7 -9.21539248558825 False




4 -4.250813147599771 False




1 -11.509995892926003 False




3 -8.062794569850828 False
5 -4.905091251117594 False




Episode 441, Total Reward: -2042.513132333861
6 -0.557563640416601 False
7 -9.189377103453513 False
9 -2.3673957673885857 False
1 -9.070180154761154 False
Episode 442, Total Reward: -3021.1845166660196
8 -4.569044986778715 False
7 -9.21539248558825 False
4 -4.250813147599771 False
9 -2.4969092474766588 False
3 -5.895412073873364 False
2 -6.841918956903541 False
6 -8.093492715402329 False
Episode 443, Total Reward: -1541.3629836136226
9 -8.579293986144062 False
3 -5.895412073873364 False
5 -4.905091251117594 False
8 -4.071101157390693 False
6 -5.004577401674917 False
7 -9.189377103453513 False
1 -8.63933616755769 False
4 -11.509995892926003 False
Episode 444, Total Reward: -1057.7941850341376
9 -8.579293986144062 False
4 -2.4969092474766588 False
1 -11.509995892926003 False
3 -8.062794569850828 False
6 -6.75623719662853 False
8 -5.004577401674917 False
2 -8.192416453410834 False
Episode 445, Total Reward: -1550.602224748112
1 -2.2601094764485308 False
6 -2.0415606574273 False
3 -6.75623



4 -4.250813147599771 False
2 -4.585675628012074 False




3 -6.841918956903541 False
9 -5.895412073873364 False




8 -7.853704773177191 False
1 -6.742121402603406 False




Episode 451, Total Reward: -1544.9258324493874
6 -0.557563640416601 False
3 -6.75623719662853 False
1 -8.062794569850828 False
8 -6.742121402603406 False
5 -4.071101157390693 False
9 -6.591803321334266 False
Episode 452, Total Reward: -2032.7816212882244
1 -2.2601094764485308 False
5 -3.2013328857401957 False
6 -2.5146136553162997 False
8 -5.004577401674917 False
9 -7.853704773177191 False
3 -5.895412073873364 False
2 -6.841918956903541 False
Episode 453, Total Reward: -1533.571669223134
7 -8.756186467218008 False
3 -7.705775893101598 False
9 -5.895412073873364 False
6 -9.089599829761081 False
8 -5.004577401674917 False
1 -6.742121402603406 False
Episode 454, Total Reward: -2043.1936730682326
7 -8.756186467218008 False
1 -8.63933616755769 False
6 -2.0415606574273 False
5 -2.5146136553162997 False
3 -4.905091251117594 False
9 -5.895412073873364 False
4 -2.4969092474766588 False
Episode 455, Total Reward: -1535.2491095199869
6 -0.557563640416601 False
3 -6.75623719662853 False
9 -5.89541



9 -2.4969092474766588 False




3 -5.895412073873364 False
8 -2.6150299035155595 False




2 -8.192416453410834 False
6 -8.093492715402329 False




5 -2.5146136553162997 False
Episode 461, Total Reward: -1540.6517542727097
7 -8.756186467218008 False
8 -9.21539248558825 False
5 -4.071101157390693 False
2 -5.77799343429354 False
3 -6.841918956903541 False
Episode 462, Total Reward: -2534.662592501394
5 -1.9895625089960105 False
4 -8.86012987256621 False
7 -4.250813147599771 False
9 -2.3673957673885857 False
2 -2.2550833802954076 False
1 -7.620784652302808 False
6 -2.0415606574273 False
Episode 463, Total Reward: -1529.3853299865762
1 -2.2601094764485308 False
3 -8.062794569850828 False
5 -4.905091251117594 False
4 -8.86012987256621 False
2 -4.585675628012074 False
8 -8.192416453410834 False
Episode 464, Total Reward: -2036.866217251406
8 -4.569044986778715 False
7 -9.21539248558825 False
2 -1.1138456925079876 False
6 -8.093492715402329 False
9 -9.089599829761081 False
5 -6.591803321334266 False
4 -8.86012987256621 False
Episode 465, Total Reward: -1547.5333089039389
3 -6.226340755216833 False
5 -4.905091251117594 False
7 -6.89075387



6 -2.5146136553162997 False
3 -6.75623719662853 False




7 -7.705775893101598 False
8 -9.21539248558825 False




1 -6.742121402603406 False
2 -7.620784652302808 False




Episode 471, Total Reward: -1542.5444877945367
4 -10.843880223714587 False
7 -4.250813147599771 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
5 -2.5146136553162997 False
8 -4.071101157390693 False
1 -6.742121402603406 False
Episode 472, Total Reward: -1539.8795251837746
3 -6.226340755216833 False
5 -4.905091251117594 False
7 -6.890753875802023 False
1 -8.63933616755769 False
Episode 473, Total Reward: -3026.661522049694
8 -4.569044986778715 False
9 -7.853704773177191 False
6 -9.089599829761081 False
5 -2.5146136553162997 False
7 -6.890753875802023 False
1 -8.63933616755769 False
Episode 474, Total Reward: -2039.557053288393
1 -2.2601094764485308 False
7 -8.63933616755769 False
8 -9.21539248558825 False
5 -4.071101157390693 False
4 -8.86012987256621 False
Episode 475, Total Reward: -2533.0460691595513
6 -0.557563640416601 False
1 -2.0415606574273 False
2 -7.620784652302808 False
5 -5.77799343429354 False
4 -8.86012987256621 False
3 -6.980965374268762 False
Episode 476, To



2 -1.1138456925079876 False
Episode 480, Total Reward: -2035.8818398295584
5 -1.9895625089960105 False
2 -5.77799343429354 False




7 -1.1138456925079876 False
4 -4.250813147599771 False




6 -11.374743052916678 False




1 -2.0415606574273 False
8 -6.742121402603406 False




Episode 481, Total Reward: -1533.2906398963448
9 -8.579293986144062 False
3 -5.895412073873364 False
8 -2.6150299035155595 False
4 -9.351745191179539 False
2 -4.585675628012074 False
1 -7.620784652302808 False
6 -2.0415606574273 False
7 -9.189377103453513 False
Episode 482, Total Reward: -1049.8788791959082
1 -2.2601094764485308 False
3 -8.062794569850828 False
4 -6.980965374268762 False
8 -9.351745191179539 False
9 -7.853704773177191 False
6 -9.089599829761081 False
Episode 483, Total Reward: -2043.5989192146858
8 -4.569044986778715 False
6 -5.004577401674917 False
3 -6.75623719662853 False
9 -5.895412073873364 False
7 -2.3673957673885857 False
Episode 484, Total Reward: -2524.592667426344
5 -1.9895625089960105 False
6 -2.5146136553162997 False
8 -5.004577401674917 False
7 -9.21539248558825 False
1 -8.63933616755769 False
9 -9.070180154761154 False
Episode 485, Total Reward: -2036.4336623738943
7 -8.756186467218008 False
5 -6.890753875802023 False
4 -8.86012987256621 False
3 -6.980965



8 -9.351745191179539 False




3 -2.6150299035155595 False




5 -4.905091251117594 False
9 -6.591803321334266 False




2 -2.2550833802954076 False
Episode 491, Total Reward: -2036.562633271157
1 -2.2601094764485308 False
2 -7.620784652302808 False
7 -1.1138456925079876 False
9 -2.3673957673885857 False
5 -6.591803321334266 False
Episode 492, Total Reward: -2519.9539389099823
5 -1.9895625089960105 False
2 -5.77799343429354 False
4 -4.585675628012074 False
7 -4.250813147599771 False
1 -8.63933616755769 False
8 -6.742121402603406 False
Episode 493, Total Reward: -2031.9855022890624
8 -4.569044986778715 False
1 -6.742121402603406 False
5 -3.2013328857401957 False
9 -6.591803321334266 False
Episode 494, Total Reward: -3021.1043025964564
6 -0.557563640416601 False
3 -6.75623719662853 False
2 -6.841918956903541 False
4 -4.585675628012074 False
7 -4.250813147599771 False
Episode 495, Total Reward: -2522.9922085695607
6 -0.557563640416601 False
8 -5.004577401674917 False
9 -7.853704773177191 False
7 -2.3673957673885857 False
5 -6.890753875802023 False
Episode 496, Total Reward: -2522.673995458459
3 -6.226340755



Episode 500, Total Reward: -2048.9426591674
2 -7.653020649610424 False




9 -2.2550833802954076 False
1 -9.070180154761154 False




3 -8.062794569850828 False
7 -7.705775893101598 False




5 -6.890753875802023 False
Episode 501, Total Reward: -2041.6376085234215
5 -1.9895625089960105 False
3 -4.905091251117594 False
9 -5.895412073873364 False
7 -2.3673957673885857 False
2 -1.1138456925079876 False
4 -4.585675628012074 False
Episode 502, Total Reward: -2020.8569829218957
9 -8.579293986144062 False
3 -5.895412073873364 False
8 -2.6150299035155595 False
6 -5.004577401674917 False
1 -2.0415606574273 False
Episode 503, Total Reward: -2524.135874022635
6 -0.557563640416601 False
7 -9.189377103453513 False
8 -9.21539248558825 False
3 -2.6150299035155595 False
2 -6.841918956903541 False
1 -7.620784652302808 False
Episode 504, Total Reward: -2036.0400667421802
2 -7.653020649610424 False
1 -7.620784652302808 False
7 -8.63933616755769 False
4 -4.250813147599771 False
6 -11.374743052916678 False
8 -5.004577401674917 False
Episode 505, Total Reward: -2044.5432750716623
4 -10.843880223714587 False
2 -4.585675628012074 False
1 -7.620784652302808 False
7 -8.63933616755769 False
6 -9.189



4 -11.374743052916678 False
Episode 510, Total Reward: -2039.181559297015
4 -10.843880223714587 False




2 -4.585675628012074 False
8 -8.192416453410834 False




9 -7.853704773177191 False
7 -2.3673957673885857 False




5 -6.890753875802023 False
Episode 511, Total Reward: -2040.7338267215052
2 -7.653020649610424 False
1 -7.620784652302808 False
5 -3.2013328857401957 False
6 -2.5146136553162997 False
4 -11.374743052916678 False
Episode 512, Total Reward: -2532.3644948958863
9 -8.579293986144062 False
5 -6.591803321334266 False
1 -3.2013328857401957 False
7 -8.63933616755769 False
2 -1.1138456925079876 False
Episode 513, Total Reward: -2528.125612053284
8 -4.569044986778715 False
1 -6.742121402603406 False
3 -8.062794569850828 False
6 -6.75623719662853 False
4 -11.374743052916678 False
9 -2.4969092474766588 False
Episode 514, Total Reward: -2040.0018504562547
1 -2.2601094764485308 False
6 -2.0415606574273 False
2 -8.093492715402329 False
7 -1.1138456925079876 False
5 -6.890753875802023 False
4 -8.86012987256621 False
Episode 515, Total Reward: -2029.259892290154
9 -8.579293986144062 False
2 -2.2550833802954076 False
6 -8.093492715402329 False
4 -11.374743052916678 False
7 -4.250813147599771 False
5 -6.



Episode 520, Total Reward: -2530.306621815712
6 -0.557563640416601 False




5 -2.5146136553162997 False
9 -6.591803321334266 False




7 -2.3673957673885857 False




2 -1.1138456925079876 False




4 -4.585675628012074 False
Episode 521, Total Reward: -2017.7308977049759
4 -10.843880223714587 False
6 -11.374743052916678 False
7 -9.189377103453513 False
1 -8.63933616755769 False
Episode 522, Total Reward: -3040.0473365476428
9 -8.579293986144062 False
5 -6.591803321334266 False
2 -5.77799343429354 False
3 -6.841918956903541 False
6 -6.75623719662853 False
Episode 523, Total Reward: -2534.547246895304
6 -0.557563640416601 False
5 -2.5146136553162997 False
9 -6.591803321334266 False
1 -9.070180154761154 False
2 -7.620784652302808 False
Episode 524, Total Reward: -2526.354945424131
3 -6.226340755216833 False
4 -6.980965374268762 False
2 -4.585675628012074 False
5 -5.77799343429354 False
6 -2.5146136553162997 False
9 -9.089599829761081 False
Episode 525, Total Reward: -2035.1751886768684
1 -2.2601094764485308 False
4 -11.509995892926003 False
5 -8.86012987256621 False
3 -4.905091251117594 False
6 -6.75623719662853 False
2 -8.093492715402329 False
8 -8.192416453410834 False
9 -7.853704



5 -5.77799343429354 False




9 -6.591803321334266 False
7 -2.3673957673885857 False




4 -4.250813147599771 False




1 -11.509995892926003 False
Episode 531, Total Reward: -2038.1510222131526
3 -6.226340755216833 False
9 -5.895412073873364 False
5 -6.591803321334266 False
8 -4.071101157390693 False
Episode 532, Total Reward: -3022.7846573078155
9 -8.579293986144062 False
1 -9.070180154761154 False
6 -2.0415606574273 False
2 -8.093492715402329 False
4 -4.585675628012074 False
7 -4.250813147599771 False
8 -9.21539248558825 False
Episode 533, Total Reward: -1545.8364087749349
7 -8.756186467218008 False
8 -9.21539248558825 False
9 -7.853704773177191 False
2 -2.2550833802954076 False
6 -8.093492715402329 False
3 -6.75623719662853 False
5 -4.905091251117594 False
Episode 534, Total Reward: -1547.8351882694271
7 -8.756186467218008 False
2 -1.1138456925079876 False
3 -6.841918956903541 False
6 -6.75623719662853 False
1 -2.0415606574273 False
9 -9.070180154761154 False
4 -2.4969092474766588 False
Episode 535, Total Reward: -1537.0768383729232
5 -1.9895625089960105 False
8 -4.071101157390693 False
3 -2.6150299



4 -9.351745191179539 False
2 -4.585675628012074 False




7 -1.1138456925079876 False
6 -9.189377103453513 False




9 -9.089599829761081 False
5 -6.591803321334266 False




Episode 541, Total Reward: -1544.4910917530271
9 -8.579293986144062 False
7 -2.3673957673885857 False
4 -4.250813147599771 False
3 -6.980965374268762 False
8 -2.6150299035155595 False
6 -5.004577401674917 False
2 -8.093492715402329 False
Episode 542, Total Reward: -1537.891568295994
2 -7.653020649610424 False
9 -2.2550833802954076 False
7 -2.3673957673885857 False
8 -9.21539248558825 False
6 -5.004577401674917 False
1 -2.0415606574273 False
5 -3.2013328857401957 False
Episode 543, Total Reward: -1531.7383632277251
6 -0.557563640416601 False
4 -11.374743052916678 False
5 -8.86012987256621 False
8 -4.071101157390693 False
7 -9.21539248558825 False
Episode 544, Total Reward: -2534.0789302088783
7 -8.756186467218008 False
4 -4.250813147599771 False
3 -6.980965374268762 False
9 -5.895412073873364 False
1 -9.070180154761154 False
Episode 545, Total Reward: -2534.953557217721
2 -7.653020649610424 False
4 -4.585675628012074 False
3 -6.980965374268762 False
1 -8.062794569850828 False
8 -6.7421



Episode 550, Total Reward: -2531.5838355898504
1 -2.2601094764485308 False




6 -2.0415606574273 False
7 -9.189377103453513 False




3 -7.705775893101598 False
8 -2.6150299035155595 False




5 -4.071101157390693 False
9 -6.591803321334266 False




4 -2.4969092474766588 False
Episode 551, Total Reward: -1036.971666760148
1 -2.2601094764485308 False
4 -11.509995892926003 False
8 -9.351745191179539 False
7 -9.21539248558825 False
Episode 552, Total Reward: -3032.3372430461422
9 -8.579293986144062 False
8 -7.853704773177191 False
5 -4.071101157390693 False
3 -4.905091251117594 False
2 -6.841918956903541 False
6 -8.093492715402329 False
7 -9.189377103453513 False
Episode 553, Total Reward: -1549.533979943589
1 -2.2601094764485308 False
2 -7.620784652302808 False
9 -2.2550833802954076 False
5 -6.591803321334266 False
4 -8.86012987256621 False
Episode 554, Total Reward: -2527.5879107029473
2 -7.653020649610424 False
7 -1.1138456925079876 False
8 -9.21539248558825 False
4 -9.351745191179539 False
6 -11.374743052916678 False
9 -9.089599829761081 False
3 -5.895412073873364 False
Episode 555, Total Reward: -1553.6937589754373
1 -2.2601094764485308 False
8 -6.742121402603406 False
3 -2.6150299035155595 False
2 -6.841918956903541 False
4 -4.



Episode 560, Total Reward: -2524.002598802082
6 -0.557563640416601 False




8 -5.004577401674917 False
5 -4.071101157390693 False




4 -8.86012987256621 False
9 -2.4969092474766588 False




2 -2.2550833802954076 False




3 -6.841918956903541 False
Episode 561, Total Reward: -1530.0872836567241
4 -10.843880223714587 False
8 -9.351745191179539 False
3 -2.6150299035155595 False
1 -8.062794569850828 False
6 -2.0415606574273 False
9 -9.089599829761081 False
Episode 562, Total Reward: -2042.0046103754487
3 -6.226340755216833 False
7 -7.705775893101598 False
2 -1.1138456925079876 False
5 -5.77799343429354 False
9 -6.591803321334266 False
4 -2.4969092474766588 False
6 -11.374743052916678 False
Episode 563, Total Reward: -1541.2874113968476
3 -6.226340755216833 False
7 -7.705775893101598 False
6 -9.189377103453513 False
2 -8.093492715402329 False
Episode 564, Total Reward: -3031.214986467174
1 -2.2601094764485308 False
3 -8.062794569850828 False
5 -4.905091251117594 False
7 -6.890753875802023 False
2 -1.1138456925079876 False
Episode 565, Total Reward: -2523.232594865727
4 -10.843880223714587 False
5 -8.86012987256621 False
1 -3.2013328857401957 False
3 -8.062794569850828 False
7 -7.705775893101598 False
Episod



2 -8.093492715402329 False
4 -4.585675628012074 False




3 -6.980965374268762 False
5 -4.905091251117594 False




1 -3.2013328857401957 False
8 -6.742121402603406 False




Episode 571, Total Reward: -1535.066242897561
9 -8.579293986144062 False
8 -7.853704773177191 False
2 -8.192416453410834 False
4 -4.585675628012074 False
7 -4.250813147599771 False
6 -9.189377103453513 False
Episode 572, Total Reward: -2042.6512810917977
5 -1.9895625089960105 False
8 -4.071101157390693 False
4 -9.351745191179539 False
9 -2.4969092474766588 False
6 -9.089599829761081 False
Episode 573, Total Reward: -2526.998917934804
3 -6.226340755216833 False
4 -6.980965374268762 False
2 -4.585675628012074 False
5 -5.77799343429354 False
9 -6.591803321334266 False
8 -7.853704773177191 False
6 -5.004577401674917 False
Episode 574, Total Reward: -1543.0210606879775
9 -8.579293986144062 False
4 -2.4969092474766588 False
3 -6.980965374268762 False
5 -4.905091251117594 False
7 -6.890753875802023 False
2 -1.1138456925079876 False
Episode 575, Total Reward: -2030.966859427317
9 -8.579293986144062 False
8 -7.853704773177191 False
7 -9.21539248558825 False
2 -1.1138456925079876 False
6 -8.0934



3 -6.841918956903541 False
1 -8.062794569850828 False




4 -11.509995892926003 False




6 -11.374743052916678 False
5 -2.5146136553162997 False




Episode 581, Total Reward: -2047.9570867775237
7 -8.756186467218008 False
1 -8.63933616755769 False
4 -11.509995892926003 False
6 -11.374743052916678 False
9 -9.089599829761081 False
2 -2.2550833802954076 False
Episode 582, Total Reward: -2051.624944790675
6 -0.557563640416601 False
3 -6.75623719662853 False
1 -8.062794569850828 False
8 -6.742121402603406 False
7 -9.21539248558825 False
Episode 583, Total Reward: -2531.3341092950877
7 -8.756186467218008 False
6 -9.189377103453513 False
8 -5.004577401674917 False
1 -6.742121402603406 False
4 -11.509995892926003 False
3 -6.980965374268762 False
9 -5.895412073873364 False
Episode 584, Total Reward: -1554.078635716018
9 -8.579293986144062 False
6 -9.089599829761081 False
3 -6.75623719662853 False
5 -4.905091251117594 False
1 -3.2013328857401957 False
2 -7.620784652302808 False
Episode 585, Total Reward: -2040.1523398016939
6 -0.557563640416601 False
7 -9.189377103453513 False
1 -8.63933616755769 False
4 -11.509995892926003 False
9 -2.49690



9 -2.4969092474766588 False
Episode 590, Total Reward: -2042.6563085665753
6 -0.557563640416601 False




7 -9.189377103453513 False
4 -4.250813147599771 False




9 -2.4969092474766588 False
8 -7.853704773177191 False




2 -8.192416453410834 False
Episode 591, Total Reward: -2032.5407843655346
7 -8.756186467218008 False
3 -7.705775893101598 False
8 -2.6150299035155595 False
9 -7.853704773177191 False
6 -9.089599829761081 False
4 -11.374743052916678 False
Episode 592, Total Reward: -2047.3950399196901
6 -0.557563640416601 False
5 -2.5146136553162997 False
3 -4.905091251117594 False
7 -7.705775893101598 False
9 -2.3673957673885857 False
2 -2.2550833802954076 False
8 -8.192416453410834 False
4 -9.351745191179539 False
Episode 593, Total Reward: -1037.8496852322266
1 -2.2601094764485308 False
7 -8.63933616755769 False
5 -6.890753875802023 False
3 -4.905091251117594 False
8 -2.6150299035155595 False
4 -9.351745191179539 False
Episode 594, Total Reward: -2034.662065865621
5 -1.9895625089960105 False
1 -3.2013328857401957 False
8 -6.742121402603406 False
2 -8.192416453410834 False
7 -1.1138456925079876 False
6 -9.189377103453513 False
9 -9.089599829761081 False
Episode 595, Total Reward: -1539.518255876473
3 



Episode 600, Total Reward: -1533.4879084155914
3 -6.226340755216833 False




7 -7.705775893101598 False
4 -4.250813147599771 False




8 -9.351745191179539 False
9 -7.853704773177191 False




Episode 601, Total Reward: -2535.388379760275
2 -7.653020649610424 False
7 -1.1138456925079876 False
4 -4.250813147599771 False
5 -8.86012987256621 False
8 -4.071101157390693 False
9 -7.853704773177191 False
3 -5.895412073873364 False
Episode 602, Total Reward: -1539.6980273667257
8 -4.569044986778715 False
9 -7.853704773177191 False
1 -9.070180154761154 False
7 -8.63933616755769 False
5 -6.890753875802023 False
2 -5.77799343429354 False
Episode 603, Total Reward: -2042.8010133923704
4 -10.843880223714587 False
1 -11.509995892926003 False
3 -8.062794569850828 False
8 -2.6150299035155595 False
6 -5.004577401674917 False
Episode 604, Total Reward: -2538.036277991682
5 -1.9895625089960105 False
6 -2.5146136553162997 False
9 -9.089599829761081 False
3 -5.895412073873364 False
7 -7.705775893101598 False
Episode 605, Total Reward: -2527.194963961048
9 -8.579293986144062 False
3 -5.895412073873364 False
2 -6.841918956903541 False
6 -8.093492715402329 False
4 -11.374743052916678 False
8 -9.351



2 -7.653020649610424 False
3 -6.841918956903541 False




8 -2.6150299035155595 False




6 -5.004577401674917 False
Episode 611, Total Reward: -3022.1145469117046
2 -7.653020649610424 False
6 -8.093492715402329 False
9 -9.089599829761081 False
4 -2.4969092474766588 False
7 -4.250813147599771 False
3 -7.705775893101598 False
1 -8.062794569850828 False
8 -6.742121402603406 False
Episode 612, Total Reward: -1054.094527455406
1 -2.2601094764485308 False
7 -8.63933616755769 False
5 -6.890753875802023 False
8 -4.071101157390693 False
Episode 613, Total Reward: -3021.861300677199
3 -6.226340755216833 False
7 -7.705775893101598 False
4 -4.250813147599771 False
6 -11.374743052916678 False
1 -2.0415606574273 False
5 -3.2013328857401957 False
2 -5.77799343429354 False
Episode 614, Total Reward: -1540.578559826296
5 -1.9895625089960105 False
1 -3.2013328857401957 False
9 -9.070180154761154 False
6 -9.089599829761081 False
2 -8.093492715402329 False
3 -6.841918956903541 False
4 -6.980965374268762 False
Episode 615, Total Reward: -1545.267052425833
8 -4.569044986778715 False
9 -7.853704



7 -2.3673957673885857 False
Episode 620, Total Reward: -2520.540718201658
6 -0.557563640416601 False




4 -11.374743052916678 False
2 -4.585675628012074 False




1 -7.620784652302808 False
8 -6.742121402603406 False




Episode 621, Total Reward: -2530.8808883762513
2 -7.653020649610424 False
7 -1.1138456925079876 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
3 -6.75623719662853 False
4 -6.980965374268762 False
Episode 622, Total Reward: -2033.9610645101652
8 -4.569044986778715 False
6 -5.004577401674917 False
3 -6.75623719662853 False
2 -6.841918956903541 False
4 -4.585675628012074 False
1 -11.509995892926003 False
Episode 623, Total Reward: -2039.267450062924
6 -0.557563640416601 False
1 -2.0415606574273 False
4 -11.509995892926003 False
8 -9.351745191179539 False
9 -7.853704773177191 False
2 -2.2550833802954076 False
Episode 624, Total Reward: -2033.569653535422
2 -7.653020649610424 False
3 -6.841918956903541 False
8 -2.6150299035155595 False
9 -7.853704773177191 False
4 -2.4969092474766588 False
Episode 625, Total Reward: -2527.4605835306834
6 -0.557563640416601 False
4 -11.374743052916678 False
5 -8.86012987256621 False
7 -6.890753875802023 False
3 -7.705775893101598 False
9 -5.895



5 -6.890753875802023 False
Episode 630, Total Reward: -1558.726862698462
7 -8.756186467218008 False




1 -8.63933616755769 False
4 -11.509995892926003 False




6 -11.374743052916678 False
5 -2.5146136553162997 False




Episode 631, Total Reward: -2542.7948752359343
9 -8.579293986144062 False
5 -6.591803321334266 False
6 -2.5146136553162997 False
2 -8.093492715402329 False
4 -4.585675628012074 False
7 -4.250813147599771 False
Episode 632, Total Reward: -2034.6156924538088
3 -6.226340755216833 False
8 -2.6150299035155595 False
1 -6.742121402603406 False
4 -11.509995892926003 False
7 -4.250813147599771 False
5 -6.890753875802023 False
9 -6.591803321334266 False
2 -2.2550833802954076 False
6 -8.093492715402329 True
Episode 633, Total Reward: -555.7329980351122
7 -8.756186467218008 False
3 -7.705775893101598 False
5 -4.905091251117594 False
1 -3.2013328857401957 False
2 -7.620784652302808 False
4 -4.585675628012074 False
Episode 634, Total Reward: -2036.7748467774923
2 -7.653020649610424 False
8 -8.192416453410834 False
7 -9.21539248558825 False
5 -6.890753875802023 False
4 -8.86012987256621 False
9 -2.4969092474766588 False
Episode 635, Total Reward: -2043.3086225844543
1 -2.2601094764485308 False
7 -8.6



3 -6.75623719662853 False




4 -6.980965374268762 False




7 -4.250813147599771 False
Episode 641, Total Reward: -3018.5455793589135
2 -7.653020649610424 False
3 -6.841918956903541 False
8 -2.6150299035155595 False
9 -7.853704773177191 False
Episode 642, Total Reward: -3024.9636742832063
8 -4.569044986778715 False
3 -2.6150299035155595 False
5 -4.905091251117594 False
6 -2.5146136553162997 False
9 -9.089599829761081 False
2 -2.2550833802954076 False
1 -7.620784652302808 False
Episode 643, Total Reward: -1533.5692476590873
7 -8.756186467218008 False
6 -9.189377103453513 False
1 -2.0415606574273 False
4 -11.509995892926003 False
Episode 644, Total Reward: -3031.4971201210246
2 -7.653020649610424 False
1 -7.620784652302808 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
4 -11.374743052916678 False
Episode 645, Total Reward: -2046.7448801195374
5 -1.9895625089960105 False
9 -6.591803321334266 False
2 -2.2550833802954076 False
8 -8.192416453410834 False
7 -9.21539248558825 False
1 -8.63933616755769 False
3 -8.



9 -9.070180154761154 False




7 -2.3673957673885857 False
8 -9.21539248558825 False




4 -9.351745191179539 False
Episode 651, Total Reward: -2532.264823075366
3 -6.226340755216833 False
2 -6.841918956903541 False
1 -7.620784652302808 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
5 -6.591803321334266 False
8 -4.071101157390693 False
4 -9.351745191179539 False
Episode 652, Total Reward: -1051.710425969274
5 -1.9895625089960105 False
3 -4.905091251117594 False
8 -2.6150299035155595 False
4 -9.351745191179539 False
Episode 653, Total Reward: -3018.8614288548088
2 -7.653020649610424 False
5 -5.77799343429354 False
8 -4.071101157390693 False
4 -9.351745191179539 False
3 -6.980965374268762 False
9 -5.895412073873364 False
Episode 654, Total Reward: -2039.7302378806164
3 -6.226340755216833 False
1 -8.062794569850828 False
2 -7.620784652302808 False
5 -5.77799343429354 False
8 -4.071101157390693 False
7 -9.21539248558825 False
9 -2.3673957673885857 False
6 -9.089599829761081 False
Episode 655, Total Reward: -1052.4314026517925
5 -1.9895625089960105 False
7 -6.89075



Episode 660, Total Reward: -1547.7238249962784
9 -8.579293986144062 False




3 -5.895412073873364 False
2 -6.841918956903541 False




5 -5.77799343429354 False




7 -6.890753875802023 False
Episode 661, Total Reward: -2533.985372327016
7 -8.756186467218008 False
8 -9.21539248558825 False
5 -4.071101157390693 False
2 -5.77799343429354 False
4 -4.585675628012074 False
6 -11.374743052916678 False
Episode 662, Total Reward: -2043.7810922254193
7 -8.756186467218008 False
9 -2.3673957673885857 False
8 -7.853704773177191 False
2 -8.192416453410834 False
5 -5.77799343429354 False
4 -8.86012987256621 False
Episode 663, Total Reward: -2041.8078267680546
8 -4.569044986778715 False
3 -2.6150299035155595 False
7 -7.705775893101598 False
4 -4.250813147599771 False
9 -2.4969092474766588 False
1 -9.070180154761154 False
Episode 664, Total Reward: -2030.7077533332335
2 -7.653020649610424 False
9 -2.2550833802954076 False
1 -9.070180154761154 False
4 -11.509995892926003 False
3 -6.980965374268762 False
6 -6.75623719662853 False
Episode 665, Total Reward: -2044.2254826484905
1 -2.2601094764485308 False
9 -9.070180154761154 False
7 -2.3673957673885857 False
3 -7.70



1 -8.062794569850828 False
2 -7.620784652302808 False




5 -5.77799343429354 False




7 -6.890753875802023 False
Episode 671, Total Reward: -2534.5786672874656
8 -4.569044986778715 False
5 -4.071101157390693 False
9 -6.591803321334266 False
3 -5.895412073873364 False
2 -6.841918956903541 False
6 -8.093492715402329 False
Episode 672, Total Reward: -2036.062773211683
4 -10.843880223714587 False
9 -2.4969092474766588 False
1 -9.070180154761154 False
Episode 673, Total Reward: -3522.4109696259525
4 -10.843880223714587 False
8 -9.351745191179539 False
5 -4.071101157390693 False
2 -5.77799343429354 False
6 -8.093492715402329 False
1 -2.0415606574273 False
9 -9.070180154761154 False
7 -2.3673957673885857 False
Episode 674, Total Reward: -1051.6173493015576
4 -10.843880223714587 False
7 -4.250813147599771 False
5 -6.890753875802023 False
6 -2.5146136553162997 False
9 -9.089599829761081 False
1 -9.070180154761154 False
Episode 675, Total Reward: -2042.659840886955
7 -8.756186467218008 False
8 -9.21539248558825 False
6 -5.004577401674917 False
5 -2.5146136553162997 False
1 -3.201



7 -8.756186467218008 False
2 -1.1138456925079876 False




5 -5.77799343429354 False
9 -6.591803321334266 False




4 -2.4969092474766588 False




1 -11.509995892926003 False
Episode 681, Total Reward: -2036.2467340557566
2 -7.653020649610424 False
7 -1.1138456925079876 False
3 -7.705775893101598 False
4 -6.980965374268762 False
9 -2.4969092474766588 False
6 -9.089599829761081 False
5 -2.5146136553162997 False
Episode 682, Total Reward: -1537.5547303420428
1 -2.2601094764485308 False
5 -3.2013328857401957 False
4 -8.86012987256621 False
6 -11.374743052916678 False
3 -6.75623719662853 False
9 -5.895412073873364 False
Episode 683, Total Reward: -2038.3479645581735
1 -2.2601094764485308 False
8 -6.742121402603406 False
6 -5.004577401674917 False
2 -8.093492715402329 False
5 -5.77799343429354 False
Episode 684, Total Reward: -2527.878294430423
2 -7.653020649610424 False
6 -8.093492715402329 False
1 -2.0415606574273 False
7 -8.63933616755769 False
9 -2.3673957673885857 False
4 -2.4969092474766588 False
Episode 685, Total Reward: -2031.291715204863
4 -10.843880223714587 False
7 -4.250813147599771 False
5 -6.890753875802023 False
2 -5.7



Episode 690, Total Reward: -2525.0620100841625
5 -1.9895625089960105 False




1 -3.2013328857401957 False
8 -6.742121402603406 False




9 -7.853704773177191 False
Episode 691, Total Reward: -3019.7867215705164
4 -10.843880223714587 False
1 -11.509995892926003 False
9 -9.070180154761154 False
7 -2.3673957673885857 False
5 -6.890753875802023 False
8 -4.071101157390693 False
Episode 692, Total Reward: -2044.7533070719833
1 -2.2601094764485308 False
4 -11.509995892926003 False
5 -8.86012987256621 False
9 -6.591803321334266 False
3 -5.895412073873364 False
8 -2.6150299035155595 False
6 -5.004577401674917 False
7 -9.189377103453513 False
Episode 693, Total Reward: -1051.9264350457925
1 -2.2601094764485308 False
9 -9.070180154761154 False
8 -7.853704773177191 False
3 -2.6150299035155595 False
6 -6.75623719662853 False
Episode 694, Total Reward: -2528.555261504531
4 -10.843880223714587 False
8 -9.351745191179539 False
7 -9.21539248558825 False
3 -7.705775893101598 False
1 -8.062794569850828 False
2 -7.620784652302808 False
5 -5.77799343429354 False
Episode 695, Total Reward: -1558.5783664500314
7 -8.756186467218008 False
2 -1.



5 -6.591803321334266 False
Episode 700, Total Reward: -1559.5544341413079
5 -1.9895625089960105 False




2 -5.77799343429354 False
9 -2.2550833802954076 False




6 -9.089599829761081 False




3 -6.75623719662853 False
7 -7.705775893101598 False




Episode 701, Total Reward: -2033.5742522430762
6 -0.557563640416601 False
4 -11.374743052916678 False
8 -9.351745191179539 False
9 -7.853704773177191 False
5 -6.591803321334266 False
3 -4.905091251117594 False
Episode 702, Total Reward: -2040.6346512301418
6 -0.557563640416601 False
5 -2.5146136553162997 False
7 -6.890753875802023 False
3 -7.705775893101598 False
9 -5.895412073873364 False
1 -9.070180154761154 False
Episode 703, Total Reward: -2032.634299293271
8 -4.569044986778715 False
2 -8.192416453410834 False
9 -2.2550833802954076 False
1 -9.070180154761154 False
6 -2.0415606574273 False
7 -9.189377103453513 False
Episode 704, Total Reward: -2035.317662736127
3 -6.226340755216833 False
5 -4.905091251117594 False
4 -8.86012987256621 False
1 -11.509995892926003 False
7 -8.63933616755769 False
Episode 705, Total Reward: -2540.140893939384
7 -8.756186467218008 False
3 -7.705775893101598 False
9 -5.895412073873364 False
6 -9.089599829761081 False
5 -2.5146136553162997 False
2 -5.777993



4 -10.843880223714587 False
5 -8.86012987256621 False




2 -5.77799343429354 False




3 -6.841918956903541 False




9 -5.895412073873364 False
Episode 711, Total Reward: -2538.2193345613514
6 -0.557563640416601 False
7 -9.189377103453513 False
9 -2.3673957673885857 False
4 -2.4969092474766588 False
5 -8.86012987256621 False
8 -4.071101157390693 False
Episode 712, Total Reward: -2027.5424767886925
1 -2.2601094764485308 False
9 -9.070180154761154 False
6 -9.089599829761081 False
8 -5.004577401674917 False
5 -4.071101157390693 False
7 -6.890753875802023 False
Episode 713, Total Reward: -2036.3863218958384
9 -8.579293986144062 False
6 -9.089599829761081 False
2 -8.093492715402329 False
5 -5.77799343429354 False
3 -4.905091251117594 False
7 -7.705775893101598 False
Episode 714, Total Reward: -2044.1512471098204
5 -1.9895625089960105 False
1 -3.2013328857401957 False
6 -2.0415606574273 False
8 -5.004577401674917 False
2 -8.192416453410834 False
4 -4.585675628012074 False
7 -4.250813147599771 False
Episode 715, Total Reward: -1529.2659386828611
7 -8.756186467218008 False
1 -8.63933616755769 False
5 -3.2013



Episode 720, Total Reward: -2041.9964693756317
9 -8.579293986144062 False




8 -7.853704773177191 False
3 -2.6150299035155595 False




6 -6.75623719662853 False
5 -2.5146136553162997 False




2 -5.77799343429354 False
1 -7.620784652302808 False




7 -8.63933616755769 False
Episode 721, Total Reward: -1050.3569937689358
5 -1.9895625089960105 False
4 -8.86012987256621 False
8 -9.351745191179539 False
2 -8.192416453410834 False
1 -7.620784652302808 False
Episode 722, Total Reward: -2536.0146386784554
2 -7.653020649610424 False
7 -1.1138456925079876 False
9 -2.3673957673885857 False
1 -9.070180154761154 False
8 -6.742121402603406 False
4 -9.351745191179539 False
Episode 723, Total Reward: -2036.2983088580513
1 -2.2601094764485308 False
4 -11.509995892926003 False
3 -6.980965374268762 False
8 -2.6150299035155595 False
6 -5.004577401674917 False
9 -9.089599829761081 False
7 -2.3673957673885857 False
Episode 724, Total Reward: -1539.8276736459834
6 -0.557563640416601 False
8 -5.004577401674917 False
2 -8.192416453410834 False
3 -6.841918956903541 False
9 -5.895412073873364 False
1 -9.070180154761154 False
Episode 725, Total Reward: -2035.5620686810403
2 -7.653020649610424 False
9 -2.2550833802954076 False
1 -9.070180154761154 False
5 -



9 -5.895412073873364 False




1 -9.070180154761154 False
4 -11.509995892926003 False




5 -8.86012987256621 False
8 -4.071101157390693 False




Episode 731, Total Reward: -2045.6331599067344
6 -0.557563640416601 False
8 -5.004577401674917 False
9 -7.853704773177191 False
1 -9.070180154761154 False
Episode 732, Total Reward: -3022.48602597003
1 -2.2601094764485308 False
4 -11.509995892926003 False
2 -4.585675628012074 False
6 -8.093492715402329 False
9 -9.089599829761081 False
Episode 733, Total Reward: -2535.53887354255
9 -8.579293986144062 False
4 -2.4969092474766588 False
6 -11.374743052916678 False
5 -2.5146136553162997 False
7 -6.890753875802023 False
8 -9.21539248558825 False
Episode 734, Total Reward: -2041.0717063032437
6 -0.557563640416601 False
3 -6.75623719662853 False
5 -4.905091251117594 False
8 -4.071101157390693 False
7 -9.21539248558825 False
1 -8.63933616755769 False
Episode 735, Total Reward: -2034.1447218986993
4 -10.843880223714587 False
2 -4.585675628012074 False
3 -6.841918956903541 False
6 -6.75623719662853 False
7 -9.189377103453513 False
5 -6.890753875802023 False
Episode 736, Total Reward: -2045.107842



5 -2.5146136553162997 False
Episode 740, Total Reward: -3024.9655599418534
3 -6.226340755216833 False




6 -6.75623719662853 False




5 -2.5146136553162997 False
4 -8.86012987256621 False




1 -11.509995892926003 False




8 -6.742121402603406 False
Episode 741, Total Reward: -2042.6094387752573
2 -7.653020649610424 False
3 -6.841918956903541 False
1 -8.062794569850828 False
8 -6.742121402603406 False
5 -4.071101157390693 False
6 -2.5146136553162997 False
7 -9.189377103453513 False
9 -2.3673957673885857 False
Episode 742, Total Reward: -1047.4423432625174
7 -8.756186467218008 False
3 -7.705775893101598 False
9 -5.895412073873364 False
5 -6.591803321334266 False
8 -4.071101157390693 False
Episode 743, Total Reward: -2533.020278912918
5 -1.9895625089960105 False
7 -6.890753875802023 False
1 -8.63933616755769 False
9 -9.070180154761154 False
6 -9.089599829761081 False
Episode 744, Total Reward: -2535.6794325368783
1 -2.2601094764485308 False
7 -8.63933616755769 False
2 -1.1138456925079876 False
5 -5.77799343429354 False
8 -4.071101157390693 False
4 -9.351745191179539 False
Episode 745, Total Reward: -2031.2141311193782
6 -0.557563640416601 False
1 -2.0415606574273 False
9 -9.070180154761154 False
4 -2.49690



7 -2.3673957673885857 False




3 -7.705775893101598 False




4 -6.980965374268762 False




5 -8.86012987256621 False




2 -5.77799343429354 False
6 -8.093492715402329 False




Episode 751, Total Reward: -1548.3650470431653
4 -10.843880223714587 False
3 -6.980965374268762 False
7 -7.705775893101598 False
9 -2.3673957673885857 False
8 -7.853704773177191 False
6 -5.004577401674917 False
Episode 752, Total Reward: -2040.7562994333257
1 -2.2601094764485308 False
4 -11.509995892926003 False
7 -4.250813147599771 False
8 -9.21539248558825 False
5 -4.071101157390693 False
Episode 753, Total Reward: -2531.307412159953
9 -8.579293986144062 False
7 -2.3673957673885857 False
8 -9.21539248558825 False
3 -2.6150299035155595 False
5 -4.905091251117594 False
Episode 754, Total Reward: -2527.6822033937538
4 -10.843880223714587 False
7 -4.250813147599771 False
2 -1.1138456925079876 False
5 -5.77799343429354 False
8 -4.071101157390693 False
Episode 755, Total Reward: -2526.057633655507
4 -10.843880223714587 False
6 -11.374743052916678 False
9 -9.089599829761081 False
3 -5.895412073873364 False
1 -8.062794569850828 False
Episode 756, Total Reward: -2545.2664297501165
5 -1.989562



2 -2.2550833802954076 False
3 -6.841918956903541 False




8 -2.6150299035155595 False
6 -5.004577401674917 False




4 -11.374743052916678 False
5 -8.86012987256621 False




1 -3.2013328857401957 False
Episode 761, Total Reward: -1048.7321094397564
9 -8.579293986144062 False
3 -5.895412073873364 False
4 -6.980965374268762 False
2 -4.585675628012074 False
7 -1.1138456925079876 False
Episode 762, Total Reward: -2527.155192754806
2 -7.653020649610424 False
8 -8.192416453410834 False
6 -5.004577401674917 False
5 -2.5146136553162997 False
1 -3.2013328857401957 False
7 -8.63933616755769 False
3 -7.705775893101598 False
Episode 763, Total Reward: -1542.911073106412
3 -6.226340755216833 False
7 -7.705775893101598 False
1 -8.63933616755769 False
8 -6.742121402603406 False
9 -7.853704773177191 False
Episode 764, Total Reward: -2537.1672789916565
6 -0.557563640416601 False
9 -9.089599829761081 False
7 -2.3673957673885857 False
1 -8.63933616755769 False
4 -11.509995892926003 False
8 -9.351745191179539 False
Episode 765, Total Reward: -2041.5156364892296
1 -2.2601094764485308 False
6 -2.0415606574273 False
3 -6.75623719662853 False
4 -6.980965374268762 False
5 -8.86012



8 -5.004577401674917 False
9 -7.853704773177191 False




3 -5.895412073873364 False
2 -6.841918956903541 False




Episode 771, Total Reward: -2526.1531768460454
7 -8.756186467218008 False
2 -1.1138456925079876 False
9 -2.2550833802954076 False
5 -6.591803321334266 False
6 -2.5146136553162997 False
8 -5.004577401674917 False
1 -6.742121402603406 False
Episode 772, Total Reward: -1532.9782313209505
9 -8.579293986144062 False
5 -6.591803321334266 False
2 -5.77799343429354 False
8 -8.192416453410834 False
4 -9.351745191179539 False
3 -6.980965374268762 False
Episode 773, Total Reward: -2045.474217760631
6 -0.557563640416601 False
8 -5.004577401674917 False
1 -6.742121402603406 False
2 -7.620784652302808 False
9 -2.2550833802954076 False
Episode 774, Total Reward: -2522.1801304772935
8 -4.569044986778715 False
7 -9.21539248558825 False
3 -7.705775893101598 False
9 -5.895412073873364 False
1 -9.070180154761154 False
6 -2.0415606574273 False
4 -11.374743052916678 False
Episode 775, Total Reward: -1549.872109304447
8 -4.569044986778715 False
1 -6.742121402603406 False
2 -7.620784652302808 False
3 -6.84191



7 -1.1138456925079876 False
6 -9.189377103453513 False




8 -5.004577401674917 False
3 -2.6150299035155595 False




5 -4.905091251117594 False




1 -3.2013328857401957 False
Episode 781, Total Reward: -1533.6822748876202
5 -1.9895625089960105 False
9 -6.591803321334266 False
6 -9.089599829761081 False
8 -5.004577401674917 False
1 -6.742121402603406 False
Episode 782, Total Reward: -2529.4176644643694
5 -1.9895625089960105 False
1 -3.2013328857401957 False
3 -8.062794569850828 False
7 -7.705775893101598 False
6 -9.189377103453513 False
2 -8.093492715402329 False
9 -2.2550833802954076 False
8 -7.853704773177191 False
Episode 783, Total Reward: -1048.351123830017
7 -8.756186467218008 False
1 -8.63933616755769 False
9 -9.070180154761154 False
2 -2.2550833802954076 False
6 -8.093492715402329 False
8 -5.004577401674917 False
Episode 784, Total Reward: -2041.8188562869095
7 -8.756186467218008 False
2 -1.1138456925079876 False
6 -8.093492715402329 False
1 -2.0415606574273 False
9 -9.070180154761154 False
Episode 785, Total Reward: -2529.075265687317
9 -8.579293986144062 False
5 -6.591803321334266 False
1 -3.2013328857401957 False
8 -6.7



Episode 790, Total Reward: -2049.99666821622
8 -4.569044986778715 False




6 -5.004577401674917 False
2 -8.093492715402329 False




4 -4.585675628012074 False




1 -11.509995892926003 False




9 -9.070180154761154 False
5 -6.591803321334266 False




Episode 791, Total Reward: -1549.4247701008894
4 -10.843880223714587 False
8 -9.351745191179539 False
2 -8.192416453410834 False
5 -5.77799343429354 False
1 -3.2013328857401957 False
7 -8.63933616755769 False
Episode 792, Total Reward: -2046.0067043558965
9 -8.579293986144062 False
2 -2.2550833802954076 False
8 -8.192416453410834 False
3 -2.6150299035155595 False
Episode 793, Total Reward: -3021.6418237233656
8 -4.569044986778715 False
1 -6.742121402603406 False
6 -2.0415606574273 False
3 -6.75623719662853 False
5 -4.905091251117594 False
4 -8.86012987256621 False
7 -4.250813147599771 False
9 -2.3673957673885857 False
Episode 794, Total Reward: -1040.4923942821101
3 -6.226340755216833 False
8 -2.6150299035155595 False
4 -9.351745191179539 False
2 -4.585675628012074 False
6 -8.093492715402329 False
1 -2.0415606574273 False
9 -9.070180154761154 False
Episode 795, Total Reward: -1541.9840250055147
2 -7.653020649610424 False
1 -7.620784652302808 False
6 -2.0415606574273 False
3 -6.75623719