In [3]:
# pip install gym
import gym

import numpy as np
import random
import os
import collections

import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Default font sizes for axis labels and tick labels in all figures below.
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Use the JavaScript/HTML representation when an animation is displayed.
mpl.rc('animation', html='jshtml')

from array2gif import write_gif

### OpenAI Gym. Маятник на тележке - Cartpole

![cartpole](https://www.gymlibrary.dev/_images/cart_pole.gif)

<font size="3">
<p>Эта среда - одна из систем в <a href='https://www.gymlibrary.dev/environments/classic_control/index.html'>Classic Control environments</a>.</p>
<div class="table-wrapper colwidths-auto docutils container">
<table class="docutils align-default">
<thead>
<tr class="row-odd"><th class="head"><p></p></th>
<th class="head"><p></p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Пространство действий</p></td>
<td><p>Discrete(2)</p></td>
</tr>
<tr class="row-odd"><td><p>Размер вектора состояния</p></td>
<td><p>(4,)</p></td>
</tr>
<tr class="row-even"><td><p>Верхние границы</p></td>
<td><p>[4.8   inf 0.42  inf]</p></td>
</tr>
<tr class="row-odd"><td><p>Нижние границы</p></td>
<td><p>[-4.8   -inf -0.42  -inf]</p></td>
</tr>
<tr class="row-even"><td><p>Импорт</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">gym.make(&quot;CartPole-v1&quot;)</span></code></p></td>
</tr>
</tbody>
</table>
</font>

In [None]:
# Create the CartPole-v1 environment used by the cells below.
env = gym.make('CartPole-v1')

### Состояния

<table class="docutils align-default">
<thead>
<tr class="row-odd"><th class="head"><p>№</p></th>
<th class="head"><p>Переменная</p></th>
<th class="head"><p>Min</p></th>
<th class="head"><p>Max</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>0</p></td>
<td><p>Положение тележки</p></td>
<td><p>-4.8</p></td>
<td><p>4.8</p></td>
</tr>
<tr class="row-odd"><td><p>1</p></td>
<td><p>Скорость тележки</p></td>
<td><p>-Inf</p></td>
<td><p>Inf</p></td>
</tr>
<tr class="row-even"><td><p>2</p></td>
<td><p>Угол маятника с вертикалью</p></td>
<td><p>~ -0.418 rad (-24°)</p></td>
<td><p>~ 0.418 rad (24°)</p></td>
</tr>
<tr class="row-odd"><td><p>3</p></td>
<td><p>Угловая скорость маятника</p></td>
<td><p>-Inf</p></td>
<td><p>Inf</p></td>
</tr>
</tbody>
</table>

In [None]:
# Length of the first axis of the observation space, i.e. the state vector size.
env.observation_space.shape[0]

In [None]:
# Reset the environment and display whatever reset() returned.
obs = env.reset()
obs

In [None]:
# Named tuple that gives readable field names (x, v, theta, omega) to the
# four components of an observation.
Obs = collections.namedtuple("CartPole_obs", "x v theta omega")
Obs(*obs)

In [None]:
# Render the current state as an RGB array and show it as an image.
img = env.render(mode="rgb_array")
plt.imshow(img)

### Начальные условия

Все переменные вектора состояния инициализируются случайными величинами, равномерно распределёнными в интервале (-0.05, 0.05).


### Начало эпизода

In [None]:
# Start a new episode and keep the initial observation as obs0 for the
# comparisons made in the following cells.
obs = env.reset()
obs0 = Obs(*obs)
print(obs0)

### Действия

<table class="docutils align-default">
<thead>
<tr class="row-odd"><th class="head"><p>№</p></th>
<th class="head"><p>Действие</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>0</p></td>
<td><p>Двинуть тележку налево</p></td>
</tr>
<tr class="row-odd"><td><p>1</p></td>
<td><p>Двинуть тележку направо</p></td>
</tr>
</tbody>
</table>

In [None]:
# Display the environment's action space.
env.action_space

### Действие  $a = 1$

In [None]:
# Apply action 1 once and keep the resulting observation as obs1.
obs, reward, done, info = env.step(1)
obs1 = Obs(*obs)
print(obs1)

In [None]:
# Print whether, relative to obs0, the cart velocity increased and the
# angular velocity decreased after action 1.
print(obs1.v > obs0.v, obs1.omega < obs0.omega)

### Действие $a = 0$

In [None]:
# Apply action 0 once and keep the resulting observation as obs2.
obs, reward, done, info = env.step(0)
obs2 = Obs(*obs)
print(obs2)

In [None]:
# Print whether, relative to obs1, the cart velocity decreased and the
# angular velocity increased after action 0.
print(obs2.v < obs1.v, obs2.omega > obs1.omega)

### Терминальные условия

Эпизод заканчивается при выполнении одного из следующих условий:

- Termination: Маятник отклонен от вертикали больше чем на ±12°

- Termination: Положение тележки ±2.4 (центр тележки выходит за границы рисунка)

- Truncation: Эпизод продолжается 500 шагов (200 для версии v0)

### Вознаграждение

Поскольку цель задачи - балансировать маятник около верхнего состояния, вознаграждение +1 назначается за каждый шаг, пока система не достигла терминального состояния.

In [None]:
def plot_environment(env, figsize=(5,4)):
    """Draw the current state of ``env`` in a new figure and return the frame.

    Args:
        env: environment whose ``render(mode="rgb_array")`` yields an image.
        figsize: size passed to ``plt.figure``.

    Returns:
        The image returned by ``env.render``.
    """
    frame = env.render(mode="rgb_array")
    plt.figure(figsize=figsize)
    plt.imshow(frame)
    # Hide the axes: only the rendered picture is of interest.
    plt.axis("off")
    return frame

In [None]:
# Draw the current state of the environment and show the figure.
img = plot_environment(env)
plt.show()

### Пример класса для работы с Cartpole

In [None]:
class CartPolePlayer:
    """Helpers to record, animate, evaluate and plot CartPole policies.

    A policy is any callable mapping an observation to an action. The methods
    use the classic gym API seen throughout this notebook: ``env.reset()``
    returns the observation and ``env.step(action)`` returns
    ``(obs, reward, done, info)``.
    """

    def record_scenario(self, env, policy, num_frames=100) -> dict:
        """Run ``policy`` for exactly ``num_frames`` steps and record everything.

        The loop does not stop or reset when ``done`` is reported; every step,
        including those after the first ``done``, is recorded.

        Args:
            env: environment to step; rendered with ``mode="rgb_array"``.
            policy: callable ``policy(obs) -> action``.
            num_frames: number of steps (and frames) to record.

        Returns:
            dict with keys ``frames`` (list of rendered images), ``obs``
            ((num_frames, 4) array), ``actions``, ``rewards`` and ``dones``
            ((num_frames,) arrays) and ``first_done_info`` (the ``info`` of
            the first step that reported ``done``, or ``''`` if none did).
        """
        frames = []
        obs_mat = np.empty((num_frames, 4))
        actions = np.empty((num_frames,))
        rewards = np.empty((num_frames,))
        dones = np.empty((num_frames,), dtype=int)
        first_done_info = ''
        obs = env.reset()  # initial observation

        for i in range(num_frames):
            action = policy(obs)
            obs, reward, done, info = env.step(action)
            frames.append(env.render(mode="rgb_array"))
            obs_mat[i, :] = obs
            actions[i] = action
            rewards[i] = reward
            dones[i] = int(done)

            # Remember only the info of the first step that ended the episode.
            if done and first_done_info == '':
                first_done_info = info

        return {
            'frames': frames,
            'obs': obs_mat,
            'actions': actions,
            'rewards': rewards,
            'dones': dones,
            'first_done_info': first_done_info,
        }

    def update_scene(self, num, frames, patch, time_text, obs_mat, actions, cum_rewards, dones):
        """Animation callback: show frame ``num`` and its textual summary.

        Args:
            num: index of the frame to display.
            frames: sequence of images.
            patch: image artist whose data is replaced with ``frames[num]``.
            time_text: text artist that receives the summary line.
            obs_mat: (N, 4) array of observations.
            actions: (N,) array of actions.
            cum_rewards: (N,) array of cumulative rewards.
            dones: (N,) array of done flags.

        Returns:
            Tuple ``(patch, time_text)`` of the updated artists.
        """
        patch.set_data(frames[num])
        text = f"frame: {num}"
        text += ", Obs: ({:.3f}, {:.3f}, {:.3f}, {:.3f})\n".format(*obs_mat[num, :])
        text += f"Action: {actions[num]}"
        text += f", Cumulative Reward: {cum_rewards[num]}"
        text += f", Done: {dones[num]}"
        time_text.set_text(text)

        return patch, time_text

    def plot_animation(self, record, repeat=False, interval=40):
        """Build a matplotlib animation from a recorded scenario.

        Args:
            record: dict as produced by ``record_scenario``; it must contain
                ``frames`` (list of N images), ``obs`` ((N, 4) array),
                ``actions`` ((N,) array), ``rewards`` ((N,) array) and
                ``dones`` ((N,) array).
            repeat: forwarded to ``FuncAnimation``.
            interval: forwarded to ``FuncAnimation``.

        Returns:
            The created ``animation.FuncAnimation``.
        """
        cum_rewards = np.cumsum(record['rewards'])
        frames = record['frames']
        fig = plt.figure()
        patch = plt.imshow(frames[0])
        ax = plt.gca()
        time_text = ax.text(
            0., 0.95, '',
            horizontalalignment='left', verticalalignment='top',
            transform=ax.transAxes,
        )
        plt.axis('off')
        anim = animation.FuncAnimation(
            fig, self.update_scene,
            fargs=(frames, patch, time_text, record['obs'], record['actions'],
                   cum_rewards, record['dones']),
            frames=len(frames), repeat=repeat, interval=interval)
        # Close the figure; presumably so the notebook shows only the
        # animation and not an extra static figure.
        plt.close()

        return anim

    def test_policy(self, policy_func, n_scenario=1000, max_actions=500, verbose=False, env=None):
        """Evaluate a policy over several episodes.

        Args:
            policy_func: callable ``policy_func(obs) -> action``.
            n_scenario: number of episodes to run.
            max_actions: maximum number of steps per episode.
            verbose: when true, print the episode number every 50 episodes.
            env: environment to evaluate on. When omitted, the module-level
                variable ``env`` is used, as in earlier versions of this
                method.

        Returns:
            List with the total reward collected in each episode.

        Raises:
            NameError: if ``env`` is omitted and no module-level ``env`` exists.
        """
        if env is None:
            try:
                env = globals()["env"]
            except KeyError:
                raise NameError("name 'env' is not defined") from None

        final_rewards = []
        for episode in range(n_scenario):
            if verbose and episode % 50 == 0:
                print(episode)
            episode_rewards = 0
            obs = env.reset()  # reset to a random position
            for _ in range(max_actions):
                obs, reward, done, _info = env.step(policy_func(obs))
                episode_rewards += reward
                if done:
                    break
            final_rewards.append(episode_rewards)

        return final_rewards

    def plot_policy(self, final_rewards, policy_name: str = ''):
        """Plot per-episode rewards; the title shows their mean and std.

        Args:
            final_rewards: sequence of total rewards, one per episode.
            policy_name: prefix for the plot title.

        Returns:
            The value returned by ``plt.plot``.
        """
        lines = plt.plot(range(len(final_rewards)), final_rewards)
        plt.grid()
        plt.title(policy_name + " Mean Reward {:.2f}, Std Reward {:.2f}".format(
            np.mean(final_rewards), np.std(final_rewards)))
        plt.ylabel('Cum Reward')
        plt.xlabel('Iteration')
        plt.ylim(0, max(final_rewards) * 1.1)

        return lines

    def save_gif(self, recorded_episode, filename='recorded_episode.gif', fps=30):
        """Write the recorded frames to an animated GIF.

        Args:
            recorded_episode: dict with a ``frames`` list, as returned by
                ``record_scenario``.
            filename: path of the GIF file to write.
            fps: frames per second passed to ``write_gif``.
        """
        # Frames are moved from axis order (0, 1, 2) to (2, 0, 1) before
        # being handed to write_gif.
        gif_frames = [np.transpose(frame, axes=[2, 0, 1]) for frame in recorded_episode['frames']]
        write_gif(gif_frames, filename, fps=fps)

In [None]:
# Helper instance shared by the policy experiments below.
player = CartPolePlayer()

### Стратегия случайных действий

In [None]:
def rand_policy(obs):
    """Return 0 or 1 at random; the observation is ignored."""
    return random.randint(0, 1)

# Seed the environment and Python's random module before the run.
env.seed(42)
random.seed(0)

# Evaluate the random policy and plot the reward of every episode.
rand_rewards = player.test_policy(rand_policy)
pt = player.plot_policy(rand_rewards, "Random Policy")

### Наивная стратегия (управление по отклонению маятника)

In [None]:
def theta_policy(obs):
    """Choose the action from the sign of the pole angle ``obs[2]``.

    Returns 0 when the angle is negative, otherwise 1.
    """
    if obs[2] < 0:
        return 0
    return 1

# Seed the environment and Python's random module before the run.
env.seed(42)
random.seed(0)

# Evaluate the angle-based policy and plot the reward of every episode.
theta_rewards = player.test_policy(theta_policy)
pt = player.plot_policy(theta_rewards, "Theta Policy")

### Наивная стратегия (управление по угловой скорости)

In [None]:
def omega_policy(obs):
    """Choose the action from the sign of the angular velocity ``obs[3]``.

    Returns 1 when the angular velocity is positive, otherwise 0.
    """
    if obs[3] > 0:
        return 1
    return 0

# Seed the environment and Python's random module before the run.
env.seed(42)
random.seed(0)

# Evaluate the angular-velocity policy and plot the reward of every episode.
omega_rewards = player.test_policy(omega_policy)
pt = player.plot_policy(omega_rewards, "Omega Policy")

### Комбинация двух наивных стратегий, внезапно приводящая к успеху

In [None]:
def theta_omega_policy(obs):
    """Combine the angle and angular-velocity rules.

    While the pole angle is within 0.03 of vertical the sign of the angular
    velocity decides; otherwise the sign of the angle decides. A negative
    deciding value gives action 0, anything else gives action 1.
    """
    angle, angular_velocity = obs[2:4]
    deciding_value = angular_velocity if abs(angle) < 0.03 else angle
    if deciding_value < 0:
        return 0
    return 1
    
# Seed the environment and Python's random module before the run.
env.seed(42)
random.seed(0)

# The step budget (510) is set above the 500-step episode limit, so an
# episode that survives ends through the environment's time limit
# (info contains 'TimeLimit.truncated': True) rather than through max_actions.
theta_omega_rewards = player.test_policy(theta_omega_policy, max_actions=510) 
player.plot_policy(theta_omega_rewards, "Theta-Omega Policy")

### Запись эпизода

In [None]:
# Record 100 steps of the combined theta-omega policy.
theta_record = player.record_scenario(env, theta_omega_policy, 100)

### Анимация

In [None]:
# Build the animation of the recorded episode; as the last expression of the
# cell it is displayed by the notebook.
player.plot_animation(theta_record)

### Сохранение gif

In [None]:
# Write the recorded frames to recorded_episode.gif.
player.save_gif(theta_record)

# Markdown line to embed the saved GIF - <img src="recorded_episode.gif" width="750" align="center">

In [27]:
class Q_Learning:
    """Tabular Q-learning for CartPole on a discretized state space.

    Every component of the 4-dimensional state (cart position, cart velocity,
    pole angle, pole angular velocity) is mapped to a bin index; the action
    values are stored in an array indexed by the four bin indices and the
    action.

    The environment may follow either gym calling convention:
    ``reset() -> obs`` with ``step() -> (obs, reward, done, info)``, or
    ``reset() -> (obs, info)`` with
    ``step() -> (obs, reward, terminated, truncated, info)``.
    """

    def __init__(self, env, alpha, gamma, epsilon, episodesCount, binCounts, lowerBounds, upperBounds):
        """Store the hyper-parameters and create a random Q-table.

        Args:
            env: Cart Pole environment used for learning.
            alpha: step size of the Q-update.
            gamma: discount rate.
            epsilon: exploration probability of the epsilon-greedy rule.
            episodesCount: total number of learning episodes.
            binCounts: 4 entries, number of grid points per state component.
            lowerBounds: 4 entries, lower discretization limit per component.
            upperBounds: 4 entries, upper discretization limit per component.
        """
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.actionsCount = env.action_space.n
        self.episodesCount = episodesCount
        self.binCounts = binCounts
        self.lowerBounds = lowerBounds
        self.upperBounds = upperBounds

        # Sum of rewards of every learning episode, used to track convergence.
        self.sumRewardsEpisode = []

        # Action-value table: one axis per state component plus one for the
        # action, initialised uniformly at random in [0, 1).
        binSz = (binCounts[0], binCounts[1], binCounts[2], binCounts[3], self.actionsCount)
        self.Qmatrix = np.random.uniform(low=0, high=1, size=binSz)

    @staticmethod
    def _resetEnv(env):
        """Reset ``env`` and return only the observation for both gym APIs."""
        result = env.reset()
        # Newer gym returns (observation, info); older gym returns the
        # observation alone.
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            return result[0]
        return result

    @staticmethod
    def _stepEnv(env, action):
        """Step ``env`` and return ``(observation, reward, done)`` for both gym APIs.

        With the 5-tuple API ``done`` is true when the episode was either
        terminated or truncated; with the 4-tuple API it is the ``done`` flag.
        """
        result = env.step(action)
        if len(result) == 5:
            observation, reward, terminated, truncated, _ = result
            return observation, reward, bool(terminated or truncated)
        observation, reward, done, _ = result
        return observation, reward, bool(done)

    def calcStateIdx(self, state):
        """Map a continuous state to its index tuple in the Q-table.

        Each component is located on a grid of ``binCounts[i]`` evenly spaced
        points between ``lowerBounds[i]`` and ``upperBounds[i]``. Values below
        the first grid point map to index 0 and values at or above the last
        grid point map to the last index.

        Args:
            state: sequence whose first 4 entries are cart position, cart
                velocity, pole angle and pole angular velocity.

        Returns:
            Tuple of 4 integer indices.
        """
        indices = []
        for value, low, high, count in zip(state, self.lowerBounds, self.upperBounds, self.binCounts):
            gridPoints = np.linspace(low, high, count)
            # digitize() is 1-based for in-range values; shift it and clamp
            # the below-range result (-1) to 0.
            indices.append(int(np.maximum(np.digitize(value, gridPoints) - 1, 0)))
        return tuple(indices)

    def _greedyAction(self, state):
        """Return an action with maximal Q-value in ``state``, ties broken at random."""
        q_values = self.Qmatrix[self.calcStateIdx(state)]
        # np.where returns a tuple; element 0 holds the indices of all maxima.
        best_actions = np.where(q_values == np.max(q_values))[0]
        return np.random.choice(best_actions)

    def selectAction(self, state, index):
        """Select an action with the epsilon-greedy rule.

        Args:
            state: state for which to choose the action.
            index: index of the current episode.

        Returns:
            The selected action.

        Note:
            For episode indices above 7000 ``self.epsilon`` is multiplied by
            0.999 on every call. ``simulateEpisodes`` calls this method once
            per step, so the decay is applied per step, not per episode.
        """
        # The first 500 episodes use purely random actions for exploration.
        if index < 500:
            return np.random.choice(self.actionsCount)

        # Random number in [0.0, 1.0) for the epsilon-greedy decision.
        randomNumber = np.random.random()

        if index > 7000:
            self.epsilon = 0.999 * self.epsilon

        # Explore with probability epsilon, otherwise act greedily.
        if randomNumber < self.epsilon:
            return np.random.choice(self.actionsCount)
        return self._greedyAction(state)

    def simulateEpisodes(self):
        """Run ``episodesCount`` learning episodes and update ``Qmatrix``.

        The sum of rewards of every episode is printed and appended to
        ``sumRewardsEpisode``.
        """
        for episodeIdx in range(self.episodesCount):
            # Rewards of this episode, kept to track convergence.
            episodeRewards = []

            current_state = list(self._resetEnv(self.env))

            print("Simulating episode {}".format(episodeIdx))

            terminal_state = False
            while not terminal_state:
                current_state_idx = self.calcStateIdx(current_state)
                current_action = self.selectAction(current_state, episodeIdx)

                next_state, reward, terminal_state = self._stepEnv(self.env, current_action)
                episodeRewards.append(reward)
                next_state = list(next_state)

                # Appending the action to the state index tuple addresses a
                # single entry of the Q-table.
                q_index = current_state_idx + (current_action,)

                if terminal_state:
                    # No value is bootstrapped from the state that ends the
                    # episode.
                    target = reward
                else:
                    Qmax_next = np.max(self.Qmatrix[self.calcStateIdx(next_state)])
                    target = reward + self.gamma * Qmax_next

                self.Qmatrix[q_index] = self.Qmatrix[q_index] + self.alpha * (target - self.Qmatrix[q_index])

                current_state = next_state

            print("Sum of rewards {}".format(np.sum(episodeRewards)))
            self.sumRewardsEpisode.append(np.sum(episodeRewards))

    def simulateLearnedStrategy(self):
        """Run one rendered episode that follows the greedy learned policy.

        A new 'CartPole-v1' environment is created with
        ``render_mode='human'``. At most 1000 steps are simulated; the loop
        stops as soon as the episode ends.

        Returns:
            Tuple ``(obtained_rewards, cartpole_env)``: the list of rewards
            of every step and the created environment (left open).
        """
        cartpole_env = gym.make('CartPole-v1', render_mode='human')
        state = self._resetEnv(cartpole_env)
        cartpole_env.render()
        time_steps = 1000

        # Reward obtained at every time step.
        obtained_rewards = []

        for timeIdx in range(time_steps):
            print(timeIdx)

            action = self._greedyAction(state)
            state, reward, done = self._stepEnv(cartpole_env, action)
            obtained_rewards.append(reward)

            # Slow the loop down so the rendering can be followed.
            time.sleep(0.05)
            if done:
                time.sleep(1)
                break

        return obtained_rewards, cartpole_env

    def simulateRandomStrategy(self):
        """Run 100 episodes with random actions as a baseline.

        A new 'CartPole-v1' environment is created without a render mode.
        Every episode runs for at most 1000 steps and stops as soon as the
        episode ends.

        Returns:
            Tuple ``(episodes_cum_rewards, cartpole_env)``: the sum of
            rewards of every episode and the created environment (left open).
        """
        cartpole_env = gym.make('CartPole-v1')
        # Number of simulation episodes.
        episodes_count = 100
        # Maximum number of time steps in every episode.
        time_steps = 1000
        # Sum of rewards of each episode.
        episodes_cum_rewards = []

        for episode_idx in range(episodes_count):
            episode_rewards = []
            self._resetEnv(cartpole_env)
            print(episode_idx)

            for _ in range(time_steps):
                random_action = cartpole_env.action_space.sample()
                _, reward, done = self._stepEnv(cartpole_env, random_action)
                episode_rewards.append(reward)

                if done:
                    break

            episodes_cum_rewards.append(np.sum(episode_rewards))

        return episodes_cum_rewards, cartpole_env

In [None]:
# Train a tabular Q-learning agent on CartPole-v1, plot its convergence and
# compare the learned strategy with a random one.
env = gym.make('CartPole-v1')
state = env.reset()

# Parameters for state discretization. Copies are taken so that editing the
# velocity limits below does not modify the environment's observation space.
upperBounds = env.observation_space.high.copy()
lowerBounds = env.observation_space.low.copy()
# Finite limits assigned to the two velocity components (indices 1 and 3)
# for discretization.
cartVelocityMin = -3
cartVelocityMax = 3
poleAngleVelocityMin = -10
poleAngleVelocityMax = 10
upperBounds[1] = cartVelocityMax
upperBounds[3] = poleAngleVelocityMax
lowerBounds[1] = cartVelocityMin
lowerBounds[3] = poleAngleVelocityMin

numberOfBinsPosition = 30
numberOfBinsVelocity = 30
numberOfBinsAngle = 30
numberOfBinsAngleVelocity = 30
numberOfBins = [numberOfBinsPosition, numberOfBinsVelocity, numberOfBinsAngle, numberOfBinsAngleVelocity]

# Learning parameters
alpha = 0.1
gamma = 1
epsilon = 0.2
numberEpisodes = 15000

# Create the agent
Q1 = Q_Learning(env, alpha, gamma, epsilon, numberEpisodes, numberOfBins, lowerBounds, upperBounds)
# Run the Q-Learning algorithm
Q1.simulateEpisodes()
# Simulate the learned strategy
(obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy()

plt.figure(figsize=(12, 5))

# Plot the sum of rewards per learning episode on a logarithmic scale
plt.plot(Q1.sumRewardsEpisode, color='blue', linewidth=1)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.yscale('log')
# Save before show(): once the figure has been shown, savefig() would write
# an empty image.
plt.savefig('convergence.png')
plt.show()

# Close the environment
env1.close()
# Print the sum of rewards obtained by the learned strategy
print(np.sum(obtainedRewardsOptimal))

# Now simulate a random strategy
(obtainedRewardsRandom, env2) = Q1.simulateRandomStrategy()
plt.hist(obtainedRewardsRandom)
plt.xlabel('Sum of rewards')
plt.ylabel('Percentage')
plt.savefig('histogram.png')
plt.show()

# Run this several times and compare with a random learning strategy
(obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy()

Simulating episode 0
Sum of rewards 13.0
Simulating episode 1
Sum of rewards 26.0
Simulating episode 2
Sum of rewards 16.0
Simulating episode 3
Sum of rewards 25.0
Simulating episode 4
Sum of rewards 31.0
Simulating episode 5
Sum of rewards 41.0
Simulating episode 6
Sum of rewards 45.0
Simulating episode 7
Sum of rewards 35.0
Simulating episode 8
Sum of rewards 17.0
Simulating episode 9
Sum of rewards 14.0
Simulating episode 10
Sum of rewards 11.0
Simulating episode 11
Sum of rewards 18.0
Simulating episode 12
Sum of rewards 30.0
Simulating episode 13
Sum of rewards 18.0
Simulating episode 14
Sum of rewards 21.0
Simulating episode 15
Sum of rewards 17.0
Simulating episode 16
Sum of rewards 21.0
Simulating episode 17
Sum of rewards 12.0
Simulating episode 18
Sum of rewards 13.0
Simulating episode 19
Sum of rewards 10.0
Simulating episode 20
Sum of rewards 20.0
Simulating episode 21
Sum of rewards 35.0
Simulating episode 22
Sum of rewards 16.0
Simulating episode 23
Sum of rewards 21.0
Si

Sum of rewards 26.0
Simulating episode 213
Sum of rewards 15.0
Simulating episode 214
Sum of rewards 14.0
Simulating episode 215
Sum of rewards 16.0
Simulating episode 216
Sum of rewards 28.0
Simulating episode 217
Sum of rewards 17.0
Simulating episode 218
Sum of rewards 14.0
Simulating episode 219
Sum of rewards 35.0
Simulating episode 220
Sum of rewards 18.0
Simulating episode 221
Sum of rewards 13.0
Simulating episode 222
Sum of rewards 14.0
Simulating episode 223
Sum of rewards 21.0
Simulating episode 224
Sum of rewards 15.0
Simulating episode 225
Sum of rewards 28.0
Simulating episode 226
Sum of rewards 17.0
Simulating episode 227
Sum of rewards 22.0
Simulating episode 228
Sum of rewards 13.0
Simulating episode 229
Sum of rewards 29.0
Simulating episode 230
Sum of rewards 13.0
Simulating episode 231
Sum of rewards 28.0
Simulating episode 232
Sum of rewards 27.0
Simulating episode 233
Sum of rewards 15.0
Simulating episode 234
Sum of rewards 10.0
Simulating episode 235
Sum of rewa

Sum of rewards 22.0
Simulating episode 417
Sum of rewards 17.0
Simulating episode 418
Sum of rewards 17.0
Simulating episode 419
Sum of rewards 26.0
Simulating episode 420
Sum of rewards 16.0
Simulating episode 421
Sum of rewards 14.0
Simulating episode 422
Sum of rewards 24.0
Simulating episode 423
Sum of rewards 18.0
Simulating episode 424
Sum of rewards 19.0
Simulating episode 425
Sum of rewards 17.0
Simulating episode 426
Sum of rewards 11.0
Simulating episode 427
Sum of rewards 25.0
Simulating episode 428
Sum of rewards 21.0
Simulating episode 429
Sum of rewards 13.0
Simulating episode 430
Sum of rewards 22.0
Simulating episode 431
Sum of rewards 14.0
Simulating episode 432
Sum of rewards 12.0
Simulating episode 433
Sum of rewards 22.0
Simulating episode 434
Sum of rewards 20.0
Simulating episode 435
Sum of rewards 28.0
Simulating episode 436
Sum of rewards 14.0
Simulating episode 437
Sum of rewards 14.0
Simulating episode 438
Sum of rewards 36.0
Simulating episode 439
Sum of rewa

Sum of rewards 119.0
Simulating episode 608
Sum of rewards 87.0
Simulating episode 609
Sum of rewards 127.0
Simulating episode 610
Sum of rewards 95.0
Simulating episode 611
Sum of rewards 112.0
Simulating episode 612
Sum of rewards 63.0
Simulating episode 613
Sum of rewards 218.0
Simulating episode 614
Sum of rewards 23.0
Simulating episode 615
Sum of rewards 107.0
Simulating episode 616
Sum of rewards 82.0
Simulating episode 617
Sum of rewards 112.0
Simulating episode 618
Sum of rewards 147.0
Simulating episode 619
Sum of rewards 71.0
Simulating episode 620
Sum of rewards 201.0
Simulating episode 621
Sum of rewards 53.0
Simulating episode 622
Sum of rewards 85.0
Simulating episode 623
Sum of rewards 88.0
Simulating episode 624
Sum of rewards 133.0
Simulating episode 625
Sum of rewards 118.0
Simulating episode 626
Sum of rewards 113.0
Simulating episode 627
Sum of rewards 151.0
Simulating episode 628
Sum of rewards 27.0
Simulating episode 629
Sum of rewards 160.0
Simulating episode 63

Sum of rewards 261.0
Simulating episode 799
Sum of rewards 111.0
Simulating episode 800
Sum of rewards 239.0
Simulating episode 801
Sum of rewards 82.0
Simulating episode 802
Sum of rewards 117.0
Simulating episode 803
Sum of rewards 72.0
Simulating episode 804
Sum of rewards 54.0
Simulating episode 805
Sum of rewards 191.0
Simulating episode 806
Sum of rewards 19.0
Simulating episode 807
Sum of rewards 104.0
Simulating episode 808
Sum of rewards 64.0
Simulating episode 809
Sum of rewards 22.0
Simulating episode 810
Sum of rewards 175.0
Simulating episode 811
Sum of rewards 316.0
Simulating episode 812
Sum of rewards 86.0
Simulating episode 813
Sum of rewards 139.0
Simulating episode 814
Sum of rewards 164.0
Simulating episode 815
Sum of rewards 120.0
Simulating episode 816
Sum of rewards 115.0
Simulating episode 817
Sum of rewards 263.0
Simulating episode 818
Sum of rewards 110.0
Simulating episode 819
Sum of rewards 83.0
Simulating episode 820
Sum of rewards 129.0
Simulating episode 

Sum of rewards 145.0
Simulating episode 988
Sum of rewards 155.0
Simulating episode 989
Sum of rewards 131.0
Simulating episode 990
Sum of rewards 74.0
Simulating episode 991
Sum of rewards 87.0
Simulating episode 992
Sum of rewards 106.0
Simulating episode 993
Sum of rewards 290.0
Simulating episode 994
Sum of rewards 128.0
Simulating episode 995
Sum of rewards 158.0
Simulating episode 996
Sum of rewards 161.0
Simulating episode 997
Sum of rewards 199.0
Simulating episode 998
Sum of rewards 178.0
Simulating episode 999
Sum of rewards 66.0
Simulating episode 1000
Sum of rewards 182.0
Simulating episode 1001
Sum of rewards 166.0
Simulating episode 1002
Sum of rewards 136.0
Simulating episode 1003
Sum of rewards 139.0
Simulating episode 1004
Sum of rewards 295.0
Simulating episode 1005
Sum of rewards 173.0
Simulating episode 1006
Sum of rewards 51.0
Simulating episode 1007
Sum of rewards 32.0
Simulating episode 1008
Sum of rewards 129.0
Simulating episode 1009
Sum of rewards 52.0
Simulat

Sum of rewards 76.0
Simulating episode 1174
Sum of rewards 60.0
Simulating episode 1175
Sum of rewards 58.0
Simulating episode 1176
Sum of rewards 79.0
Simulating episode 1177
Sum of rewards 135.0
Simulating episode 1178
Sum of rewards 174.0
Simulating episode 1179
Sum of rewards 77.0
Simulating episode 1180
Sum of rewards 122.0
Simulating episode 1181
Sum of rewards 55.0
Simulating episode 1182
Sum of rewards 152.0
Simulating episode 1183
Sum of rewards 114.0
Simulating episode 1184
Sum of rewards 83.0
Simulating episode 1185
Sum of rewards 100.0
Simulating episode 1186
Sum of rewards 79.0
Simulating episode 1187
Sum of rewards 124.0
Simulating episode 1188
Sum of rewards 98.0
Simulating episode 1189
Sum of rewards 139.0
Simulating episode 1190
Sum of rewards 64.0
Simulating episode 1191
Sum of rewards 108.0
Simulating episode 1192
Sum of rewards 42.0
Simulating episode 1193
Sum of rewards 97.0
Simulating episode 1194
Sum of rewards 100.0
Simulating episode 1195
Sum of rewards 71.0
Si

Sum of rewards 142.0
Simulating episode 1359
Sum of rewards 143.0
Simulating episode 1360
Sum of rewards 98.0
Simulating episode 1361
Sum of rewards 239.0
Simulating episode 1362
Sum of rewards 82.0
Simulating episode 1363
Sum of rewards 58.0
Simulating episode 1364
Sum of rewards 181.0
Simulating episode 1365
Sum of rewards 96.0
Simulating episode 1366
Sum of rewards 115.0
Simulating episode 1367
Sum of rewards 117.0
Simulating episode 1368
Sum of rewards 86.0
Simulating episode 1369
Sum of rewards 120.0
Simulating episode 1370
Sum of rewards 175.0
Simulating episode 1371
Sum of rewards 79.0
Simulating episode 1372
Sum of rewards 86.0
Simulating episode 1373
Sum of rewards 53.0
Simulating episode 1374
Sum of rewards 74.0
Simulating episode 1375
Sum of rewards 107.0
Simulating episode 1376
Sum of rewards 58.0
Simulating episode 1377
Sum of rewards 57.0
Simulating episode 1378
Sum of rewards 80.0
Simulating episode 1379
Sum of rewards 140.0
Simulating episode 1380
Sum of rewards 67.0
Si

Sum of rewards 90.0
Simulating episode 1544
Sum of rewards 164.0
Simulating episode 1545
Sum of rewards 137.0
Simulating episode 1546
Sum of rewards 66.0
Simulating episode 1547
Sum of rewards 50.0
Simulating episode 1548
Sum of rewards 62.0
Simulating episode 1549
Sum of rewards 41.0
Simulating episode 1550
Sum of rewards 136.0
Simulating episode 1551
Sum of rewards 133.0
Simulating episode 1552
Sum of rewards 23.0
Simulating episode 1553
Sum of rewards 111.0
Simulating episode 1554
Sum of rewards 42.0
Simulating episode 1555
Sum of rewards 211.0
Simulating episode 1556
Sum of rewards 153.0
Simulating episode 1557
Sum of rewards 111.0
Simulating episode 1558
Sum of rewards 84.0
Simulating episode 1559
Sum of rewards 149.0
Simulating episode 1560
Sum of rewards 87.0
Simulating episode 1561
Sum of rewards 129.0
Simulating episode 1562
Sum of rewards 32.0
Simulating episode 1563
Sum of rewards 120.0
Simulating episode 1564
Sum of rewards 80.0
Simulating episode 1565
Sum of rewards 144.0


Sum of rewards 89.0
Simulating episode 1729
Sum of rewards 108.0
Simulating episode 1730
Sum of rewards 138.0
Simulating episode 1731
Sum of rewards 149.0
Simulating episode 1732
Sum of rewards 136.0
Simulating episode 1733
Sum of rewards 90.0
Simulating episode 1734
Sum of rewards 93.0
Simulating episode 1735
Sum of rewards 131.0
Simulating episode 1736
Sum of rewards 54.0
Simulating episode 1737
Sum of rewards 62.0
Simulating episode 1738
Sum of rewards 83.0
Simulating episode 1739
Sum of rewards 61.0
Simulating episode 1740
Sum of rewards 73.0
Simulating episode 1741
Sum of rewards 87.0
Simulating episode 1742
Sum of rewards 110.0
Simulating episode 1743
Sum of rewards 83.0
Simulating episode 1744
Sum of rewards 95.0
Simulating episode 1745
Sum of rewards 143.0
Simulating episode 1746
Sum of rewards 90.0
Simulating episode 1747
Sum of rewards 67.0
Simulating episode 1748
Sum of rewards 55.0
Simulating episode 1749
Sum of rewards 157.0
Simulating episode 1750
Sum of rewards 213.0
Sim

Sum of rewards 174.0
Simulating episode 1914
Sum of rewards 74.0
Simulating episode 1915
Sum of rewards 62.0
Simulating episode 1916
Sum of rewards 120.0
Simulating episode 1917
Sum of rewards 58.0
Simulating episode 1918
Sum of rewards 110.0
Simulating episode 1919
Sum of rewards 157.0
Simulating episode 1920
Sum of rewards 74.0
Simulating episode 1921
Sum of rewards 55.0
Simulating episode 1922
Sum of rewards 154.0
Simulating episode 1923
Sum of rewards 76.0
Simulating episode 1924
Sum of rewards 248.0
Simulating episode 1925
Sum of rewards 121.0
Simulating episode 1926
Sum of rewards 125.0
Simulating episode 1927
Sum of rewards 128.0
Simulating episode 1928
Sum of rewards 186.0
Simulating episode 1929
Sum of rewards 200.0
Simulating episode 1930
Sum of rewards 64.0
Simulating episode 1931
Sum of rewards 97.0
Simulating episode 1932
Sum of rewards 16.0
Simulating episode 1933
Sum of rewards 69.0
Simulating episode 1934
Sum of rewards 102.0
Simulating episode 1935
Sum of rewards 170.0

Sum of rewards 191.0
Simulating episode 2100
Sum of rewards 105.0
Simulating episode 2101
Sum of rewards 153.0
Simulating episode 2102
Sum of rewards 141.0
Simulating episode 2103
Sum of rewards 231.0
Simulating episode 2104
Sum of rewards 102.0
Simulating episode 2105
Sum of rewards 92.0
Simulating episode 2106
Sum of rewards 120.0
Simulating episode 2107
Sum of rewards 95.0
Simulating episode 2108
Sum of rewards 87.0
Simulating episode 2109
Sum of rewards 97.0
Simulating episode 2110
Sum of rewards 70.0
Simulating episode 2111
Sum of rewards 47.0
Simulating episode 2112
Sum of rewards 106.0
Simulating episode 2113
Sum of rewards 66.0
Simulating episode 2114
Sum of rewards 70.0
Simulating episode 2115
Sum of rewards 83.0
Simulating episode 2116
Sum of rewards 99.0
Simulating episode 2117
Sum of rewards 140.0
Simulating episode 2118
Sum of rewards 22.0
Simulating episode 2119
Sum of rewards 128.0
Simulating episode 2120
Sum of rewards 111.0
Simulating episode 2121
Sum of rewards 142.0


Sum of rewards 65.0
Simulating episode 2284
Sum of rewards 185.0
Simulating episode 2285
Sum of rewards 194.0
Simulating episode 2286
Sum of rewards 118.0
Simulating episode 2287
Sum of rewards 55.0
Simulating episode 2288
Sum of rewards 131.0
Simulating episode 2289
Sum of rewards 102.0
Simulating episode 2290
Sum of rewards 94.0
Simulating episode 2291
Sum of rewards 75.0
Simulating episode 2292
Sum of rewards 139.0
Simulating episode 2293
Sum of rewards 173.0
Simulating episode 2294
Sum of rewards 102.0
Simulating episode 2295
Sum of rewards 68.0
Simulating episode 2296
Sum of rewards 106.0
Simulating episode 2297
Sum of rewards 241.0
Simulating episode 2298
Sum of rewards 127.0
Simulating episode 2299
Sum of rewards 91.0
Simulating episode 2300
Sum of rewards 42.0
Simulating episode 2301
Sum of rewards 81.0
Simulating episode 2302
Sum of rewards 48.0
Simulating episode 2303
Sum of rewards 19.0
Simulating episode 2304
Sum of rewards 206.0
Simulating episode 2305
Sum of rewards 140.0

Simulating episode 2467
Sum of rewards 70.0
Simulating episode 2468
Sum of rewards 162.0
Simulating episode 2469
Sum of rewards 197.0
Simulating episode 2470
Sum of rewards 93.0
Simulating episode 2471
Sum of rewards 120.0
Simulating episode 2472
Sum of rewards 114.0
Simulating episode 2473
Sum of rewards 132.0
Simulating episode 2474
Sum of rewards 141.0
Simulating episode 2475
Sum of rewards 67.0
Simulating episode 2476
Sum of rewards 172.0
Simulating episode 2477
Sum of rewards 125.0
Simulating episode 2478
Sum of rewards 157.0
Simulating episode 2479
Sum of rewards 100.0
Simulating episode 2480
Sum of rewards 240.0
Simulating episode 2481
Sum of rewards 374.0
Simulating episode 2482
Sum of rewards 132.0
Simulating episode 2483
Sum of rewards 94.0
Simulating episode 2484
Sum of rewards 44.0
Simulating episode 2485
Sum of rewards 74.0
Simulating episode 2486
Sum of rewards 159.0
Simulating episode 2487
Sum of rewards 86.0
Simulating episode 2488
Sum of rewards 148.0
Simulating episod

Sum of rewards 119.0
Simulating episode 2654
Sum of rewards 82.0
Simulating episode 2655
Sum of rewards 93.0
Simulating episode 2656
Sum of rewards 91.0
Simulating episode 2657
Sum of rewards 106.0
Simulating episode 2658
Sum of rewards 202.0
Simulating episode 2659
Sum of rewards 120.0
Simulating episode 2660
Sum of rewards 98.0
Simulating episode 2661
Sum of rewards 138.0
Simulating episode 2662
Sum of rewards 143.0
Simulating episode 2663
Sum of rewards 121.0
Simulating episode 2664
Sum of rewards 85.0
Simulating episode 2665
Sum of rewards 169.0
Simulating episode 2666
Sum of rewards 171.0
Simulating episode 2667
Sum of rewards 147.0
Simulating episode 2668
Sum of rewards 126.0
Simulating episode 2669
Sum of rewards 117.0
Simulating episode 2670
Sum of rewards 117.0
Simulating episode 2671
Sum of rewards 111.0
Simulating episode 2672
Sum of rewards 82.0
Simulating episode 2673
Sum of rewards 99.0
Simulating episode 2674
Sum of rewards 174.0
Simulating episode 2675
Sum of rewards 14

Sum of rewards 186.0
Simulating episode 2838
Sum of rewards 112.0
Simulating episode 2839
Sum of rewards 129.0
Simulating episode 2840
Sum of rewards 229.0
Simulating episode 2841
Sum of rewards 172.0
Simulating episode 2842
Sum of rewards 174.0
Simulating episode 2843
Sum of rewards 94.0
Simulating episode 2844
Sum of rewards 195.0
Simulating episode 2845
Sum of rewards 119.0
Simulating episode 2846
Sum of rewards 86.0
Simulating episode 2847
Sum of rewards 150.0
Simulating episode 2848
Sum of rewards 96.0
Simulating episode 2849
Sum of rewards 183.0
Simulating episode 2850
Sum of rewards 106.0
Simulating episode 2851
Sum of rewards 118.0
Simulating episode 2852
Sum of rewards 163.0
Simulating episode 2853
Sum of rewards 256.0
Simulating episode 2854
Sum of rewards 149.0
Simulating episode 2855
Sum of rewards 272.0
Simulating episode 2856
Sum of rewards 145.0
Simulating episode 2857
Sum of rewards 181.0
Simulating episode 2858
Sum of rewards 258.0
Simulating episode 2859
Sum of reward

Sum of rewards 150.0
Simulating episode 3022
Sum of rewards 55.0
Simulating episode 3023
Sum of rewards 79.0
Simulating episode 3024
Sum of rewards 48.0
Simulating episode 3025
Sum of rewards 79.0
Simulating episode 3026
Sum of rewards 78.0
Simulating episode 3027
Sum of rewards 42.0
Simulating episode 3028
Sum of rewards 125.0
Simulating episode 3029
Sum of rewards 79.0
Simulating episode 3030
Sum of rewards 57.0
Simulating episode 3031
Sum of rewards 58.0
Simulating episode 3032
Sum of rewards 121.0
Simulating episode 3033
Sum of rewards 86.0
Simulating episode 3034
Sum of rewards 111.0
Simulating episode 3035
Sum of rewards 182.0
Simulating episode 3036
Sum of rewards 125.0
Simulating episode 3037
Sum of rewards 70.0
Simulating episode 3038
Sum of rewards 108.0
Simulating episode 3039
Sum of rewards 120.0
Simulating episode 3040
Sum of rewards 136.0
Simulating episode 3041
Sum of rewards 103.0
Simulating episode 3042
Sum of rewards 117.0
Simulating episode 3043
Sum of rewards 124.0


Sum of rewards 217.0
Simulating episode 3206
Sum of rewards 70.0
Simulating episode 3207
Sum of rewards 231.0
Simulating episode 3208
Sum of rewards 181.0
Simulating episode 3209
Sum of rewards 140.0
Simulating episode 3210
Sum of rewards 11.0
Simulating episode 3211
Sum of rewards 81.0
Simulating episode 3212
Sum of rewards 86.0
Simulating episode 3213
Sum of rewards 123.0
Simulating episode 3214
Sum of rewards 113.0
Simulating episode 3215
Sum of rewards 162.0
Simulating episode 3216
Sum of rewards 135.0
Simulating episode 3217
Sum of rewards 132.0
Simulating episode 3218
Sum of rewards 125.0
Simulating episode 3219
Sum of rewards 90.0
Simulating episode 3220
Sum of rewards 186.0
Simulating episode 3221
Sum of rewards 243.0
Simulating episode 3222
Sum of rewards 278.0
Simulating episode 3223
Sum of rewards 304.0
Simulating episode 3224
Sum of rewards 207.0
Simulating episode 3225
Sum of rewards 72.0
Simulating episode 3226
Sum of rewards 435.0
Simulating episode 3227
Sum of rewards 1

Sum of rewards 156.0
Simulating episode 3393
Sum of rewards 268.0
Simulating episode 3394
Sum of rewards 165.0
Simulating episode 3395
Sum of rewards 133.0
Simulating episode 3396
Sum of rewards 269.0
Simulating episode 3397
Sum of rewards 162.0
Simulating episode 3398
Sum of rewards 52.0
Simulating episode 3399
Sum of rewards 64.0
Simulating episode 3400
Sum of rewards 99.0
Simulating episode 3401
Sum of rewards 132.0
Simulating episode 3402
Sum of rewards 193.0
Simulating episode 3403
Sum of rewards 58.0
Simulating episode 3404
Sum of rewards 307.0
Simulating episode 3405
Sum of rewards 242.0
Simulating episode 3406
Sum of rewards 76.0
Simulating episode 3407
Sum of rewards 146.0
Simulating episode 3408
Sum of rewards 77.0
Simulating episode 3409
Sum of rewards 104.0
Simulating episode 3410
Sum of rewards 162.0
Simulating episode 3411
Sum of rewards 74.0
Simulating episode 3412
Sum of rewards 139.0
Simulating episode 3413
Sum of rewards 112.0
Simulating episode 3414
Sum of rewards 12

Sum of rewards 151.0
Simulating episode 3579
Sum of rewards 129.0
Simulating episode 3580
Sum of rewards 173.0
Simulating episode 3581
Sum of rewards 191.0
Simulating episode 3582
Sum of rewards 28.0
Simulating episode 3583
Sum of rewards 272.0
Simulating episode 3584
Sum of rewards 149.0
Simulating episode 3585
Sum of rewards 128.0
Simulating episode 3586
Sum of rewards 93.0
Simulating episode 3587
Sum of rewards 210.0
Simulating episode 3588
Sum of rewards 238.0
Simulating episode 3589
Sum of rewards 483.0
Simulating episode 3590
Sum of rewards 178.0
Simulating episode 3591
Sum of rewards 165.0
Simulating episode 3592
Sum of rewards 20.0
Simulating episode 3593
Sum of rewards 137.0
Simulating episode 3594
Sum of rewards 113.0
Simulating episode 3595
Sum of rewards 125.0
Simulating episode 3596
Sum of rewards 384.0
Simulating episode 3597
Sum of rewards 164.0
Simulating episode 3598
Sum of rewards 96.0
Simulating episode 3599
Sum of rewards 97.0
Simulating episode 3600
Sum of rewards 

Sum of rewards 56.0
Simulating episode 3766
Sum of rewards 175.0
Simulating episode 3767
Sum of rewards 103.0
Simulating episode 3768
Sum of rewards 185.0
Simulating episode 3769
Sum of rewards 109.0
Simulating episode 3770
Sum of rewards 174.0
Simulating episode 3771
Sum of rewards 75.0
Simulating episode 3772
Sum of rewards 136.0
Simulating episode 3773
Sum of rewards 38.0
Simulating episode 3774
Sum of rewards 86.0
Simulating episode 3775
Sum of rewards 139.0
Simulating episode 3776
Sum of rewards 136.0
Simulating episode 3777
Sum of rewards 45.0
Simulating episode 3778
Sum of rewards 139.0
Simulating episode 3779
Sum of rewards 80.0
Simulating episode 3780
Sum of rewards 156.0
Simulating episode 3781
Sum of rewards 120.0
Simulating episode 3782
Sum of rewards 104.0
Simulating episode 3783
Sum of rewards 123.0
Simulating episode 3784
Sum of rewards 203.0
Simulating episode 3785
Sum of rewards 118.0
Simulating episode 3786
Sum of rewards 136.0
Simulating episode 3787
Sum of rewards 1

Sum of rewards 194.0
Simulating episode 3951
Sum of rewards 185.0
Simulating episode 3952
Sum of rewards 171.0
Simulating episode 3953
Sum of rewards 102.0
Simulating episode 3954
Sum of rewards 218.0
Simulating episode 3955
Sum of rewards 187.0
Simulating episode 3956
Sum of rewards 292.0
Simulating episode 3957
Sum of rewards 281.0
Simulating episode 3958
Sum of rewards 244.0
Simulating episode 3959
Sum of rewards 38.0
Simulating episode 3960
Sum of rewards 110.0
Simulating episode 3961
Sum of rewards 156.0
Simulating episode 3962
Sum of rewards 134.0
Simulating episode 3963
Sum of rewards 143.0
Simulating episode 3964
Sum of rewards 111.0
Simulating episode 3965
Sum of rewards 156.0
Simulating episode 3966
Sum of rewards 105.0
Simulating episode 3967
Sum of rewards 156.0
Simulating episode 3968
Sum of rewards 205.0
Simulating episode 3969
Sum of rewards 234.0
Simulating episode 3970
Sum of rewards 196.0
Simulating episode 3971
Sum of rewards 273.0
Simulating episode 3972
Sum of rewa

Sum of rewards 285.0
Simulating episode 4136
Sum of rewards 169.0
Simulating episode 4137
Sum of rewards 333.0
Simulating episode 4138
Sum of rewards 213.0
Simulating episode 4139
Sum of rewards 177.0
Simulating episode 4140
Sum of rewards 122.0
Simulating episode 4141
Sum of rewards 99.0
Simulating episode 4142
Sum of rewards 142.0
Simulating episode 4143
Sum of rewards 78.0
Simulating episode 4144
Sum of rewards 167.0
Simulating episode 4145
Sum of rewards 109.0
Simulating episode 4146
Sum of rewards 86.0
Simulating episode 4147
Sum of rewards 170.0
Simulating episode 4148
Sum of rewards 121.0
Simulating episode 4149
Sum of rewards 156.0
Simulating episode 4150
Sum of rewards 105.0
Simulating episode 4151
Sum of rewards 211.0
Simulating episode 4152
Sum of rewards 75.0
Simulating episode 4153
Sum of rewards 94.0
Simulating episode 4154
Sum of rewards 159.0
Simulating episode 4155
Sum of rewards 62.0
Simulating episode 4156
Sum of rewards 61.0
Simulating episode 4157
Sum of rewards 12

Sum of rewards 76.0
Simulating episode 4321
Sum of rewards 111.0
Simulating episode 4322
Sum of rewards 81.0
Simulating episode 4323
Sum of rewards 72.0
Simulating episode 4324
Sum of rewards 120.0
Simulating episode 4325
Sum of rewards 174.0
Simulating episode 4326
Sum of rewards 167.0
Simulating episode 4327
Sum of rewards 97.0
Simulating episode 4328
Sum of rewards 124.0
Simulating episode 4329
Sum of rewards 187.0
Simulating episode 4330
Sum of rewards 131.0
Simulating episode 4331
Sum of rewards 233.0
Simulating episode 4332
Sum of rewards 212.0
Simulating episode 4333
Sum of rewards 178.0
Simulating episode 4334
Sum of rewards 124.0
Simulating episode 4335
Sum of rewards 89.0
Simulating episode 4336
Sum of rewards 22.0
Simulating episode 4337
Sum of rewards 106.0
Simulating episode 4338
Sum of rewards 139.0
Simulating episode 4339
Sum of rewards 32.0
Simulating episode 4340
Sum of rewards 106.0
Simulating episode 4341
Sum of rewards 176.0
Simulating episode 4342
Sum of rewards 19

Sum of rewards 176.0
Simulating episode 4506
Sum of rewards 146.0
Simulating episode 4507
Sum of rewards 285.0
Simulating episode 4508
Sum of rewards 75.0
Simulating episode 4509
Sum of rewards 137.0
Simulating episode 4510
Sum of rewards 273.0
Simulating episode 4511
Sum of rewards 176.0
Simulating episode 4512
Sum of rewards 153.0
Simulating episode 4513
Sum of rewards 174.0
Simulating episode 4514
Sum of rewards 132.0
Simulating episode 4515
Sum of rewards 186.0
Simulating episode 4516
Sum of rewards 41.0
Simulating episode 4517
Sum of rewards 97.0
Simulating episode 4518
Sum of rewards 78.0
Simulating episode 4519
Sum of rewards 249.0
Simulating episode 4520
Sum of rewards 102.0
Simulating episode 4521
Sum of rewards 113.0
Simulating episode 4522
Sum of rewards 141.0
Simulating episode 4523
Sum of rewards 155.0
Simulating episode 4524
Sum of rewards 115.0
Simulating episode 4525
Sum of rewards 86.0
Simulating episode 4526
Sum of rewards 110.0
Simulating episode 4527
Sum of rewards 

Sum of rewards 116.0
Simulating episode 4690
Sum of rewards 262.0
Simulating episode 4691
Sum of rewards 198.0
Simulating episode 4692
Sum of rewards 67.0
Simulating episode 4693
Sum of rewards 148.0
Simulating episode 4694
Sum of rewards 86.0
Simulating episode 4695
Sum of rewards 85.0
Simulating episode 4696
Sum of rewards 168.0
Simulating episode 4697
Sum of rewards 145.0
Simulating episode 4698
Sum of rewards 130.0
Simulating episode 4699
Sum of rewards 84.0
Simulating episode 4700
Sum of rewards 73.0
Simulating episode 4701
Sum of rewards 30.0
Simulating episode 4702
Sum of rewards 204.0
Simulating episode 4703
Sum of rewards 154.0
Simulating episode 4704
Sum of rewards 146.0
Simulating episode 4705
Sum of rewards 238.0
Simulating episode 4706
Sum of rewards 68.0
Simulating episode 4707
Sum of rewards 161.0
Simulating episode 4708
Sum of rewards 324.0
Simulating episode 4709
Sum of rewards 275.0
Simulating episode 4710
Sum of rewards 66.0
Simulating episode 4711
Sum of rewards 89.

Sum of rewards 182.0
Simulating episode 4874
Sum of rewards 131.0
Simulating episode 4875
Sum of rewards 35.0
Simulating episode 4876
Sum of rewards 232.0
Simulating episode 4877
Sum of rewards 128.0
Simulating episode 4878
Sum of rewards 142.0
Simulating episode 4879
Sum of rewards 251.0
Simulating episode 4880
Sum of rewards 158.0
Simulating episode 4881
Sum of rewards 210.0
Simulating episode 4882
Sum of rewards 192.0
Simulating episode 4883
Sum of rewards 242.0
Simulating episode 4884
Sum of rewards 87.0
Simulating episode 4885
Sum of rewards 92.0
Simulating episode 4886
Sum of rewards 176.0
Simulating episode 4887
Sum of rewards 206.0
Simulating episode 4888
Sum of rewards 189.0
Simulating episode 4889
Sum of rewards 246.0
Simulating episode 4890
Sum of rewards 79.0
Simulating episode 4891
Sum of rewards 150.0
Simulating episode 4892
Sum of rewards 153.0
Simulating episode 4893
Sum of rewards 162.0
Simulating episode 4894
Sum of rewards 154.0
Simulating episode 4895
Sum of rewards

Sum of rewards 191.0
Simulating episode 5058
Sum of rewards 142.0
Simulating episode 5059
Sum of rewards 242.0
Simulating episode 5060
Sum of rewards 167.0
Simulating episode 5061
Sum of rewards 164.0
Simulating episode 5062
Sum of rewards 200.0
Simulating episode 5063
Sum of rewards 83.0
Simulating episode 5064
Sum of rewards 102.0
Simulating episode 5065
Sum of rewards 129.0
Simulating episode 5066
Sum of rewards 141.0
Simulating episode 5067
Sum of rewards 120.0
Simulating episode 5068
Sum of rewards 222.0
Simulating episode 5069
Sum of rewards 92.0
Simulating episode 5070
Sum of rewards 166.0
Simulating episode 5071
Sum of rewards 96.0
Simulating episode 5072
Sum of rewards 12.0
Simulating episode 5073
Sum of rewards 164.0
Simulating episode 5074
Sum of rewards 82.0
Simulating episode 5075
Sum of rewards 117.0
Simulating episode 5076
Sum of rewards 152.0
Simulating episode 5077
Sum of rewards 180.0
Simulating episode 5078
Sum of rewards 94.0
Simulating episode 5079
Sum of rewards 1

Sum of rewards 169.0
Simulating episode 5241
Sum of rewards 122.0
Simulating episode 5242
Sum of rewards 302.0
Simulating episode 5243
Sum of rewards 80.0
Simulating episode 5244
Sum of rewards 127.0
Simulating episode 5245
Sum of rewards 73.0
Simulating episode 5246
Sum of rewards 178.0
Simulating episode 5247
Sum of rewards 236.0
Simulating episode 5248
Sum of rewards 110.0
Simulating episode 5249
Sum of rewards 201.0
Simulating episode 5250
Sum of rewards 151.0
Simulating episode 5251
Sum of rewards 196.0
Simulating episode 5252
Sum of rewards 145.0
Simulating episode 5253
Sum of rewards 129.0
Simulating episode 5254
Sum of rewards 255.0
Simulating episode 5255
Sum of rewards 113.0
Simulating episode 5256
Sum of rewards 246.0
Simulating episode 5257
Sum of rewards 223.0
Simulating episode 5258
Sum of rewards 269.0
Simulating episode 5259
Sum of rewards 169.0
Simulating episode 5260
Sum of rewards 359.0
Simulating episode 5261
Sum of rewards 148.0
Simulating episode 5262
Sum of rewar

Sum of rewards 406.0
Simulating episode 5424
Sum of rewards 231.0
Simulating episode 5425
Sum of rewards 207.0
Simulating episode 5426
Sum of rewards 214.0
Simulating episode 5427
Sum of rewards 83.0
Simulating episode 5428
Sum of rewards 170.0
Simulating episode 5429
Sum of rewards 87.0
Simulating episode 5430
Sum of rewards 88.0
Simulating episode 5431
Sum of rewards 127.0
Simulating episode 5432
Sum of rewards 123.0
Simulating episode 5433
Sum of rewards 79.0
Simulating episode 5434
Sum of rewards 91.0
Simulating episode 5435
Sum of rewards 173.0
Simulating episode 5436
Sum of rewards 133.0
Simulating episode 5437
Sum of rewards 268.0
Simulating episode 5438
Sum of rewards 36.0
Simulating episode 5439
Sum of rewards 217.0
Simulating episode 5440
Sum of rewards 53.0
Simulating episode 5441
Sum of rewards 111.0
Simulating episode 5442
Sum of rewards 215.0
Simulating episode 5443
Sum of rewards 180.0
Simulating episode 5444
Sum of rewards 112.0
Simulating episode 5445
Sum of rewards 24

Sum of rewards 151.0
Simulating episode 5608
Sum of rewards 34.0
Simulating episode 5609
Sum of rewards 47.0
Simulating episode 5610
Sum of rewards 136.0
Simulating episode 5611
Sum of rewards 148.0
Simulating episode 5612
Sum of rewards 195.0
Simulating episode 5613
Sum of rewards 142.0
Simulating episode 5614
Sum of rewards 148.0
Simulating episode 5615
Sum of rewards 69.0
Simulating episode 5616
Sum of rewards 28.0
Simulating episode 5617
Sum of rewards 152.0
Simulating episode 5618
Sum of rewards 271.0
Simulating episode 5619
Sum of rewards 197.0
Simulating episode 5620
Sum of rewards 69.0
Simulating episode 5621
Sum of rewards 79.0
Simulating episode 5622
Sum of rewards 166.0
Simulating episode 5623
Sum of rewards 171.0
Simulating episode 5624
Sum of rewards 126.0
Simulating episode 5625
Sum of rewards 179.0
Simulating episode 5626
Sum of rewards 69.0
Simulating episode 5627
Sum of rewards 50.0
Simulating episode 5628
Sum of rewards 38.0
Simulating episode 5629
Sum of rewards 298.

Sum of rewards 204.0
Simulating episode 5793
Sum of rewards 104.0
Simulating episode 5794
Sum of rewards 155.0
Simulating episode 5795
Sum of rewards 233.0
Simulating episode 5796
Sum of rewards 74.0
Simulating episode 5797
Sum of rewards 137.0
Simulating episode 5798
Sum of rewards 149.0
Simulating episode 5799
Sum of rewards 99.0
Simulating episode 5800
Sum of rewards 271.0
Simulating episode 5801
Sum of rewards 115.0
Simulating episode 5802
Sum of rewards 252.0
Simulating episode 5803
Sum of rewards 201.0
Simulating episode 5804
Sum of rewards 150.0
Simulating episode 5805
Sum of rewards 250.0
Simulating episode 5806
Sum of rewards 163.0
Simulating episode 5807
Sum of rewards 188.0
Simulating episode 5808
Sum of rewards 156.0
Simulating episode 5809
Sum of rewards 64.0
Simulating episode 5810
Sum of rewards 292.0
Simulating episode 5811
Sum of rewards 100.0
Simulating episode 5812
Sum of rewards 137.0
Simulating episode 5813
Sum of rewards 196.0
Simulating episode 5814
Sum of reward

Sum of rewards 299.0
Simulating episode 5976
Sum of rewards 353.0
Simulating episode 5977
Sum of rewards 346.0
Simulating episode 5978
Sum of rewards 193.0
Simulating episode 5979
Sum of rewards 143.0
Simulating episode 5980
Sum of rewards 135.0
Simulating episode 5981
Sum of rewards 106.0
Simulating episode 5982
Sum of rewards 259.0
Simulating episode 5983
Sum of rewards 142.0
Simulating episode 5984
Sum of rewards 66.0
Simulating episode 5985
Sum of rewards 110.0
Simulating episode 5986
Sum of rewards 170.0
Simulating episode 5987
Sum of rewards 118.0
Simulating episode 5988
Sum of rewards 138.0
Simulating episode 5989
Sum of rewards 189.0
Simulating episode 5990
Sum of rewards 251.0
Simulating episode 5991
Sum of rewards 187.0
Simulating episode 5992
Sum of rewards 210.0
Simulating episode 5993
Sum of rewards 122.0
Simulating episode 5994
Sum of rewards 128.0
Simulating episode 5995
Sum of rewards 205.0
Simulating episode 5996
Sum of rewards 87.0
Simulating episode 5997
Sum of rewar

Sum of rewards 238.0
Simulating episode 6160
Sum of rewards 202.0
Simulating episode 6161
Sum of rewards 262.0
Simulating episode 6162
Sum of rewards 115.0
Simulating episode 6163
Sum of rewards 147.0
Simulating episode 6164
Sum of rewards 168.0
Simulating episode 6165
Sum of rewards 358.0
Simulating episode 6166
Sum of rewards 191.0
Simulating episode 6167
Sum of rewards 176.0
Simulating episode 6168
Sum of rewards 89.0
Simulating episode 6169
Sum of rewards 112.0
Simulating episode 6170
Sum of rewards 271.0
Simulating episode 6171
Sum of rewards 183.0
Simulating episode 6172
Sum of rewards 254.0
Simulating episode 6173
Sum of rewards 173.0
Simulating episode 6174
Sum of rewards 318.0
Simulating episode 6175
Sum of rewards 138.0
Simulating episode 6176
Sum of rewards 94.0
Simulating episode 6177
Sum of rewards 182.0
Simulating episode 6178
Sum of rewards 183.0
Simulating episode 6179
Sum of rewards 95.0
Simulating episode 6180
Sum of rewards 142.0
Simulating episode 6181
Sum of reward

Sum of rewards 236.0
Simulating episode 6344
Sum of rewards 94.0
Simulating episode 6345
Sum of rewards 91.0
Simulating episode 6346
Sum of rewards 173.0
Simulating episode 6347
Sum of rewards 111.0
Simulating episode 6348
Sum of rewards 129.0
Simulating episode 6349
Sum of rewards 168.0
Simulating episode 6350
Sum of rewards 368.0
Simulating episode 6351
Sum of rewards 98.0
Simulating episode 6352
Sum of rewards 109.0
Simulating episode 6353
Sum of rewards 170.0
Simulating episode 6354
Sum of rewards 97.0
Simulating episode 6355
Sum of rewards 185.0
Simulating episode 6356
Sum of rewards 172.0
Simulating episode 6357
Sum of rewards 141.0
Simulating episode 6358
Sum of rewards 153.0
Simulating episode 6359
Sum of rewards 103.0
Simulating episode 6360
Sum of rewards 65.0
Simulating episode 6361
Sum of rewards 123.0
Simulating episode 6362
Sum of rewards 94.0
Simulating episode 6363
Sum of rewards 288.0
Simulating episode 6364
Sum of rewards 116.0
Simulating episode 6365
Sum of rewards 9

Sum of rewards 131.0
Simulating episode 6527
Sum of rewards 96.0
Simulating episode 6528
Sum of rewards 145.0
Simulating episode 6529
Sum of rewards 402.0
Simulating episode 6530
Sum of rewards 183.0
Simulating episode 6531
Sum of rewards 121.0
Simulating episode 6532
Sum of rewards 236.0
Simulating episode 6533
Sum of rewards 107.0
Simulating episode 6534
Sum of rewards 136.0
Simulating episode 6535
Sum of rewards 72.0
Simulating episode 6536
Sum of rewards 204.0
Simulating episode 6537
Sum of rewards 91.0
Simulating episode 6538
Sum of rewards 225.0
Simulating episode 6539
Sum of rewards 168.0
Simulating episode 6540
Sum of rewards 284.0
Simulating episode 6541
Sum of rewards 81.0
Simulating episode 6542
Sum of rewards 134.0
Simulating episode 6543
Sum of rewards 166.0
Simulating episode 6544
Sum of rewards 169.0
Simulating episode 6545
Sum of rewards 178.0
Simulating episode 6546
Sum of rewards 234.0
Simulating episode 6547
Sum of rewards 220.0
Simulating episode 6548
Sum of rewards

Simulating episode 6710
Sum of rewards 106.0
Simulating episode 6711
Sum of rewards 101.0
Simulating episode 6712
Sum of rewards 107.0
Simulating episode 6713
Sum of rewards 187.0
Simulating episode 6714
Sum of rewards 241.0
Simulating episode 6715
Sum of rewards 97.0
Simulating episode 6716
Sum of rewards 93.0
Simulating episode 6717
Sum of rewards 56.0
Simulating episode 6718
Sum of rewards 88.0
Simulating episode 6719
Sum of rewards 111.0
Simulating episode 6720
Sum of rewards 193.0
Simulating episode 6721
Sum of rewards 137.0
Simulating episode 6722
Sum of rewards 174.0
Simulating episode 6723
Sum of rewards 109.0
Simulating episode 6724
Sum of rewards 181.0
Simulating episode 6725
Sum of rewards 185.0
Simulating episode 6726
Sum of rewards 197.0
Simulating episode 6727
Sum of rewards 120.0
Simulating episode 6728
Sum of rewards 164.0
Simulating episode 6729
Sum of rewards 310.0
Simulating episode 6730
Sum of rewards 180.0
Simulating episode 6731
Sum of rewards 202.0
Simulating epi

Sum of rewards 123.0
Simulating episode 6894
Sum of rewards 193.0
Simulating episode 6895
Sum of rewards 178.0
Simulating episode 6896
Sum of rewards 199.0
Simulating episode 6897
Sum of rewards 113.0
Simulating episode 6898
Sum of rewards 229.0
Simulating episode 6899
Sum of rewards 120.0
Simulating episode 6900
Sum of rewards 32.0
Simulating episode 6901
Sum of rewards 155.0
Simulating episode 6902
Sum of rewards 155.0
Simulating episode 6903
Sum of rewards 100.0
Simulating episode 6904
Sum of rewards 129.0
Simulating episode 6905
Sum of rewards 330.0
Simulating episode 6906
Sum of rewards 225.0
Simulating episode 6907
Sum of rewards 153.0
Simulating episode 6908
Sum of rewards 167.0
Simulating episode 6909
Sum of rewards 330.0
Simulating episode 6910
Sum of rewards 265.0
Simulating episode 6911
Sum of rewards 180.0
Simulating episode 6912
Sum of rewards 145.0
Simulating episode 6913
Sum of rewards 62.0
Simulating episode 6914
Sum of rewards 138.0
Simulating episode 6915
Sum of rewar

Sum of rewards 162.0
Simulating episode 7078
Sum of rewards 143.0
Simulating episode 7079
Sum of rewards 123.0
Simulating episode 7080
Sum of rewards 132.0
Simulating episode 7081
Sum of rewards 114.0
Simulating episode 7082
Sum of rewards 153.0
Simulating episode 7083
Sum of rewards 114.0
Simulating episode 7084
Sum of rewards 158.0
Simulating episode 7085
Sum of rewards 212.0
Simulating episode 7086
Sum of rewards 163.0
Simulating episode 7087
Sum of rewards 147.0
Simulating episode 7088
Sum of rewards 180.0
Simulating episode 7089
Sum of rewards 156.0
Simulating episode 7090
Sum of rewards 175.0
Simulating episode 7091
Sum of rewards 132.0
Simulating episode 7092
Sum of rewards 155.0
Simulating episode 7093
Sum of rewards 148.0
Simulating episode 7094
Sum of rewards 201.0
Simulating episode 7095
Sum of rewards 185.0
Simulating episode 7096
Sum of rewards 179.0
Simulating episode 7097
Sum of rewards 184.0
Simulating episode 7098
Sum of rewards 163.0
Simulating episode 7099
Sum of rew

Sum of rewards 212.0
Simulating episode 7261
Sum of rewards 199.0
Simulating episode 7262
Sum of rewards 163.0
Simulating episode 7263
Sum of rewards 159.0
Simulating episode 7264
Sum of rewards 131.0
Simulating episode 7265
Sum of rewards 144.0
Simulating episode 7266
Sum of rewards 123.0
Simulating episode 7267
Sum of rewards 131.0
Simulating episode 7268
Sum of rewards 150.0
Simulating episode 7269
Sum of rewards 129.0
Simulating episode 7270
Sum of rewards 102.0
Simulating episode 7271
Sum of rewards 86.0
Simulating episode 7272
Sum of rewards 121.0
Simulating episode 7273
Sum of rewards 172.0
Simulating episode 7274
Sum of rewards 165.0
Simulating episode 7275
Sum of rewards 216.0
Simulating episode 7276
Sum of rewards 256.0
Simulating episode 7277
Sum of rewards 168.0
Simulating episode 7278
Sum of rewards 151.0
Simulating episode 7279
Sum of rewards 157.0
Simulating episode 7280
Sum of rewards 188.0
Simulating episode 7281
Sum of rewards 308.0
Simulating episode 7282
Sum of rewa

Sum of rewards 126.0
Simulating episode 7444
Sum of rewards 140.0
Simulating episode 7445
Sum of rewards 106.0
Simulating episode 7446
Sum of rewards 77.0
Simulating episode 7447
Sum of rewards 82.0
Simulating episode 7448
Sum of rewards 149.0
Simulating episode 7449
Sum of rewards 133.0
Simulating episode 7450
Sum of rewards 105.0
Simulating episode 7451
Sum of rewards 216.0
Simulating episode 7452
Sum of rewards 266.0
Simulating episode 7453
Sum of rewards 203.0
Simulating episode 7454
Sum of rewards 217.0
Simulating episode 7455
Sum of rewards 234.0
Simulating episode 7456
Sum of rewards 130.0
Simulating episode 7457
Sum of rewards 163.0
Simulating episode 7458
Sum of rewards 166.0
Simulating episode 7459
Sum of rewards 159.0
Simulating episode 7460
Sum of rewards 145.0
Simulating episode 7461
Sum of rewards 160.0
Simulating episode 7462
Sum of rewards 192.0
Simulating episode 7463
Sum of rewards 257.0
Simulating episode 7464
Sum of rewards 159.0
Simulating episode 7465
Sum of rewar

Sum of rewards 136.0
Simulating episode 7628
Sum of rewards 126.0
Simulating episode 7629
Sum of rewards 193.0
Simulating episode 7630
Sum of rewards 73.0
Simulating episode 7631
Sum of rewards 168.0
Simulating episode 7632
Sum of rewards 156.0
Simulating episode 7633
Sum of rewards 138.0
Simulating episode 7634
Sum of rewards 159.0
Simulating episode 7635
Sum of rewards 175.0
Simulating episode 7636
Sum of rewards 104.0
Simulating episode 7637
Sum of rewards 160.0
Simulating episode 7638
Sum of rewards 154.0
Simulating episode 7639
Sum of rewards 179.0
Simulating episode 7640
Sum of rewards 185.0
Simulating episode 7641
Sum of rewards 170.0
Simulating episode 7642
Sum of rewards 153.0
Simulating episode 7643
Sum of rewards 143.0
Simulating episode 7644
Sum of rewards 183.0
Simulating episode 7645
Sum of rewards 174.0
Simulating episode 7646
Sum of rewards 147.0
Simulating episode 7647
Sum of rewards 139.0
Simulating episode 7648
Sum of rewards 155.0
Simulating episode 7649
Sum of rewa