In [1]:
from matplotlib import pylab
from pylab import *
import numpy as np
from datetime import datetime
from collections import deque
from keras.layers import Dense, Flatten, Conv2D, Input
from keras.optimizers import Adam, RMSprop
from keras.models import Sequential
# from environment_cnn_dqn_1 import Env as ENV_1
# from environment_cnn_dqn_2 import Env as ENV_2
import cartpole
import _pickle as pickle
import time
from keras import backend as K
import tensorflow as tf
import threading
from keras.models import Model
import os

Using TensorFlow backend.


In [2]:
SAVE_FOLDER_NAME = "./save"
GRAPTH_FOLDER_NAME = "./save/graph"
MODEL_FOLDER_NAME = "./save/model"
SAVED_ACTOR_MODEL_PATH = "./save/model/A3C_actor_rotary_pendulum.h5"
SAVED_CRITIC_MODEL_PATH = "./save/model/A3C_critic_rotary_pendulum.h5"
LOCAL_GRAPH_PATH = "./save/graph/A3C_rotary_pendulum_env_{0}.png"
LOSS_GRAPH_PATH_1 = "./save/graph/A3C_actor_loss_env_{0}.png"
LOSS_GRAPH_PATH_2 = "./save/graph/A3C_critic_loss_env_{0}.png"
LAST_EPISODE_N_PATH = "./save/A3C_last_episode_num.txt"
SAVED_REPLAY_MEMORY_PATH = "./save/A3C_memory.pickle"
GRAPH_DATA_PATH = "./save/A3C_graph_data.pickle"

EPISODES = 10000
HISTORY_LENGTH = 8
MEMORY_LENGTH = 2000
DISCOUNT_FACTOR = 0.99
LEARNING_RATE = 0.001
THREADS = 1
# ACTION_SIZE = 3
ACTION_SIZE = 2
STATE_SIZE = 4
ENV_COUNT = 0
TRAIN_ITERATION_N = 1000
BATCH_SIZE = 256
USE_IMITATION = False

if not os.path.exists(SAVE_FOLDER_NAME):
    os.makedirs(SAVE_FOLDER_NAME)

if not os.path.exists(GRAPTH_FOLDER_NAME):
    os.makedirs(GRAPTH_FOLDER_NAME)

if not os.path.exists(MODEL_FOLDER_NAME):
    os.makedirs(MODEL_FOLDER_NAME)

In [3]:
class A3C:
    def __init__(self, state_size, action_size):
        self.load_model = False
        self.state_size = state_size
        self.action_size = action_size
        self.replay_memory = deque(maxlen=MEMORY_LENGTH)
        self.hidden1, self.hidden2 = 24, 24
        self.actor, self.critic = self.build_model()
        self.sess = tf.InteractiveSession()
        K.set_session(self.sess)
        self.sess.run(tf.global_variables_initializer())
        self.actor_optimizer = self.actor_optimizer()
        self.critic_optimizer = self.critic_optimizer()
        

        if os.path.exists(SAVED_ACTOR_MODEL_PATH) and os.path.exists(SAVED_CRITIC_MODEL_PATH):
            self.load_model_from_saved_files()

    def build_model(self):
        state = Input(batch_shape=(None,  self.state_size))
        shared = Dense(self.hidden1, input_dim=self.state_size, activation='relu', kernel_initializer='glorot_uniform')(state)

        actor_hidden = Dense(self.hidden2, activation='relu', kernel_initializer='glorot_uniform')(shared)
        action_prob = Dense(self.action_size, activation='softmax', kernel_initializer='glorot_uniform')(actor_hidden)

        value_hidden = Dense(self.hidden2, activation='relu', kernel_initializer='he_uniform')(shared)
        state_value = Dense(1, activation='linear', kernel_initializer='he_uniform')(value_hidden)

        actor = Model(inputs=state, outputs=action_prob)
        critic = Model(inputs=state, outputs=state_value)

        actor._make_predict_function()
        critic._make_predict_function()

        actor.summary()
        critic.summary()

        return actor, critic

    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=[None, ])
        policy = self.actor.output
        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        loss = -K.sum(eligibility)
        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        actor_loss = loss + 0.01 * entropy
        optimizer = Adam(lr=LEARNING_RATE)
        # optimizer = RMSprop(lr=LEARNING_RATE, rho=0.98, epsilon=0.001)

        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        train = K.function([self.actor.input, action, advantages], [loss], updates=updates)

        return train

    def critic_optimizer(self):
        discounted_reward = K.placeholder(shape=(None,))
        value = self.critic.output
        loss = K.mean(K.square(discounted_reward - value))
        optimizer = Adam(lr=LEARNING_RATE)
        # optimizer = RMSprop(lr=LEARNING_RATE, rho=0.98, epsilon=0.001)

        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)
        train = K.function([self.critic.input, discounted_reward], [loss], updates=updates)

        return train

    def load_model_from_saved_files(self):
        self.actor.load_weights(SAVED_ACTOR_MODEL_PATH)
        self.critic.load_weights(SAVED_CRITIC_MODEL_PATH)
        self.replay_memory_load(SAVED_REPLAY_MEMORY_PATH)

    def replay_memory_dump(self, file_name):
        with open(file_name, 'wb') as memory_file:
            pickle.dump(self.replay_memory, memory_file)

    def replay_memory_load(self, file_name):
        with open(file_name, 'rb') as memory_file:
            self.replay_memory = pickle.load(memory_file)

    def append_sample(self, state, action, reward):
        self.replay_memory.append((state, action, reward))

    def train_model(self):
        agents = [
            Agent(
                self, self.action_size, self.state_size,
                [self.actor, self.critic], [self.actor_optimizer, self.critic_optimizer],
                DISCOUNT_FACTOR, self.replay_memory, self.sess, id
            ) for id in range(THREADS)
        ]

        for agent in agents:
            time.sleep(1)
            agent.start()

    def save_model(self, scores, episodes, agent_id):
        pylab.clf()
        pylab.figure(1)
        pylab.title("SCORE")
        pylab.plot(episodes, scores, 'g--')
        pylab.plot(episodes, self.exp_moving_average(scores, 10), 'r')
        pylab.savefig(LOCAL_GRAPH_PATH.format(agent_id))

        self.actor.save_weights(SAVED_ACTOR_MODEL_PATH)
        self.critic.save_weights(SAVED_CRITIC_MODEL_PATH)
        self.replay_memory_dump(SAVED_REPLAY_MEMORY_PATH)

    # select action based on epsilon greedy policy
    def get_action(self, state):
        policy = self.actor.predict(np.reshape(state, [1, self.state_size]))[0]
        return np.random.choice(self.action_size, 1, p=policy)[0]

    @staticmethod
    def exp_moving_average(values, window):
        if window >= len(values):
            sma = np.mean(np.asarray(values))
            a = [sma] * len(values)
        else:
            weights = np.exp(np.linspace(-1., 0., window))
            weights /= weights.sum()
            a = np.convolve(values, weights, mode="full")[:len(values)]
            a[:window] = a[window]
        return a

In [4]:
# make agents(local) and start training
class Agent(threading.Thread):
    def __init__(self, a3c, action_size, state_size, model, optimizer, discount_factor, memory, sess, id):
        threading.Thread.__init__(self)
        self.a3c = a3c
        self.action_size = action_size
        self.state_size = state_size
        self.actor, self.critic = model
        self.optimizer = optimizer
        self.discount_factor = discount_factor
        self.memory = memory
        self.avg_p_max = 0
        self.avg_loss = 0
        self.id = id
        self.sess = sess

    def replay_memory_dump(self, file_name):
        with open(file_name, 'wb') as memory_file:
            pickle.dump(self.memory, memory_file)

    def replay_memory_load(self, file_name):
        with open(file_name, 'rb') as memory_file:
            self.memory = pickle.load(memory_file)

    def append_sample(self, state, action, reward):
        self.memory.append((state, action, reward))

    # Thread interactive with environment
    def run(self):
        global ENV_COUNT
        # global env
        episode_start_num = 0
        env = None
        
        env = cartpole.CartPoleEnv()
#         if ENV_COUNT == 0:
#             env = ENV_1()
#             ENV_COUNT += 1

#         elif ENV_COUNT == 1:
#             env = ENV_2()

#         else:
#             print("[WARNING] THREAD")

        scores, episodes, actor_losses, critic_losses = [], [], [], []
        step = 0
        success_cnt = 0
        e = 0

        print("[INFO] env-", env)

        if os.path.exists(LAST_EPISODE_N_PATH):
            f = open(LAST_EPISODE_N_PATH, 'r')
            episode_start_num = int(f.readline())
            f.close()


        for episode in range(episode_start_num, EPISODES):
            done = False
            score = 0
            observe = env.reset()
#             state_for_manual_balance = observe
#             state = np.reshape(observe, [1, STATE_SIZE, 1, 1])
#             history = np.zeros([1, STATE_SIZE, HISTORY_LENGTH, 1])

#             for i in range(HISTORY_LENGTH):
#                 history = np.delete(history, 0, axis=2)
#                 history = np.append(history, state, axis=2)
#             history = np.reshape(history, [1, STATE_SIZE, HISTORY_LENGTH, 1])

            if len(self.memory) >= MEMORY_LENGTH - 1:
                e += 1

            while not done:
#                 print("ddd")
                step += 1
                previous_time = time.perf_counter()

                if USE_IMITATION:
                    # get action for the current history and go one step in environment
                    if len(self.memory) >= MEMORY_LENGTH - 1:
                        action= self.get_action(observe)
                    # imitation learning
                    else:
                        action = self.imitation(state_for_manual_balance, theta_n_k1, theta_dot_k1, alpha_n_k1, alpha_dot_k1)
                else:
                    action = self.get_action(observe)

                next_observe, reward, done, info = env.step(action)
                # pre-process the observation --> history
#                 prior_history = history

#                 state_for_manual_balance = next_observe
#                 next_state = [next_observe[0] * 100, next_observe[1], next_observe[2] * 100, next_observe[3]]
#                 next_state = np.reshape(next_state, (1, STATE_SIZE, 1, 1))
#                 history = np.delete(history, 0, axis=2)
#                 history = np.append(history, values=next_state, axis=2)
#                 history = np.reshape(history, [1, STATE_SIZE, HISTORY_LENGTH, 1])
                # save sample <s, a, r, s'> at replay memory
#                 if not (done and step == 1):
#                     self.append_sample(prior_history, action, reward)

                score += reward
                
                observe = next_observe

#                 self.avg_p_max += np.amax(self.actor.predict(np.float32(observe)))

                if not done:
                    while True:
                        current_time = time.perf_counter()
                        if current_time - previous_time >= 6 / 1000:
                            break
                else:
                    import random
#                     env.wait()


                    if len(self.memory) >= BATCH_SIZE and step > 5:
                        # Train model
                        for i in range(TRAIN_ITERATION_N):
                            mini_batch = (np.array(random.sample(self.memory, BATCH_SIZE)))[0][0]
                            actor_loss, critic_loss = self.train_model(score, action, mini_batch, done)
                            actor_losses.append(actor_loss)
                            critic_losses.append(critic_loss)

                    scores.append(score)
                    episodes.append(episode)

                    f = open(LAST_EPISODE_N_PATH, 'w')
                    f.write(str(episode) + "\n")
                    f.close()

                    self.a3c.save_model(scores, episodes, self.id)

                    print("id: {0}, episode:{1}  score:{2}  step:{3}  info:{4}  memory length:{5}".format(
                        self.id, episode, score, step, info, len(self.memory)
                    ))
                    # print("loss:", actor_losses, critic_losses)

                    step = 0
                    sys.stdout.flush()

                    # if serial successes are persisted, system will be shutted down.
                    if step >= 5000:
                        success_cnt += 1
                    else:
                        success_cnt = 0

                    if success_cnt >= 5:
                        env.close()
                        sys.exit()

    # In Policy Gradient, Q function is not available.
    # Instead agent uses sample returns for evaluating policy
#     def discount_rewards(self, rewards, history, done):
#         discounted_rewards = 0
#         if not done:
#             discounted_rewards = self.critic.predict(np.float32(history))
#         discounted_rewards = discounted_rewards * self.discount_factor + rewards
#         return discounted_rewards
    def discount_rewards(self, rewards, done=True):
        discounted_rewards = np.zeros_like(rewards)
        running_add = 0
        if not done:
            running_add = self.critic.predict(np.reshape(self.states[-1], (1, self.state_size)))[0]
        for t in reversed(range(0, len(rewards))):
            running_add = running_add * self.discount_factor + rewards[t]
            discounted_rewards[t] = running_add
        return discounted_rewards


    # update policy network and value network every episode
    def train_model(self, rewards, actions, history, done):
        discounted_rewards = self.discount_rewards(rewards, history, done)

        values = self.critic.predict(history)
        advantages = discounted_rewards - values[0]

        actor_loss = self.optimizer[0]([history, actions, advantages])
        critic_loss = self.optimizer[1]([history, discounted_rewards])

        return actor_loss, critic_loss

#     def get_action(self, history):
#         history = np.float32(history)
#         policy = self.actor.predict(history)[0]
#         policy = np.abs(policy) + [10e-10]
#         action_index = np.argmax(policy)
#         return action_index, policy

    def get_action(self, state):
        policy = self.actor.predict(np.reshape(state, [1, self.state_size]))[0]
        return np.random.choice(self.action_size, 1, p=policy)[0]
    
    def imitation(self, state_for_manual_balance, theta_n_k1, theta_dot_k1, alpha_n_k1, alpha_dot_k1):
        kp_theta = 2.0
        kd_theta = -2.0
        kp_alpha = -30.0
        kd_alpha = 2.5

        alpha = state_for_manual_balance[0]
        theta = state_for_manual_balance[2]

        theta_n = -theta
        theta_dot = (50.0 * theta_n) - (50.0 * theta_n_k1) + (0.7612 * theta_dot_k1)  # 5ms

        alpha_n = -alpha
        alpha_dot = (50.0 * alpha_n) - (50.0 * alpha_n_k1) + (0.7612 * alpha_dot_k1)  # 5ms

        # multiply by proportional and derivative gains
        motor_voltage = (theta * kp_theta) + (theta_dot * kd_theta) + (alpha * kp_alpha) + (
                alpha_dot * kd_alpha)

        # set the saturation limit to +/- 15V
        if motor_voltage > 15.0:
            motor_voltage = 15.0
        elif motor_voltage < -15.0:
            motor_voltage = -15.0

        # invert for positive CCW
        motor_voltage = -motor_voltage

        # convert the analog value to the PWM duty cycle that will produce the same average voltage
        motorPWM = motor_voltage * (625.0 / 15.0)
        motorPWM = int(motorPWM)
        motorPWM += -(motorPWM % 50)

        if motorPWM < 0:
            action = 0
        elif 0 < motorPWM:
            action = 2
        else:
            action = 1

        return action

In [5]:
if __name__ == "__main__":
    print(str(datetime.now()) + ' started!', flush=True)
    global_agent = A3C(action_size=ACTION_SIZE, state_size=STATE_SIZE)
    global_agent.train_model()

2019-03-26 17:25:15.387344 started!
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 24)                120       
_________________________________________________________________
dense_2 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 

id: 0, episode:90  score:23.0  step:23  info:{}  memory length:0
id: 0, episode:91  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:92  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:93  score:82.0  step:82  info:{}  memory length:0
id: 0, episode:94  score:34.0  step:34  info:{}  memory length:0
id: 0, episode:95  score:50.0  step:50  info:{}  memory length:0
id: 0, episode:96  score:47.0  step:47  info:{}  memory length:0
id: 0, episode:97  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:98  score:36.0  step:36  info:{}  memory length:0
id: 0, episode:99  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:100  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:101  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:102  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:103  score:31.0  step:31  info:{}  memory length:0
id: 0, episode:104  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:105  

id: 0, episode:215  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:216  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:217  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:218  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:219  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:220  score:43.0  step:43  info:{}  memory length:0
id: 0, episode:221  score:29.0  step:29  info:{}  memory length:0
id: 0, episode:222  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:223  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:224  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:225  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:226  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:227  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:228  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:229  score:11.0  step:11  info:{}  memory length:0
id: 0, episode

id: 0, episode:340  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:341  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:342  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:343  score:23.0  step:23  info:{}  memory length:0
id: 0, episode:344  score:27.0  step:27  info:{}  memory length:0
id: 0, episode:345  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:346  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:347  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:348  score:35.0  step:35  info:{}  memory length:0
id: 0, episode:349  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:350  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:351  score:33.0  step:33  info:{}  memory length:0
id: 0, episode:352  score:27.0  step:27  info:{}  memory length:0
id: 0, episode:353  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:354  score:18.0  step:18  info:{}  memory length:0
id: 0, epi

id: 0, episode:465  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:466  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:467  score:24.0  step:24  info:{}  memory length:0
id: 0, episode:468  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:469  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:470  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:471  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:472  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:473  score:53.0  step:53  info:{}  memory length:0
id: 0, episode:474  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:475  score:24.0  step:24  info:{}  memory length:0
id: 0, episode:476  score:63.0  step:63  info:{}  memory length:0
id: 0, episode:477  score:49.0  step:49  info:{}  memory length:0
id: 0, episode:478  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:479  score:37.0  step:37  info:{}  memory length:0
id: 0, epi

id: 0, episode:590  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:591  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:592  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:593  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:594  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:595  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:596  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:597  score:27.0  step:27  info:{}  memory length:0
id: 0, episode:598  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:599  score:49.0  step:49  info:{}  memory length:0
id: 0, episode:600  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:601  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:602  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:603  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:604  score:12.0  step:12  info:{}  memory length:0
id: 0, epi

id: 0, episode:715  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:716  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:717  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:718  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:719  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:720  score:23.0  step:23  info:{}  memory length:0
id: 0, episode:721  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:722  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:723  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:724  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:725  score:29.0  step:29  info:{}  memory length:0
id: 0, episode:726  score:41.0  step:41  info:{}  memory length:0
id: 0, episode:727  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:728  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:729  score:17.0  step:17  info:{}  memory length:0
id: 0, epi

id: 0, episode:840  score:32.0  step:32  info:{}  memory length:0
id: 0, episode:841  score:35.0  step:35  info:{}  memory length:0
id: 0, episode:842  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:843  score:51.0  step:51  info:{}  memory length:0
id: 0, episode:844  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:845  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:846  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:847  score:33.0  step:33  info:{}  memory length:0
id: 0, episode:848  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:849  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:850  score:28.0  step:28  info:{}  memory length:0
id: 0, episode:851  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:852  score:58.0  step:58  info:{}  memory length:0
id: 0, episode:853  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:854  score:10.0  step:10  info:{}  memory length:0
id: 0, epi

id: 0, episode:965  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:966  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:967  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:968  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:969  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:970  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:971  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:972  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:973  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:974  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:975  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:976  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:977  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:978  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:979  score:38.0  step:38  info:{}  memory length:0
id: 0, episode:9

id: 0, episode:1088  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:1089  score:49.0  step:49  info:{}  memory length:0
id: 0, episode:1090  score:28.0  step:28  info:{}  memory length:0
id: 0, episode:1091  score:39.0  step:39  info:{}  memory length:0
id: 0, episode:1092  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:1093  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1094  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:1095  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:1096  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1097  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1098  score:56.0  step:56  info:{}  memory length:0
id: 0, episode:1099  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:1100  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1101  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:1102  score:41.0  step:41  info:{}  memory length

id: 0, episode:1211  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:1212  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:1213  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1214  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:1215  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:1216  score:44.0  step:44  info:{}  memory length:0
id: 0, episode:1217  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:1218  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:1219  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:1220  score:41.0  step:41  info:{}  memory length:0
id: 0, episode:1221  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:1222  score:45.0  step:45  info:{}  memory length:0
id: 0, episode:1223  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:1224  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:1225  score:23.0  step:23  info:{}  memory length

id: 0, episode:1334  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1335  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:1336  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:1337  score:34.0  step:34  info:{}  memory length:0
id: 0, episode:1338  score:32.0  step:32  info:{}  memory length:0
id: 0, episode:1339  score:31.0  step:31  info:{}  memory length:0
id: 0, episode:1340  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:1341  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:1342  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:1343  score:24.0  step:24  info:{}  memory length:0
id: 0, episode:1344  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:1345  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:1346  score:32.0  step:32  info:{}  memory length:0
id: 0, episode:1347  score:38.0  step:38  info:{}  memory length:0
id: 0, episode:1348  score:11.0  step:11  info:{}  memory leng

id: 0, episode:1457  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:1458  score:28.0  step:28  info:{}  memory length:0
id: 0, episode:1459  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:1460  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:1461  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:1462  score:27.0  step:27  info:{}  memory length:0
id: 0, episode:1463  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1464  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:1465  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1466  score:42.0  step:42  info:{}  memory length:0
id: 0, episode:1467  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1468  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1469  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:1470  score:42.0  step:42  info:{}  memory length:0
id: 0, episode:1471  score:24.0  step:24  info:{}  memory leng

id: 0, episode:1580  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:1581  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1582  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:1583  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1584  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1585  score:36.0  step:36  info:{}  memory length:0
id: 0, episode:1586  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:1587  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1588  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1589  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1590  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1591  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:1592  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:1593  score:32.0  step:32  info:{}  memory length:0
id: 0, episode:1594  score:24.0  step:24  info:{}  memory leng

id: 0, episode:1703  score:31.0  step:31  info:{}  memory length:0
id: 0, episode:1704  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:1705  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:1706  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:1707  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:1708  score:23.0  step:23  info:{}  memory length:0
id: 0, episode:1709  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1710  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1711  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1712  score:24.0  step:24  info:{}  memory length:0
id: 0, episode:1713  score:32.0  step:32  info:{}  memory length:0
id: 0, episode:1714  score:33.0  step:33  info:{}  memory length:0
id: 0, episode:1715  score:32.0  step:32  info:{}  memory length:0
id: 0, episode:1716  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1717  score:20.0  step:20  info:{}  memory leng

id: 0, episode:1826  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:1827  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:1828  score:62.0  step:62  info:{}  memory length:0
id: 0, episode:1829  score:46.0  step:46  info:{}  memory length:0
id: 0, episode:1830  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1831  score:8.0  step:8  info:{}  memory length:0
id: 0, episode:1832  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:1833  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1834  score:69.0  step:69  info:{}  memory length:0
id: 0, episode:1835  score:49.0  step:49  info:{}  memory length:0
id: 0, episode:1836  score:9.0  step:9  info:{}  memory length:0
id: 0, episode:1837  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1838  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1839  score:31.0  step:31  info:{}  memory length:0
id: 0, episode:1840  score:34.0  step:34  info:{}  memory length:0

id: 0, episode:1949  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:1950  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1951  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:1952  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:1953  score:100.0  step:100  info:{}  memory length:0
id: 0, episode:1954  score:83.0  step:83  info:{}  memory length:0
id: 0, episode:1955  score:28.0  step:28  info:{}  memory length:0
id: 0, episode:1956  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1957  score:14.0  step:14  info:{}  memory length:0
id: 0, episode:1958  score:29.0  step:29  info:{}  memory length:0
id: 0, episode:1959  score:45.0  step:45  info:{}  memory length:0
id: 0, episode:1960  score:18.0  step:18  info:{}  memory length:0
id: 0, episode:1961  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:1962  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:1963  score:20.0  step:20  info:{}  memory le

id: 0, episode:2072  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:2073  score:62.0  step:62  info:{}  memory length:0
id: 0, episode:2074  score:35.0  step:35  info:{}  memory length:0
id: 0, episode:2075  score:25.0  step:25  info:{}  memory length:0
id: 0, episode:2076  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:2077  score:30.0  step:30  info:{}  memory length:0
id: 0, episode:2078  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:2079  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:2080  score:16.0  step:16  info:{}  memory length:0
id: 0, episode:2081  score:13.0  step:13  info:{}  memory length:0
id: 0, episode:2082  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:2083  score:106.0  step:106  info:{}  memory length:0
id: 0, episode:2084  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:2085  score:52.0  step:52  info:{}  memory length:0
id: 0, episode:2086  score:42.0  step:42  info:{}  memory le

id: 0, episode:2195  score:21.0  step:21  info:{}  memory length:0
id: 0, episode:2196  score:28.0  step:28  info:{}  memory length:0
id: 0, episode:2197  score:22.0  step:22  info:{}  memory length:0
id: 0, episode:2198  score:12.0  step:12  info:{}  memory length:0
id: 0, episode:2199  score:42.0  step:42  info:{}  memory length:0
id: 0, episode:2200  score:10.0  step:10  info:{}  memory length:0
id: 0, episode:2201  score:17.0  step:17  info:{}  memory length:0
id: 0, episode:2202  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:2203  score:11.0  step:11  info:{}  memory length:0
id: 0, episode:2204  score:26.0  step:26  info:{}  memory length:0
id: 0, episode:2205  score:20.0  step:20  info:{}  memory length:0
id: 0, episode:2206  score:19.0  step:19  info:{}  memory length:0
id: 0, episode:2207  score:47.0  step:47  info:{}  memory length:0
id: 0, episode:2208  score:15.0  step:15  info:{}  memory length:0
id: 0, episode:2209  score:21.0  step:21  info:{}  memory leng