In [1]:
import gym
import pylab
import random
import numpy as np
from collections import deque
import keras
from keras import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import Adam
from keras import initializers

Using TensorFlow backend.
  return f(*args, **kwds)


## Task: fill empty spaces in the following agent code

In [2]:
class DeepQAgent:
    def __init__(self, state_size, action_size, render=True):
        # Tip: if you are training this on AWS the best way is to turn off rendering
        # and load it later with the serialized model
        self.render = render
        self.state_size = state_size
        self.action_size = action_size

        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 0.6
        self.epsilon_min = 0.005
        self.epsilon_decay = (self.epsilon - self.epsilon_min) / 30000
        self.batch_size = 64
        self.train_start = 1000
        # replay memory
        self.memory = deque(maxlen=20000)

        self.model = self.build_model()
        self.target_model = self.build_model()
        self.update_target_model()

    def build_model(self):
        # Use tflearn to get simple NN for deep q-learning
        # Spoler alert: a couple of fully connected hidden layers should be enough
        # Output layer should have the same dimensionality as the action space
        model = Sequential([
#             Dropout(0.2, input_shape=(self.state_size,)),
            Dense(24, input_dim=self.state_size, kernel_initializer=initializers.random_normal(stddev=0.01)),
            Activation('relu'),
#             Dropout(0.2),
            Dense(24, kernel_initializer=initializers.random_normal(stddev=0.01)),
            Activation('relu'),
#             Dropout(0.2),
            Dense(self.action_size, activation='linear', kernel_initializer=initializers.random_normal(stddev=0.01)),
        ])
        
        model.compile(optimizer=Adam(lr=self.learning_rate), loss='mse')
        return model

    def update_target_model(self):
        """Update your target model to the model you are currently learning at regular time intervals"""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, state):
        """The choice of action uses the epsilon-greedy policy for the current network."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model.predict(state)
            return np.argmax(q_value[0])

    def replay_memory(self, state, action, reward, next_state, done):
        """Save <s, a, r, s'> to replay_memory"""
        if action == 2:
            action = 1
        self.memory.append((state, action, reward, next_state, done))
        if self.epsilon > self.epsilon_min:
            self.epsilon -= self.epsilon_decay
            # print(len(self.memory))

    def train_replay(self):
        """Random sampling of batch_size samples from replay memory"""
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        update_input = np.zeros((batch_size, self.state_size))
        update_target = np.zeros((batch_size, self.action_size))
        
        for i in range(batch_size):
            state, action, reward, next_state, done = mini_batch[i]
            target = self.model.predict(state)[0]

            # As in queuing, it gets the maximum Q Value at s'. However, it is imported from the target model.
            if done:
                target[action] = reward
            else:
                target[action] = reward + self.discount_factor * \
                                          np.amax(self.target_model.predict(next_state)[0])
            update_input[i] = state
            update_target[i] = target

        # You can create a minibatch of the correct target answer and the current value of your own,
        self.model.fit(update_input, update_target, batch_size=batch_size, epochs=1, verbose=0)

    def load_model(self, name):
        self.model.load_model(name)

    def save_model(self, name):
        self.model.save(name)


In [None]:
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0] # should be equal 2
ACTION_SIZE = 2
agent = DeepQAgent(state_size, ACTION_SIZE)
# agent.load_model("./save_model/rl_model")
scores, episodes = [], []
N_EPISODES = 4000

In [None]:
for e in range(N_EPISODES):
    done = False
    score = 0
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    print(state)

    # Action 0 (left), 1 (do nothing), 3 (declare fake_action to avoid doing nothing
    fake_action = 0

    # Counter for the same action 4 times
    action_count = 0

    while not done:
        if agent.render:
            env.render()

        # Select an action in the current state and proceed to a step
        action_count = action_count + 1

        if action_count == 4:
            action = agent.get_action(state)
            action_count = 0

            if action == 0:
                fake_action = 0
            elif action == 1:
                fake_action = 2

        # Take 1 step with the selected action
        next_state, reward, done, info = env.step(fake_action)
        next_state = np.reshape(next_state, [1, state_size])
        # Give a penalty of -100 for actions that end an episode
        # reward = reward if not done else -100

        # Save <s, a, r, s'> to replay memory
        agent.replay_memory(state, fake_action, reward, next_state, done)
        # Continue to learn every time step
        agent.train_replay()
        score += reward
        state = next_state

        if done:
            env.reset()
            # Copy the learning model for each episode to the target model
            agent.update_target_model()

            # For each episode, the time step where cartpole stood is plot
            scores.append(score)
            episodes.append(e)
            print("episode:", e, "  score:", score, "  memory length:", len(agent.memory),
                  "  epsilon:", agent.epsilon)

    # Save model for every 50 episodes
    if e % 50 == 0:
        agent.save_model("./save_model/rl_model_iter{}".format(e))

[[-0.51411818  0.        ]]
episode: 0   score: -200.0   memory length: 200   epsilon: 0.596033333333334
[[-0.57239168  0.        ]]
episode: 1   score: -200.0   memory length: 400   epsilon: 0.592066666666668
[[-0.49942405  0.        ]]
episode: 2   score: -200.0   memory length: 600   epsilon: 0.588100000000002
[[-0.43475462  0.        ]]
episode: 3   score: -200.0   memory length: 800   epsilon: 0.584133333333336
[[-0.45380175  0.        ]]
episode: 4   score: -200.0   memory length: 1000   epsilon: 0.5801666666666699
[[-0.52912733  0.        ]]
episode: 5   score: -200.0   memory length: 1200   epsilon: 0.5762000000000039
[[-0.54556391  0.        ]]
episode: 6   score: -200.0   memory length: 1400   epsilon: 0.5722333333333379
[[-0.49991958  0.        ]]
episode: 7   score: -200.0   memory length: 1600   epsilon: 0.5682666666666719
[[-0.59062751  0.        ]]
episode: 8   score: -200.0   memory length: 1800   epsilon: 0.5643000000000059
[[-0.47938365  0.        ]]
episode: 9   scor

episode: 76   score: -200.0   memory length: 15220   epsilon: 0.2981366666667168
[[-0.44098291  0.        ]]
episode: 77   score: -200.0   memory length: 15420   epsilon: 0.2941700000000508
[[-0.42929151  0.        ]]
episode: 78   score: -200.0   memory length: 15620   epsilon: 0.29020333333338477
[[-0.43338318  0.        ]]
episode: 79   score: -200.0   memory length: 15820   epsilon: 0.28623666666671876
[[-0.43864286  0.        ]]
episode: 80   score: -200.0   memory length: 16020   epsilon: 0.28227000000005276
[[-0.48322152  0.        ]]
episode: 81   score: -200.0   memory length: 16220   epsilon: 0.27830333333338675
[[-0.47894299  0.        ]]
episode: 82   score: -158.0   memory length: 16378   epsilon: 0.2751696666667206
[[-0.49311876  0.        ]]
episode: 83   score: -177.0   memory length: 16555   epsilon: 0.2716591666667212
[[-0.42936721  0.        ]]
episode: 84   score: -200.0   memory length: 16755   epsilon: 0.2676925000000552
[[-0.58511365  0.        ]]
episode: 85   s

episode: 151   score: -200.0   memory length: 20000   epsilon: 0.009422833333427593
[[-0.54574402  0.        ]]
episode: 152   score: -200.0   memory length: 20000   epsilon: 0.005456166666760891
[[-0.5222183  0.       ]]
episode: 153   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54766738  0.        ]]
episode: 154   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41430205  0.        ]]
episode: 155   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52470176  0.        ]]
episode: 156   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58276574  0.        ]]
episode: 157   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4238992  0.       ]]
episode: 158   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53343999  0.        ]]
episode: 159   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42269138  0.       

episode: 225   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42531664  0.        ]]
episode: 226   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52460863  0.        ]]
episode: 227   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42684704  0.        ]]
episode: 228   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46953198  0.        ]]
episode: 229   score: -141.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48837782  0.        ]]
episode: 230   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45290078  0.        ]]
episode: 231   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54664395  0.        ]]
episode: 232   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4113227  0.       ]]
episode: 233   score: -154.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.43463229  0.     

episode: 299   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5901521  0.       ]]
episode: 300   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48588907  0.        ]]
episode: 301   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50321512  0.        ]]
episode: 302   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54360899  0.        ]]
episode: 303   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41266594  0.        ]]
episode: 304   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58549889  0.        ]]
episode: 305   score: -109.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46093013  0.        ]]
episode: 306   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55011251  0.        ]]
episode: 307   score: -171.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42238673  0.     

episode: 373   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55601144  0.        ]]
episode: 374   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58188812  0.        ]]
episode: 375   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47854431  0.        ]]
episode: 376   score: -146.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50104432  0.        ]]
episode: 377   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5193961  0.       ]]
episode: 378   score: -110.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46826889  0.        ]]
episode: 379   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53103202  0.        ]]
episode: 380   score: -155.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56285822  0.        ]]
episode: 381   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50710621  0.     

episode: 447   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.575419  0.      ]]
episode: 448   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51537029  0.        ]]
episode: 449   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5882198  0.       ]]
episode: 450   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53745269  0.        ]]
episode: 451   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53305088  0.        ]]
episode: 452   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58701283  0.        ]]
episode: 453   score: -133.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57651585  0.        ]]
episode: 454   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.40036303  0.        ]]
episode: 455   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55659787  0.        ]

episode: 521   score: -123.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55947413  0.        ]]
episode: 522   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.40728189  0.        ]]
episode: 523   score: -140.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51750508  0.        ]]
episode: 524   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53796165  0.        ]]
episode: 525   score: -197.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5395144  0.       ]]
episode: 526   score: -130.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42568327  0.        ]]
episode: 527   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.49010097  0.        ]]
episode: 528   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47099685  0.        ]]
episode: 529   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58428365  0.     

episode: 595   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57886547  0.        ]]
episode: 596   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57084514  0.        ]]
episode: 597   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4031979  0.       ]]
episode: 598   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48299639  0.        ]]
episode: 599   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56003992  0.        ]]
episode: 600   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50975666  0.        ]]
episode: 601   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48606714  0.        ]]
episode: 602   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45233874  0.        ]]
episode: 603   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48263799  0.     

episode: 669   score: -125.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56729767  0.        ]]
episode: 670   score: -148.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.44141882  0.        ]]
episode: 671   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46735433  0.        ]]
episode: 672   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4653735  0.       ]]
episode: 673   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54127005  0.        ]]
episode: 674   score: -124.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54196165  0.        ]]
episode: 675   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45015564  0.        ]]
episode: 676   score: -189.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51147498  0.        ]]
episode: 677   score: -147.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53341058  0.     

episode: 743   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.43684848  0.        ]]
episode: 744   score: -195.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.43439939  0.        ]]
episode: 745   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59893231  0.        ]]
episode: 746   score: -136.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4833036  0.       ]]
episode: 747   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4149037  0.       ]]
episode: 748   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56736181  0.        ]]
episode: 749   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47410508  0.        ]]
episode: 750   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42229567  0.        ]]
episode: 751   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56987646  0.       

episode: 817   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48604448  0.        ]]
episode: 818   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42739604  0.        ]]
episode: 819   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57741724  0.        ]]
episode: 820   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54092594  0.        ]]
episode: 821   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48474479  0.        ]]
episode: 822   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5981094  0.       ]]
episode: 823   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42045364  0.        ]]
episode: 824   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59716311  0.        ]]
episode: 825   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46814673  0.     

episode: 891   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54634728  0.        ]]
episode: 892   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46918974  0.        ]]
episode: 893   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48126523  0.        ]]
episode: 894   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47557793  0.        ]]
episode: 895   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53680404  0.        ]]
episode: 896   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45739139  0.        ]]
episode: 897   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47646776  0.        ]]
episode: 898   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45645487  0.        ]]
episode: 899   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57830528  0.   

episode: 965   score: -137.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41464829  0.        ]]
episode: 966   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46383809  0.        ]]
episode: 967   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50048618  0.        ]]
episode: 968   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51378167  0.        ]]
episode: 969   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47402298  0.        ]]
episode: 970   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53329189  0.        ]]
episode: 971   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55925473  0.        ]]
episode: 972   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52320695  0.        ]]
episode: 973   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45098193  0.   

episode: 1038   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47906503  0.        ]]
episode: 1039   score: -133.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57486111  0.        ]]
episode: 1040   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52508606  0.        ]]
episode: 1041   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55210186  0.        ]]
episode: 1042   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59432291  0.        ]]
episode: 1043   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.44981826  0.        ]]
episode: 1044   score: -146.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58475718  0.        ]]
episode: 1045   score: -166.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46172846  0.        ]]
episode: 1046   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.569359

episode: 1111   score: -120.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57326666  0.        ]]
episode: 1112   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58294309  0.        ]]
episode: 1113   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41546059  0.        ]]
episode: 1114   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.43386932  0.        ]]
episode: 1115   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45045942  0.        ]]
episode: 1116   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.40359375  0.        ]]
episode: 1117   score: -83.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46192583  0.        ]]
episode: 1118   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45772527  0.        ]]
episode: 1119   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5826668

episode: 1184   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55754146  0.        ]]
episode: 1185   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45739508  0.        ]]
episode: 1186   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48840813  0.        ]]
episode: 1187   score: -132.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.40900841  0.        ]]
episode: 1188   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4198315  0.       ]]
episode: 1189   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48038054  0.        ]]
episode: 1190   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47539521  0.        ]]
episode: 1191   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46558564  0.        ]]
episode: 1192   score: -165.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46333802

episode: 1257   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.44072977  0.        ]]
episode: 1258   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55564602  0.        ]]
episode: 1259   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59450587  0.        ]]
episode: 1260   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50475658  0.        ]]
episode: 1261   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59595498  0.        ]]
episode: 1262   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4305307  0.       ]]
episode: 1263   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51909748  0.        ]]
episode: 1264   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59621818  0.        ]]
episode: 1265   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56785092

episode: 1330   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42085324  0.        ]]
episode: 1331   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.44860062  0.        ]]
episode: 1332   score: -173.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42127721  0.        ]]
episode: 1333   score: -145.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57520853  0.        ]]
episode: 1334   score: -131.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54799299  0.        ]]
episode: 1335   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41087001  0.        ]]
episode: 1336   score: -86.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42469485  0.        ]]
episode: 1337   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51742162  0.        ]]
episode: 1338   score: -133.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5757038

episode: 1403   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59371512  0.        ]]
episode: 1404   score: -135.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56253267  0.        ]]
episode: 1405   score: -122.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5769673  0.       ]]
episode: 1406   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58806381  0.        ]]
episode: 1407   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53834143  0.        ]]
episode: 1408   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41554742  0.        ]]
episode: 1409   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.41812067  0.        ]]
episode: 1410   score: -157.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56860193  0.        ]]
episode: 1411   score: -127.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55463874

episode: 1476   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58387933  0.        ]]
episode: 1477   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.44866659  0.        ]]
episode: 1478   score: -94.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57675676  0.        ]]
episode: 1479   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59517326  0.        ]]
episode: 1480   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46717181  0.        ]]
episode: 1481   score: -177.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52933103  0.        ]]
episode: 1482   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57131597  0.        ]]
episode: 1483   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48997478  0.        ]]
episode: 1484   score: -126.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4198911

episode: 1549   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54616439  0.        ]]
episode: 1550   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5828567  0.       ]]
episode: 1551   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45976368  0.        ]]
episode: 1552   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.40025605  0.        ]]
episode: 1553   score: -150.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46855507  0.        ]]
episode: 1554   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55467872  0.        ]]
episode: 1555   score: -119.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54184833  0.        ]]
episode: 1556   score: -140.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57034881  0.        ]]
episode: 1557   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.40622414

episode: 1622   score: -197.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59247626  0.        ]]
episode: 1623   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45060992  0.        ]]
episode: 1624   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47588283  0.        ]]
episode: 1625   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.44186953  0.        ]]
episode: 1626   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4554628  0.       ]]
episode: 1627   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45723007  0.        ]]
episode: 1628   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55722383  0.        ]]
episode: 1629   score: -117.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4799045  0.       ]]
episode: 1630   score: -199.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42259217  

episode: 1695   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58240776  0.        ]]
episode: 1696   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55738624  0.        ]]
episode: 1697   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.54994421  0.        ]]
episode: 1698   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48124143  0.        ]]
episode: 1699   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52842379  0.        ]]
episode: 1700   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.56907115  0.        ]]
episode: 1701   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45925102  0.        ]]
episode: 1702   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57693873  0.        ]]
episode: 1703   score: -127.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.585239

episode: 1768   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.58137803  0.        ]]
episode: 1769   score: -127.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53797681  0.        ]]
episode: 1770   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.51493064  0.        ]]
episode: 1771   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46242876  0.        ]]
episode: 1772   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55120814  0.        ]]
episode: 1773   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42112239  0.        ]]
episode: 1774   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5717656  0.       ]]
episode: 1775   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5174648  0.       ]]
episode: 1776   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52322742  

episode: 1841   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.59459668  0.        ]]
episode: 1842   score: -134.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50271885  0.        ]]
episode: 1843   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52690198  0.        ]]
episode: 1844   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.48694961  0.        ]]
episode: 1845   score: -152.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.53538642  0.        ]]
episode: 1846   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.42358653  0.        ]]
episode: 1847   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.45712671  0.        ]]
episode: 1848   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.50954556  0.        ]]
episode: 1849   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.530691

episode: 1914   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.47831631  0.        ]]
episode: 1915   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.55485231  0.        ]]
episode: 1916   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.46054673  0.        ]]
episode: 1917   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.57331499  0.        ]]
episode: 1918   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.43268326  0.        ]]
episode: 1919   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.5023123  0.       ]]
episode: 1920   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.52805843  0.        ]]
episode: 1921   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.4268045  0.       ]]
episode: 1922   score: -200.0   memory length: 20000   epsilon: 0.004980166666760887
[[-0.49807325  