In [85]:
import gymnasium as gym
from gymnasium.wrappers import RecordVideo
import tensorflow as tf
import numpy as np
# Record the library versions this notebook was run with (one per line).
print(tf.__version__, np.__version__, gym.__version__, sep="\n")

2.10.0
1.23.5
0.29.1


In [86]:
# Evaluation environment: renders frames as RGB arrays so that RecordVideo
# can write every evaluation episode to videos/dqn_lander-episode-<k>.mp4.
eval_env = RecordVideo(
    gym.make("LunarLander-v2", render_mode="rgb_array"),
    video_folder="videos",
    name_prefix="dqn_lander",
    episode_trigger=lambda _episode_id: True,  # record every episode
)

  logger.warn(


In [87]:
# Training environment (no rendering). Reset once to inspect an observation.
env = gym.make("LunarLander-v2")
state, info = env.reset()

print("State shape:", state.shape)
print(
    """Example state: \n lander-x-position  lander-y-position  x-velocity  y-velocity  lander-angle  angular-velocity  left-leg-contact (0/1)  right-leg-contact (0/1) \n""",
    state,
)
print(info)


State shape: (8,)
Example state: 
 lander-x-position  lander-y-position  x-velocity  y-velocity  lander-angle  angular-velocity  left-leg-contact (0/1)  right-leg-contact (0/1) 
 [-8.2015991e-05  1.4073536e+00 -8.3260518e-03 -1.5850526e-01
  1.0186795e-04  1.8859804e-03  0.0000000e+00  0.0000000e+00]
{}


In [88]:
# Demonstrate one environment transition using a uniformly random action.
state, info = env.reset()
total_reward = 0.0

action = env.action_space.sample()
next_state, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated  # episode is over on either signal
total_reward += reward

print("Initial state : \n", state)
print("Action take: ", action)
print("reward: ", reward)
print("new state: \n", next_state)
print("finished : ", done)

# Advance to the new observation, as an agent loop would.
state = next_state

Initial state : 
 [-0.00319138  1.4151587  -0.32326803  0.18838756  0.00370481  0.07322507
  0.          0.        ]
Action take:  2
reward:  -2.606218559542344
new state: 
 [-0.00649061  1.4197048  -0.333062    0.20203356  0.00681665  0.06224305
  0.          0.        ]
finished :  False


## Value: even if the current reward is 0, how good can the future rewards be? More formally, the value of a state is the expected total future reward when starting from that state and following the current policy to choose all further actions.

## Q-Value: sometimes just knowing the value of a state isn't enough; we also want to know how good each individual action is in that state.
### Q(s, a) = expected total future reward if action a is taken in state s and the current policy is followed afterwards.
Why are Q-values great?

Because picking the best action becomes simple:

best action = $\arg\max_a Q(s, a)$, i.e. the action with the largest Q-value among all possible actions in state s

In [89]:
from collections import deque
import random


# cur_state, action,reward, next state, done?
class ReplayBuffer:
    """Fixed-capacity store of (state, action, reward, next_state, done) tuples.

    Backed by a ``deque`` with ``maxlen=max_size``, so once the buffer is full
    each new transition evicts the oldest one.
    """

    def __init__(self, max_size, state_dim):
        self.max_size = max_size
        self.state_dim = state_dim  # stored for reference; not used by the buffer itself
        self.memory = deque(maxlen=max_size)

    def add(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns a 5-tuple of NumPy arrays
        ``(states, actions, rewards, next_states, dones)``; actions are
        ``int32`` and every other array is ``float32``.
        """
        drawn = random.sample(self.memory, batch_size)
        # Transpose the list of transitions into one column per field.
        cur, act, rew, nxt, fin = zip(*drawn)

        def as_f32(column):
            return np.array(column, dtype=np.float32)

        return (
            as_f32(cur),
            np.array(act, dtype=np.int32),
            as_f32(rew),
            as_f32(nxt),
            as_f32(fin),
        )

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [90]:
def create_neural_network(state_dim, action_dim):
    """Build the Q-value network: two 64-unit ReLU layers and a linear head.

    Args:
        state_dim: size of the input (observation) vector.
        action_dim: number of output units, one Q-value per action.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    first_hidden = tf.keras.layers.Dense(64, activation='relu', input_dim=state_dim)
    second_hidden = tf.keras.layers.Dense(64, activation='relu')
    q_head = tf.keras.layers.Dense(action_dim)  # no activation: raw Q-values
    return tf.keras.Sequential([first_hidden, second_hidden, q_head])

# Derive the network dimensions from the environment rather than hard-coding
# them, so the cell stays correct if the environment id is changed.
# For LunarLander-v2 this yields state_dim == 8 and action_dim == 4.
state_dim = int(env.observation_space.shape[0])
action_dim = int(env.action_space.n)

# Online network (trained every step) and target network (used for TD targets).
q_network = create_neural_network(state_dim, action_dim)
target_q_network = create_neural_network(state_dim, action_dim)

# Start with both networks holding identical weights.
target_q_network.set_weights(q_network.get_weights())

In [91]:
def select_action(state, epsilon):
    """Pick an action epsilon-greedily for the training environment.

    With probability ``epsilon`` a random action is sampled from the
    module-level ``env``; otherwise the action with the largest Q-value
    predicted by the module-level ``q_network`` is returned as a Python int.
    """
    explore = np.random.rand() < epsilon
    if explore:
        return env.action_space.sample()

    # Add a leading batch axis so the network sees a batch of one state.
    batched_state = tf.convert_to_tensor(state[None, :], dtype=tf.float32)
    greedy_action = tf.argmax(q_network(batched_state)[0]).numpy()
    return int(greedy_action)

In [92]:
def run_eval_episode(eval_env, epsilon_eval=0.00):
    """Play one full episode on ``eval_env`` and return its total reward.

    Actions are epsilon-greedy with respect to the module-level ``q_network``:
    with probability ``epsilon_eval`` a random action is sampled from
    ``eval_env``, otherwise the arg-max Q-value action is taken. The episode
    ends when the environment reports ``terminated`` or ``truncated``.
    """
    observation, _ = eval_env.reset()
    episode_return = 0.0
    finished = False

    while not finished:
        if np.random.rand() < epsilon_eval:
            action = eval_env.action_space.sample()
        else:
            # Batch of one observation -> Q-values -> greedy action index.
            obs_batch = tf.convert_to_tensor(observation[None, :], dtype=tf.float32)
            action = int(tf.argmax(q_network(obs_batch)[0]).numpy())

        observation, reward, terminated, truncated, _ = eval_env.step(action)
        episode_return += reward
        finished = terminated or truncated

    return episode_return


In [93]:

def update_target_network():
    """Hard-sync: overwrite the target network's weights with the online network's."""
    online_weights = q_network.get_weights()
    target_q_network.set_weights(online_weights)


In [94]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
gamma = 0.99  # discount factor applied to the bootstrapped next-state value


@tf.function
def train_step(states, actions, rewards, next_states, dones):
    """Run one gradient step of Q-learning on a sampled batch.

    The TD target is ``rewards + gamma * (1 - dones) * max_a' Q_target(next_states, a')``
    and is computed outside the gradient tape, so no gradient is taken through
    the target. The loss is the mean squared difference between that target
    and the online network's Q-value for the action actually taken.

    Returns:
        The scalar loss tensor for this batch.
    """
    # Bootstrapped targets from the (frozen) target network.
    bootstrap = tf.reduce_max(target_q_network(next_states), axis=1)
    not_done = 1.0 - dones
    td_target = rewards + gamma * not_done * bootstrap

    with tf.GradientTape() as tape:
        q_all = q_network(states)
        # Pair each row index with its action to pick Q(s_i, a_i) per sample.
        rows = tf.range(tf.shape(actions)[0], dtype=tf.int32)
        q_taken = tf.gather_nd(q_all, tf.stack([rows, actions], axis=1))
        td_error = td_target - q_taken
        loss = tf.reduce_mean(tf.square(td_error))

    params = q_network.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(loss, params), params))

    return loss


In [95]:
# --- Training hyperparameters -------------------------------------------
num_episodes = 500
eval_interval = 50           # run one recorded evaluation episode every N training episodes
batch_size = 64

epsilon = 1.0                # initial exploration rate
epsilon_min = 0.05
epsilon_decay = 0.995        # multiplicative decay applied once per episode

target_update_freq = 1000    # environment steps between target-network syncs
max_steps_per_episode = 1000

total_steps = 0
eval_rewards = []

buffer = ReplayBuffer(max_size=100_000, state_dim=state_dim)

# --- DQN training loop ---------------------------------------------------
for episode in range(num_episodes):
    state, info = env.reset()
    episode_reward = 0.0
    done = False
    loss = None  # stays None for the episode while the buffer is still warming up

    for t in range(max_steps_per_episode):
        action = select_action(state, epsilon)
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # Store only `terminated` as the terminal flag. A time-limit truncation
        # ends the rollout but is not a terminal state of the MDP, so the TD
        # target must still bootstrap from next_state in that case.
        buffer.add(state, action, reward, next_state, float(terminated))

        state = next_state
        episode_reward += reward
        total_steps += 1

        # Learn from a random minibatch as soon as enough transitions exist.
        if len(buffer) >= batch_size:
            states, actions, rewards, next_states, dones = buffer.sample(batch_size)
            loss = train_step(states, actions, rewards, next_states, dones)

        if total_steps % target_update_freq == 0:
            update_target_network()

        if done:
            break

    # Decay exploration once per episode, never going below epsilon_min.
    if epsilon > epsilon_min:
        epsilon = max(epsilon * epsilon_decay, epsilon_min)

    # `loss` is the loss of the last training step taken in this episode.
    if loss is not None:
        print(f"Episode {episode} | Reward: {episode_reward:.2f} | Epsilon: {epsilon:.3f} | Loss: {float(loss):.4f}")
    else:
        print(f"Episode {episode} | Reward: {episode_reward:.2f} | Epsilon: {epsilon:.3f} | Loss: N/A (buffer warming)")

    # Periodic evaluation on the video-recording environment.
    if (episode + 1) % eval_interval == 0:
        eval_reward = run_eval_episode(eval_env, epsilon_eval=0.05)
        eval_rewards.append(eval_reward)
        print(f"[EVAL] Episode {episode + 1} | Eval reward: {eval_reward:.2f} (video saved)")


Episode 0 | Reward: -81.50 | Epsilon: 0.995 | Loss: 161.5722
Episode 1 | Reward: -86.25 | Epsilon: 0.990 | Loss: 123.4518
Episode 2 | Reward: -139.89 | Epsilon: 0.985 | Loss: 179.5378
Episode 3 | Reward: -391.50 | Epsilon: 0.980 | Loss: 15.4249
Episode 4 | Reward: -109.10 | Epsilon: 0.975 | Loss: 149.8950
Episode 5 | Reward: -166.17 | Epsilon: 0.970 | Loss: 257.6624
Episode 6 | Reward: -304.76 | Epsilon: 0.966 | Loss: 18.1936
Episode 7 | Reward: -166.46 | Epsilon: 0.961 | Loss: 79.9845
Episode 8 | Reward: -101.26 | Epsilon: 0.956 | Loss: 106.1237
Episode 9 | Reward: -110.95 | Epsilon: 0.951 | Loss: 10.6018
Episode 10 | Reward: -474.61 | Epsilon: 0.946 | Loss: 6.7218
Episode 11 | Reward: -140.45 | Epsilon: 0.942 | Loss: 23.3369
Episode 12 | Reward: -269.58 | Epsilon: 0.937 | Loss: 149.6623
Episode 13 | Reward: -121.64 | Epsilon: 0.932 | Loss: 200.5397
Episode 14 | Reward: -95.35 | Epsilon: 0.928 | Loss: 5.1047
Episode 15 | Reward: -243.21 | Epsilon: 0.923 | Loss: 38.5859
Episode 16 | Re

                                                                           

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-0.mp4
[EVAL] Episode 50 | Eval reward: -228.88 (video saved)
Episode 50 | Reward: -90.86 | Epsilon: 0.774 | Loss: 16.2148
Episode 51 | Reward: -122.66 | Epsilon: 0.771 | Loss: 37.9615
Episode 52 | Reward: -275.44 | Epsilon: 0.767 | Loss: 5.3174
Episode 53 | Reward: -172.05 | Epsilon: 0.763 | Loss: 1.6016
Episode 54 | Reward: -58.09 | Epsilon: 0.759 | Loss: 1.1188
Episode 55 | Reward: -153.81 | Epsilon: 0.755 | Loss: 7.0894
Episode 56 | Reward: -92.64 | Epsilon: 0.751 | Loss: 50.7046
Episode 57 | Reward: -69.09 | Epsilon: 0.748 | Loss: 3.7825
Episode 58 | Reward: -52.19 | Epsilon: 0.744 | Loss: 18.9298
Episode 59 | Reward: -94.46 | Epsilon: 0.740 | Loss: 1.6457
Episode 60 | Reward: -86.94 | Epsilon: 0.737 | Loss: 27.0814
Episode 61 | Reward: -67.84 | Epsilon: 0.733 | Loss: 19.7442
Episode 62 | Reward: -94.45 | Epsilon: 0.729 | Loss: 2.3762
Episode 63 | Reward: -96.18 | Epsilon: 0.726 | L

                                                                           

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-1.mp4
[EVAL] Episode 100 | Eval reward: -172.25 (video saved)
Episode 100 | Reward: -86.14 | Epsilon: 0.603 | Loss: 8.8303
Episode 101 | Reward: -65.34 | Epsilon: 0.600 | Loss: 1.5345
Episode 102 | Reward: -36.14 | Epsilon: 0.597 | Loss: 18.7611
Episode 103 | Reward: -1.31 | Epsilon: 0.594 | Loss: 2.4431
Episode 104 | Reward: -34.91 | Epsilon: 0.591 | Loss: 20.0691
Episode 105 | Reward: -43.23 | Epsilon: 0.588 | Loss: 24.0197
Episode 106 | Reward: -37.51 | Epsilon: 0.585 | Loss: 1.6436
Episode 107 | Reward: -60.54 | Epsilon: 0.582 | Loss: 81.1465
Episode 108 | Reward: -63.86 | Epsilon: 0.579 | Loss: 4.0413
Episode 109 | Reward: -70.35 | Epsilon: 0.576 | Loss: 17.0482
Episode 110 | Reward: -54.68 | Epsilon: 0.573 | Loss: 3.5820
Episode 111 | Reward: -121.32 | Epsilon: 0.570 | Loss: 1.3338
Episode 112 | Reward: -82.01 | Epsilon: 0.568 | Loss: 8.2387
Episode 113 | Reward: -29.78 | Epsilon:

                                                                           

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-2.mp4
[EVAL] Episode 150 | Eval reward: -192.67 (video saved)
Episode 150 | Reward: -31.98 | Epsilon: 0.469 | Loss: 13.1516
Episode 151 | Reward: -83.21 | Epsilon: 0.467 | Loss: 7.1914
Episode 152 | Reward: -0.64 | Epsilon: 0.464 | Loss: 2.1572
Episode 153 | Reward: -7.27 | Epsilon: 0.462 | Loss: 10.2138
Episode 154 | Reward: -23.37 | Epsilon: 0.460 | Loss: 11.0302
Episode 155 | Reward: 19.65 | Epsilon: 0.458 | Loss: 74.1220
Episode 156 | Reward: -278.13 | Epsilon: 0.455 | Loss: 9.3705
Episode 157 | Reward: -11.22 | Epsilon: 0.453 | Loss: 1.2156
Episode 158 | Reward: -53.24 | Epsilon: 0.451 | Loss: 1.7793
Episode 159 | Reward: -23.82 | Epsilon: 0.448 | Loss: 14.2870
Episode 160 | Reward: -0.12 | Epsilon: 0.446 | Loss: 2.4515
Episode 161 | Reward: -352.99 | Epsilon: 0.444 | Loss: 7.2141
Episode 162 | Reward: -30.91 | Epsilon: 0.442 | Loss: 29.0596
Episode 163 | Reward: -493.33 | Epsilon:

                                                                          

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-3.mp4
[EVAL] Episode 200 | Eval reward: -187.14 (video saved)
Episode 200 | Reward: -26.48 | Epsilon: 0.365 | Loss: 2.3645
Episode 201 | Reward: -1.58 | Epsilon: 0.363 | Loss: 10.4835
Episode 202 | Reward: -35.97 | Epsilon: 0.361 | Loss: 3.6910
Episode 203 | Reward: -217.75 | Epsilon: 0.360 | Loss: 6.3180
Episode 204 | Reward: -81.06 | Epsilon: 0.358 | Loss: 9.0419
Episode 205 | Reward: -70.87 | Epsilon: 0.356 | Loss: 7.3839
Episode 206 | Reward: -47.83 | Epsilon: 0.354 | Loss: 16.0178
Episode 207 | Reward: 5.56 | Epsilon: 0.353 | Loss: 4.4203
Episode 208 | Reward: 20.93 | Epsilon: 0.351 | Loss: 25.5949
Episode 209 | Reward: -185.99 | Epsilon: 0.349 | Loss: 12.9732
Episode 210 | Reward: -24.80 | Epsilon: 0.347 | Loss: 8.2924
Episode 211 | Reward: -37.33 | Epsilon: 0.346 | Loss: 3.1501
Episode 212 | Reward: -135.53 | Epsilon: 0.344 | Loss: 2.7654
Episode 213 | Reward: -190.26 | Epsilon: 

                                                                           

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-4.mp4
[EVAL] Episode 250 | Eval reward: -46.41 (video saved)
Episode 250 | Reward: -42.90 | Epsilon: 0.284 | Loss: 21.7129
Episode 251 | Reward: -37.01 | Epsilon: 0.283 | Loss: 11.9012
Episode 252 | Reward: -42.16 | Epsilon: 0.281 | Loss: 5.9068
Episode 253 | Reward: 22.23 | Epsilon: 0.280 | Loss: 12.0365
Episode 254 | Reward: -44.81 | Epsilon: 0.279 | Loss: 3.1618
Episode 255 | Reward: -35.23 | Epsilon: 0.277 | Loss: 2.5025
Episode 256 | Reward: -54.97 | Epsilon: 0.276 | Loss: 2.7935
Episode 257 | Reward: 53.75 | Epsilon: 0.274 | Loss: 5.2760
Episode 258 | Reward: -23.59 | Epsilon: 0.273 | Loss: 3.9807
Episode 259 | Reward: -0.16 | Epsilon: 0.272 | Loss: 7.7497
Episode 260 | Reward: 55.16 | Epsilon: 0.270 | Loss: 2.1197
Episode 261 | Reward: 17.22 | Epsilon: 0.269 | Loss: 2.9990
Episode 262 | Reward: -23.96 | Epsilon: 0.268 | Loss: 18.5631
Episode 263 | Reward: -7.19 | Epsilon: 0.266 |

                                                                           

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-5.mp4
[EVAL] Episode 300 | Eval reward: 1.45 (video saved)
Episode 300 | Reward: -3.71 | Epsilon: 0.221 | Loss: 1.8459
Episode 301 | Reward: 75.09 | Epsilon: 0.220 | Loss: 9.4666
Episode 302 | Reward: 190.94 | Epsilon: 0.219 | Loss: 1.7956
Episode 303 | Reward: -47.37 | Epsilon: 0.218 | Loss: 2.3993
Episode 304 | Reward: 156.65 | Epsilon: 0.217 | Loss: 3.4761
Episode 305 | Reward: 61.46 | Epsilon: 0.216 | Loss: 9.4229
Episode 306 | Reward: 48.67 | Epsilon: 0.215 | Loss: 12.6329
Episode 307 | Reward: 87.80 | Epsilon: 0.214 | Loss: 49.5325
Episode 308 | Reward: 161.95 | Epsilon: 0.212 | Loss: 8.0946
Episode 309 | Reward: -212.51 | Epsilon: 0.211 | Loss: 7.0326
Episode 310 | Reward: -3.28 | Epsilon: 0.210 | Loss: 3.7257
Episode 311 | Reward: -42.57 | Epsilon: 0.209 | Loss: 4.7417
Episode 312 | Reward: 239.70 | Epsilon: 0.208 | Loss: 4.2253
Episode 313 | Reward: 206.36 | Epsilon: 0.207 | Lo

                                                                          

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-6.mp4
[EVAL] Episode 350 | Eval reward: 211.66 (video saved)
Episode 350 | Reward: 297.68 | Epsilon: 0.172 | Loss: 2.6662
Episode 351 | Reward: 224.34 | Epsilon: 0.171 | Loss: 4.8984
Episode 352 | Reward: 255.46 | Epsilon: 0.170 | Loss: 0.9092
Episode 353 | Reward: 194.44 | Epsilon: 0.170 | Loss: 17.1663
Episode 354 | Reward: 241.71 | Epsilon: 0.169 | Loss: 2.0458
Episode 355 | Reward: 228.23 | Epsilon: 0.168 | Loss: 6.6949
Episode 356 | Reward: 260.36 | Epsilon: 0.167 | Loss: 2.2194
Episode 357 | Reward: 69.84 | Epsilon: 0.166 | Loss: 2.0118
Episode 358 | Reward: -4.00 | Epsilon: 0.165 | Loss: 98.0864
Episode 359 | Reward: -21.26 | Epsilon: 0.165 | Loss: 1.4988
Episode 360 | Reward: 237.84 | Epsilon: 0.164 | Loss: 3.6646
Episode 361 | Reward: 109.43 | Epsilon: 0.163 | Loss: 2.6557
Episode 362 | Reward: 24.76 | Epsilon: 0.162 | Loss: 2.2933
Episode 363 | Reward: -109.95 | Epsilon: 0.161

                                                                          

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-7.mp4
[EVAL] Episode 400 | Eval reward: 231.03 (video saved)
Episode 400 | Reward: 285.04 | Epsilon: 0.134 | Loss: 263.7839
Episode 401 | Reward: 228.19 | Epsilon: 0.133 | Loss: 1.2424
Episode 402 | Reward: 270.99 | Epsilon: 0.133 | Loss: 2.6755
Episode 403 | Reward: 286.04 | Epsilon: 0.132 | Loss: 1.9913
Episode 404 | Reward: 238.95 | Epsilon: 0.131 | Loss: 3.4222
Episode 405 | Reward: 268.52 | Epsilon: 0.131 | Loss: 2.3286
Episode 406 | Reward: 245.02 | Epsilon: 0.130 | Loss: 13.6796
Episode 407 | Reward: 252.38 | Epsilon: 0.129 | Loss: 1.1474
Episode 408 | Reward: 185.92 | Epsilon: 0.129 | Loss: 2.0528
Episode 409 | Reward: 259.38 | Epsilon: 0.128 | Loss: 5.0820
Episode 410 | Reward: 287.88 | Epsilon: 0.127 | Loss: 2.5025
Episode 411 | Reward: 268.89 | Epsilon: 0.127 | Loss: 0.7800
Episode 412 | Reward: 235.96 | Epsilon: 0.126 | Loss: 3.8641
Episode 413 | Reward: -50.17 | Epsilon: 0.

                                                                        

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-8.mp4
[EVAL] Episode 450 | Eval reward: 258.57 (video saved)
Episode 450 | Reward: 187.50 | Epsilon: 0.104 | Loss: 1.0394
Episode 451 | Reward: 184.56 | Epsilon: 0.104 | Loss: 1.8472
Episode 452 | Reward: 266.25 | Epsilon: 0.103 | Loss: 29.7632
Episode 453 | Reward: 249.01 | Epsilon: 0.103 | Loss: 2.1121
Episode 454 | Reward: -198.39 | Epsilon: 0.102 | Loss: 2.5302
Episode 455 | Reward: 282.55 | Epsilon: 0.102 | Loss: 1.8449
Episode 456 | Reward: 41.89 | Epsilon: 0.101 | Loss: 3.3463
Episode 457 | Reward: 225.19 | Epsilon: 0.101 | Loss: 1.5778
Episode 458 | Reward: 280.50 | Epsilon: 0.100 | Loss: 2.9231
Episode 459 | Reward: -110.95 | Epsilon: 0.100 | Loss: 1.1553
Episode 460 | Reward: 297.22 | Epsilon: 0.099 | Loss: 1.7538
Episode 461 | Reward: 271.47 | Epsilon: 0.099 | Loss: 2.3366
Episode 462 | Reward: 271.78 | Epsilon: 0.098 | Loss: 5.1636
Episode 463 | Reward: 287.79 | Epsilon: 0.0

                                                                          

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-9.mp4
[EVAL] Episode 500 | Eval reward: -18.13 (video saved)




In [96]:
# Persist both networks to HDF5 files in the working directory.
q_network.save(filepath="q_network.h5")
target_q_network.save(filepath="target_q_network.h5")



In [None]:
def evaluate_policy(env, num_episodes=10, epsilon_eval=0.00):
    """Run several evaluation episodes and report their returns.

    Each episode is played by ``run_eval_episode`` (epsilon-greedy with respect
    to the module-level ``q_network``), so the rollout logic lives in a single
    place instead of being duplicated here.

    Args:
        env: environment to evaluate on.
        num_episodes: number of episodes to play.
        epsilon_eval: probability of taking a random action at each step.

    Returns:
        List with the total reward of each episode, in play order.
    """
    rewards = []
    for ep in range(num_episodes):
        total_reward = run_eval_episode(env, epsilon_eval=epsilon_eval)
        rewards.append(total_reward)
        print(f"Eval Episode {ep} | Reward: {total_reward:.2f}")

    # Avoid np.mean([]) (RuntimeWarning) when no episodes were requested;
    # the printed average is still "nan" in that case.
    mean_reward = float(np.mean(rewards)) if rewards else float("nan")
    print(f"\nAverage eval reward over {num_episodes} episodes: {mean_reward:.2f}")
    return rewards
# Final evaluation of the trained policy on the video-recording environment.
# NOTE(review): this rebinds `eval_rewards`, replacing the list of periodic
# evaluation rewards collected during training.
eval_rewards = evaluate_policy(
    eval_env,
    num_episodes=10,
    epsilon_eval=0.05,
)


MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-20.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-20.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-20.mp4
Eval Episode 0 | Reward: 243.26
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-21.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-21.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-21.mp4
Eval Episode 1 | Reward: -14.36
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-22.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-22.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-22.mp4
Eval Episode 2 | Reward: 247.57
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-23.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-23.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-23.mp4
Eval Episode 3 | Reward: 241.84
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-24.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-24.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-24.mp4
Eval Episode 4 | Reward: 239.17
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-25.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-25.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-25.mp4
Eval Episode 5 | Reward: 230.09
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-26.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-26.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-26.mp4
Eval Episode 6 | Reward: 259.11
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-27.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-27.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-27.mp4
Eval Episode 7 | Reward: 260.71
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-28.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-28.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-28.mp4
Eval Episode 8 | Reward: 276.04
MoviePy - Building video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-29.mp4.
MoviePy - Writing video c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-29.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready c:\Users\hp\Documents\tf_gpu\rl_rev\videos\dqn_lander-episode-29.mp4
Eval Episode 9 | Reward: 265.91

Average eval reward over 10 episodes: 224.93
