Skip to content

Commit

Permalink
Updated notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed May 26, 2021
1 parent cfe0334 commit 31f8376
Show file tree
Hide file tree
Showing 15 changed files with 69 additions and 124 deletions.
12 changes: 6 additions & 6 deletions Chapter01/06_neural_evolutionary_agent.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Chapter02/3_temporal_difference_learning.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Chapter02/4_monte_carlo_prediction_and_control_rl.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Chapter02/5_sarsa_sarsa_lambda.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Chapter02/6_q_learning.ipynb

Large diffs are not rendered by default.

16 changes: 14 additions & 2 deletions Chapter02/7_policy_gradients.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,13 @@
"Episode#:0 ep_reward:-171.0\r",
"Episode#:0 ep_reward:-172.0\r",
"Episode#:0 ep_reward:-173.0\r",
"Episode#:0 ep_reward:-174.0\r",
"Episode#:0 ep_reward:-174.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:0 ep_reward:-175.0\r",
"Episode#:0 ep_reward:-176.0\r",
"Episode#:0 ep_reward:-177.0\r",
Expand Down Expand Up @@ -540,7 +546,13 @@
"Episode#:1 ep_reward:-174.0\r",
"Episode#:1 ep_reward:-175.0\r",
"Episode#:1 ep_reward:-176.0\r",
"Episode#:1 ep_reward:-177.0\r",
"Episode#:1 ep_reward:-177.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:1 ep_reward:-178.0\r",
"Episode#:1 ep_reward:-179.0\r",
"Episode#:1 ep_reward:-180.0\r",
Expand Down
91 changes: 13 additions & 78 deletions Chapter02/8_actor_critic_agent.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -161,116 +161,51 @@
"Episode#:0 ep_reward:7.0\r",
"Episode#:0 ep_reward:8.0\r",
"Episode#:0 ep_reward:9.0\r",
"Episode#:0 ep_reward:10.0\r",
"Episode#:0 ep_reward:10.0\r\n",
"\n",
"Episode#:0 ep_reward:11.0\r",
"Episode#:0 ep_reward:12.0\r",
"Episode#:0 ep_reward:13.0\r",
"Episode#:0 ep_reward:14.0\r",
"Episode#:0 ep_reward:15.0\r",
"Episode#:0 ep_reward:16.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:0 ep_reward:17.0\r",
"Episode#:0 ep_reward:18.0\r",
"Episode#:0 ep_reward:19.0\r",
"Episode#:0 ep_reward:20.0\r",
"Episode#:0 ep_reward:21.0\r",
"Episode#:0 ep_reward:22.0\r",
"Episode#:0 ep_reward:23.0\r",
"Episode#:0 ep_reward:24.0\r"
"Episode#:1 ep_reward:1.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Episode#:0 ep_reward:25.0\r",
"Episode#:1 ep_reward:1.0\r",
"Episode#:1 ep_reward:2.0\r",
"Episode#:1 ep_reward:3.0\r",
"Episode#:1 ep_reward:4.0\r",
"Episode#:1 ep_reward:5.0\r",
"Episode#:1 ep_reward:6.0\r",
"Episode#:1 ep_reward:7.0\r",
"Episode#:1 ep_reward:8.0\r",
"Episode#:1 ep_reward:9.0\r",
"Episode#:1 ep_reward:10.0\r",
"Episode#:1 ep_reward:11.0\r",
"Episode#:1 ep_reward:12.0\r",
"Episode#:1 ep_reward:13.0\r",
"Episode#:1 ep_reward:14.0\r",
"Episode#:1 ep_reward:15.0\r"
"Episode#:1 ep_reward:9.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:1 ep_reward:10.0\r",
"Episode#:1 ep_reward:11.0\r",
"Episode#:1 ep_reward:12.0\r",
"Episode#:1 ep_reward:13.0\r",
"Episode#:1 ep_reward:14.0\r",
"Episode#:1 ep_reward:15.0\r",
"Episode#:1 ep_reward:16.0\r",
"Episode#:1 ep_reward:17.0\r",
"Episode#:1 ep_reward:18.0\r",
"Episode#:1 ep_reward:19.0\r",
"Episode#:1 ep_reward:20.0\r",
"Episode#:1 ep_reward:21.0\r",
"Episode#:1 ep_reward:22.0\r",
"Episode#:1 ep_reward:23.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:1 ep_reward:24.0\r",
"Episode#:1 ep_reward:25.0\r",
"Episode#:1 ep_reward:26.0\r",
"Episode#:1 ep_reward:27.0\r",
"Episode#:1 ep_reward:28.0\r",
"Episode#:1 ep_reward:29.0\r",
"Episode#:1 ep_reward:30.0\r",
"Episode#:1 ep_reward:31.0\r",
"Episode#:1 ep_reward:32.0\r",
"Episode#:1 ep_reward:33.0\r",
"Episode#:1 ep_reward:34.0\r",
"Episode#:1 ep_reward:35.0\r",
"Episode#:1 ep_reward:36.0\r",
"Episode#:1 ep_reward:37.0\r",
"Episode#:1 ep_reward:38.0\r",
"Episode#:1 ep_reward:39.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:1 ep_reward:40.0\r",
"Episode#:1 ep_reward:41.0\r",
"Episode#:1 ep_reward:42.0\r",
"Episode#:1 ep_reward:43.0\r",
"Episode#:1 ep_reward:44.0\r",
"Episode#:1 ep_reward:45.0\r",
"Episode#:1 ep_reward:46.0\r",
"Episode#:1 ep_reward:47.0\r"
"Episode#:1 ep_reward:21.0\r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#:1 ep_reward:48.0\r",
"Episode#:1 ep_reward:49.0\r",
"Episode#:1 ep_reward:50.0\r",
"Episode#:1 ep_reward:51.0\r",
"Episode#:1 ep_reward:52.0\r",
"Episode#:1 ep_reward:53.0\r",
"Episode#:1 ep_reward:54.0\r\n",
"\n",
"Episode#:1 ep_reward:55.0\r"
"\n",
"Episode#:1 ep_reward:22.0\r"
]
}
],
Expand Down
8 changes: 4 additions & 4 deletions Chapter03/1_double_dqn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20210524-054846\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20210526-035407\n"
]
}
],
Expand Down Expand Up @@ -195,7 +195,7 @@
" action = self.model.get_action(observation)\n",
" next_observation, reward, done, _ = self.env.step(action)\n",
" self.buffer.store(\n",
" observation, action, reward * 0.01, next_observation, done\n",
" observation, action, reward, next_observation, done\n",
" )\n",
" episode_reward += reward\n",
" observation = next_observation\n",
Expand All @@ -216,14 +216,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:21.0\n"
"Episode#0 Reward:10.0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#1 Reward:40.0\n"
"Episode#1 Reward:14.0\n"
]
}
],
Expand Down
6 changes: 3 additions & 3 deletions Chapter03/1_dqn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20210524-054851\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20210526-035412\n"
]
}
],
Expand Down Expand Up @@ -221,14 +221,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:13.0\n"
"Episode#0 Reward:31.0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#1 Reward:30.0\n"
"Episode#1 Reward:36.0\n"
]
}
],
Expand Down
6 changes: 3 additions & 3 deletions Chapter03/2_dueling_dqn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDQN/CartPole-v0/20210524-054842\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDQN/CartPole-v0/20210526-035403\n"
]
}
],
Expand Down Expand Up @@ -225,14 +225,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:22.0\n"
"Episode#0 Reward:19.0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#1 Reward:14.0\n"
"Episode#1 Reward:13.0\n"
]
}
],
Expand Down
6 changes: 3 additions & 3 deletions Chapter03/3_dueling_double_dqn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDoubleDQN/CartPole-v0/20210524-054804\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDoubleDQN/CartPole-v0/20210526-035318\n"
]
}
],
Expand Down Expand Up @@ -223,14 +223,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:18.0\n"
"Episode#0 Reward:10.0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#1 Reward:23.0\n"
"Episode#1 Reward:24.0\n"
]
}
],
Expand Down
6 changes: 3 additions & 3 deletions Chapter03/4_drqn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DRQN/CartPole-v0/20210524-054808\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DRQN/CartPole-v0/20210526-035322\n"
]
}
],
Expand Down Expand Up @@ -238,14 +238,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:19.0\n"
"Episode#0 Reward:14.0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#1 Reward:27.0\n"
"Episode#1 Reward:28.0\n"
]
}
],
Expand Down
18 changes: 8 additions & 10 deletions Chapter03/5_a3c_continuous.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-A3C/MountainCarContinuous-v0/20210524-054855\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-A3C/MountainCarContinuous-v0/20210526-035417\n"
]
}
],
Expand Down Expand Up @@ -300,9 +300,7 @@
" actions = np.array([action.squeeze() for action in action_batch])\n",
" rewards = np.array([reward.squeeze() for reward in reward_batch])\n",
" next_v_value = self.critic.model.predict(next_state)\n",
" td_targets = self.n_step_td_target(\n",
" (rewards + 8) / 8, next_v_value, done\n",
" )\n",
" td_targets = self.n_step_td_target(rewards, next_v_value, done)\n",
" advantages = td_targets - self.critic.model.predict(states)\n",
"\n",
" actor_loss = self.global_actor.train(states, actions, advantages)\n",
Expand Down Expand Up @@ -337,36 +335,36 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:-38.682680223719295\n",
"Episode#1 Reward:-39.445512783584974\n"
"Episode#0 Reward:-31.801682595606632\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#2 Reward:-39.93089560087406\n"
"Episode#1 Reward:-30.892560106187222\n",
"Episode#2 Reward:-33.11217727229108\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#3 Reward:-38.27680474153451\n"
"Episode#3 Reward:-31.588685807062376\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#4 Reward:-57.081332366974294\n"
"Episode#4 Reward:-2.0538625282953777\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#5 Reward:-57.836438562657676\n"
"Episode#5 Reward:-2.186204400033226\n"
]
}
],
Expand Down
6 changes: 3 additions & 3 deletions Chapter03/6_ppo_continuous.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Saving training logs to:logs/TFRL-Cookbook-Ch3-PPO/Pendulum-v0/20210524-054813\n"
"Saving training logs to:logs/TFRL-Cookbook-Ch3-PPO/Pendulum-v0/20210526-035328\n"
]
}
],
Expand Down Expand Up @@ -310,14 +310,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#0 Reward:-1753.283954487982\n"
"Episode#0 Reward:-1444.8163136115284\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode#1 Reward:-1715.6088237340073\n"
"Episode#1 Reward:-1890.140398895494\n"
]
}
],
Expand Down
Loading

0 comments on commit 31f8376

Please sign in to comment.