# 0. Install and Import Dependencies

In [None]:
!pip install tensorflow==1.15.0 tensorflow-gpu==1.15.0 stable_baselines gym box2d-py --user

In [2]:
import gym 
from stable_baselines import ACER
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.evaluation import evaluate_policy

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
# Gym environment id; later cells pass this name to gym.make().
environment_name = 'LunarLander-v2'

# 1. Test Random Environment

In [5]:
# Instantiate the raw (unwrapped) environment used for the random-action test below.
env = gym.make(environment_name)

In [6]:
# Play a handful of episodes with randomly sampled actions as a sanity check
# of the environment, printing the total reward collected in each episode.
episodes = 10
try:
    for episode in range(1, episodes+1):
        state = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()
            n_state, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the render window, even if the cell is interrupted or
    # a step raises part-way through an episode.
    env.close()

Episode:1 Score:-117.42800566285462
Episode:2 Score:-221.5901611372018
Episode:3 Score:-103.02637740513153
Episode:4 Score:-114.56307978687623
Episode:5 Score:-401.2705869759806
Episode:6 Score:-122.73167364496194
Episode:7 Score:-198.68632709242547
Episode:8 Score:-127.68905207801089
Episode:9 Score:-258.4556793059761
Episode:10 Score:-121.32792106934696


# 2. Build and Train the Model

In [13]:
# Wrap a freshly created environment in a DummyVecEnv and build the ACER agent
# on top of it.
#
# The factory passed to DummyVecEnv creates its own environment instead of
# closing over the name `env`: that name is rebound to the wrapper on this very
# line, so a `lambda: env` factory would return the wrapper itself if it were
# ever invoked after the assignment.
env = DummyVecEnv([lambda: gym.make(environment_name)])
model = ACER('MlpPolicy', env, verbose = 1)

In [14]:
# Step budget for training; the call below is the long-running part of the notebook.
training_steps = 100000
model.learn(total_timesteps=training_steps)

----------------------------------
| avg_norm_adj        | 4.45     |
| avg_norm_g          | 18.8     |
| avg_norm_grads_f    | 16.3     |
| avg_norm_k          | 2        |
| avg_norm_k_dot_g    | 18.8     |
| entropy             | 29.1     |
| explained_variance  | 2.52e-05 |
| fps                 | 0        |
| loss                | 0.978    |
| loss_bc             | -0       |
| loss_f              | -6.48    |
| loss_policy         | -6.48    |
| loss_q              | 15.5     |
| mean_episode_length | 0        |
| mean_episode_reward | 0        |
| norm_grads          | 5.87     |
| norm_grads_policy   | 4.4      |
| norm_grads_q        | 3.89     |
| total_timesteps     | 20       |
----------------------------------
----------------------------------
| avg_norm_adj        | 68.3     |
| avg_norm_g          | 275      |
| avg_norm_grads_f    | 238      |
| avg_norm_k          | 2.01     |
| avg_norm_k_dot_g    | 277      |
| entropy             | 29       |
| explained_variance

----------------------------------
| avg_norm_adj        | 0.933    |
| avg_norm_g          | 8.26     |
| avg_norm_grads_f    | 7.55     |
| avg_norm_k          | 1.95     |
| avg_norm_k_dot_g    | 7.67     |
| entropy             | 11.4     |
| explained_variance  | 0.748    |
| fps                 | 317      |
| loss                | 2.74     |
| loss_bc             | -0       |
| loss_f              | -0.485   |
| loss_policy         | -0.485   |
| loss_q              | 6.67     |
| mean_episode_length | 314      |
| mean_episode_reward | -107     |
| norm_grads          | 21.5     |
| norm_grads_policy   | 7.67     |
| norm_grads_q        | 20.1     |
| total_timesteps     | 24020    |
----------------------------------
----------------------------------
| avg_norm_adj        | 1.94     |
| avg_norm_g          | 17       |
| avg_norm_grads_f    | 15.7     |
| avg_norm_k          | 2.3      |
| avg_norm_k_dot_g    | 15.7     |
| entropy             | 12.5     |
| explained_variance

KeyboardInterrupt: 

# 3. Save and Test the Model

In [16]:
# Evaluate the trained agent over 10 rendered episodes and show the result.
# The return value is kept and placed last in the cell so that it is displayed;
# previously it was discarded because env.close() was the final statement.
try:
    evaluation_result = evaluate_policy(model, env, n_eval_episodes=10, render=True)
finally:
    # Close the render window even if evaluation fails or is interrupted.
    env.close()
evaluation_result

In [20]:
# Persist the agent under the name "ACER_model"; a later cell loads it back by the same name.
model.save("ACER_model")

In [21]:
# Drop the in-memory agent so that the following load cell rebinds `model` from the saved file.
del model

In [25]:
# Restore the agent saved as "ACER_model" and attach the existing `env` to it.
model = ACER.load("ACER_model", env=env)

In [None]:
# Watch the reloaded agent play a fixed number of episodes.
# The loop is bounded so the cell finishes on its own (the original
# `while True` could only be stopped with a manual kernel interrupt).
demo_episodes = 5
completed_episodes = 0
obs = env.reset()
while completed_episodes < demo_episodes:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    # `done` is expected to hold one flag per wrapped environment (env is the
    # DummyVecEnv built earlier); per the stable-baselines docs the wrapper
    # resets a finished environment itself, so episodes only need counting.
    completed_episodes += int(sum(done))
    

In [27]:
# Close the environment (and its render window) once playback is finished.
env.close()