# LunarLander

---

# Game Setup

In [1]:
import warnings
warnings.simplefilter("ignore")

In [2]:
# Import game environment
import gym

In [3]:
# Build the LunarLander environment from its registered Gym id
ENV_ID = "LunarLander-v2"
env = gym.make(ENV_ID)

In [4]:
# Inspect the action space; the bare expression is rendered as the cell output
env.action_space

Discrete(4)

In [None]:
# Baseline: play a handful of episodes with uniformly sampled actions
episodes = 5

for episode in range(episodes):
    # Every episode starts from a freshly reset environment
    state = env.reset()

    while True:
        random_action = env.action_space.sample()
        new_state, reward, done, info = env.step(random_action)
        env.render()
        # Stop stepping as soon as the environment reports the episode is over
        if done:
            break

In [None]:
# Close the environment once the random-action episodes above have finished
env.close()

# Preprocessing Environment

In [5]:
# Reset once and report the shape of a single raw observation
state = env.reset()
observation_shape = state.shape
print("dimensions =", observation_shape)

dimensions = (8,)


In [6]:
from stable_baselines3.common.vec_env import DummyVecEnv

# Wrapping inside a dummy vectorized environment.
# The guard keeps this cell idempotent: running it a second time would
# otherwise wrap the already vectorized env in another DummyVecEnv.
if not isinstance(env, DummyVecEnv):
    # Bind the raw env to its own name so the factory below does not depend
    # on `env`, which is rebound on the next line.
    raw_env = env
    env = DummyVecEnv([lambda: raw_env])

# Observations from the vectorized env carry a leading axis (one row per env)
state = env.reset()
print("dimensions =", state.shape)

dimensions = (1, 8)


# Training The RL Model

In [7]:
from stable_baselines3 import PPO

# Instantiate a PPO agent with the built-in MLP policy on the wrapped
# environment; verbose=1 makes training print its progress tables.
model = PPO(policy="MlpPolicy", env=env, verbose=1)

Using cpu device


In [8]:
# Total number of environment steps to train for
TOTAL_TIMESTEPS = 100_000

# Left as the last bare expression so the returned model is still displayed
model.learn(total_timesteps=TOTAL_TIMESTEPS)

-----------------------------
| time/              |      |
|    fps             | 2219 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1551        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.007338364 |
|    clip_fraction        | 0.0277      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.000822    |
|    learning_rate        | 0.0003      |
|    loss                 | 520         |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00667    |
|    value_loss           | 1.48e+03    |
-----------------------------------------
----------------------------------

-----------------------------------------
| time/                   |             |
|    fps                  | 1105        |
|    iterations           | 13          |
|    time_elapsed         | 24          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.006761041 |
|    clip_fraction        | 0.0258      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.14       |
|    explained_variance   | 0.207       |
|    learning_rate        | 0.0003      |
|    loss                 | 201         |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00754    |
|    value_loss           | 279         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1065        |
|    iterations           | 14          |
|    time_elapsed         | 26          |
|    total_timesteps      | 28672 

-----------------------------------------
| time/                   |             |
|    fps                  | 908         |
|    iterations           | 24          |
|    time_elapsed         | 54          |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.016596578 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | 0.588       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.8        |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.00844    |
|    value_loss           | 51.5        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 895          |
|    iterations           | 25           |
|    time_elapsed         | 57           |
|    total_timesteps      | 5

-----------------------------------------
| time/                   |             |
|    fps                  | 820         |
|    iterations           | 35          |
|    time_elapsed         | 87          |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.005761316 |
|    clip_fraction        | 0.0264      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.988      |
|    explained_variance   | 0.271       |
|    learning_rate        | 0.0003      |
|    loss                 | 46.7        |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.00357    |
|    value_loss           | 138         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 815         |
|    iterations           | 36          |
|    time_elapsed         | 90          |
|    total_timesteps      | 73728 

-----------------------------------------
| time/                   |             |
|    fps                  | 798         |
|    iterations           | 46          |
|    time_elapsed         | 117         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.006476188 |
|    clip_fraction        | 0.0416      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.888      |
|    explained_variance   | 0.443       |
|    learning_rate        | 0.0003      |
|    loss                 | 23.4        |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.00438    |
|    value_loss           | 191         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 796          |
|    iterations           | 47           |
|    time_elapsed         | 120          |
|    total_timesteps      | 9

<stable_baselines3.ppo.ppo.PPO at 0x1d3ee8fa100>

In [None]:
# Persist the trained model under the name that the "Testing The Model"
# cell loads back.
# NOTE(review): the file name says 200000, but model.learn() above is called
# with total_timesteps=100_000 — confirm which one is intended.
model.save("best_model_iter200000")

# Testing The Model

In [None]:
# Restore the saved agent. `load` is a classmethod that returns a new model,
# so call it on the PPO class: this cell then also works on a fresh kernel
# where no `model` instance exists yet (e.g. when training is skipped).
# Passing `env` attaches the current environment to the loaded model.
model = PPO.load("best_model_iter200000", env=env)

In [9]:
# Watch the trained agent play.
# N_STEPS counts individual environment steps, not episodes. The vectorized
# env resets itself when an episode ends, so `done` needs no handling here.
N_STEPS = 2000

state = env.reset()
for step in range(N_STEPS):
    # deterministic=True takes the policy's most likely action instead of
    # sampling, which is the usual setting when evaluating a trained agent.
    # `_states` (unused here) avoids overwriting IPython's `_` variable.
    action, _states = model.predict(state, deterministic=True)
    state, reward, done, info = env.step(action)
    env.render()
    

In [10]:
# Close the environment once playback has finished
env.close()