In [11]:
import os
import gym
from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.evaluation import evaluate_policy


In [12]:
class Algorithm:
    """Bundle of PPO, DQN and A2C agents that share one Gym environment.

    For the environment id ``env_name`` an instance keeps:

    * ``env`` -- the environment returned by ``gym.make(env_name)``;
    * ``PPO_model`` / ``DQN_model`` / ``A2C_model`` -- one ``"MlpPolicy"``
      model per algorithm, all logging to ``Training/<env_name>/Logs``;
    * ``PPO_path`` / ``DQN_path`` / ``A2C_path`` -- where each model is
      saved, under ``Training/<env_name>/Saved Models``.
    """

    # Attribute prefixes of the managed algorithms, in the order in which
    # they are trained and evaluated.
    _ALGORITHMS = ("PPO", "DQN", "A2C")

    def __init__(self, env_name, seed=None):
        """Create the environment and one untrained model per algorithm.

        Args:
            env_name: Gym environment id; also used as the sub-directory name
                under ``Training`` for logs and saved models.
            seed: Optional seed forwarded to every model so runs can be
                repeated. ``None`` (the default) leaves the models unseeded.
        """
        env_path = os.path.join("Training", env_name)
        log_path = os.path.join(env_path, "Logs")
        self.PPO_path = os.path.join(env_path, "Saved Models", "PPO")
        self.DQN_path = os.path.join(env_path, "Saved Models", "DQN")
        self.A2C_path = os.path.join(env_path, "Saved Models", "A2C")

        self.env = gym.make(env_name)
        self.PPO_model = PPO("MlpPolicy", self.env, verbose=1, tensorboard_log=log_path, seed=seed)
        self.DQN_model = DQN("MlpPolicy", self.env, verbose=1, tensorboard_log=log_path, seed=seed)
        self.A2C_model = A2C("MlpPolicy", self.env, verbose=1, tensorboard_log=log_path, seed=seed)

    def learn(self, timesteps=100000, save=True):
        """Train the PPO, DQN and A2C models one after another.

        Args:
            timesteps: Number of timesteps passed to each model's ``learn``.
            save: When true, every model is written to its ``*_path`` as soon
                as its own training finishes.
        """
        for name in self._ALGORITHMS:
            model = getattr(self, name + "_model")
            model.learn(timesteps)
            if save:
                # Checkpoint immediately instead of after all three runs, so
                # an interrupt or error in a later algorithm does not throw
                # away the training that already completed.
                model.save(getattr(self, name + "_path"))

    def save(self):
        """Write all three models to their respective ``*_path`` locations."""
        self.PPO_model.save(self.PPO_path)
        self.DQN_model.save(self.DQN_path)
        self.A2C_model.save(self.A2C_path)

    def load(self):
        """Replace the in-memory models with the ones stored at ``*_path``.

        Each loaded model is attached to ``self.env``.
        """
        self.PPO_model = PPO.load(self.PPO_path, env=self.env)
        self.DQN_model = DQN.load(self.DQN_path, env=self.env)
        self.A2C_model = A2C.load(self.A2C_path, env=self.env)

    def evaluate(self, n_eval_episodes=1, render=True):
        """Run ``evaluate_policy`` for each model on ``self.env``.

        Args:
            n_eval_episodes: Episodes to run per model (default 1).
            render: Whether to render the environment while evaluating
                (default True).

        Returns:
            dict mapping ``"PPO"``, ``"DQN"`` and ``"A2C"`` to whatever
            ``evaluate_policy`` returned for that model (per the
            stable-baselines3 documentation, a ``(mean_reward, std_reward)``
            pair with the default arguments).
        """
        results = {}
        for name in self._ALGORITHMS:
            results[name] = evaluate_policy(
                getattr(self, name + "_model"),
                self.env,
                n_eval_episodes=n_eval_episodes,
                render=render,
            )
        return results


In [13]:
# Build one Algorithm per environment id. Each constructor call creates the
# environment plus PPO, DQN and A2C models for it; nothing is trained here.
ab = Algorithm("Acrobot-v1")
cp = Algorithm("CartPole-v0")
mc = Algorithm("MountainCar-v0")

tx = Algorithm("Taxi-v3")
cw = Algorithm("CliffWalking-v0")
# NOTE(review): no fl.learn() call appears in the cells visible here --
# confirm whether FrozenLake is meant to be trained.
fl = Algorithm("FrozenLake-v1")

ll = Algorithm("LunarLander-v2")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cuda device
Wrapping the env with a `Monitor` wrapper

In [None]:
# Train PPO, DQN and A2C on Acrobot, then on CartPole, using learn()'s
# defaults (100000 timesteps per model, save=True).
ab.learn()
cp.learn()

In [None]:
# Train the three MountainCar models with learn()'s defaults
# (100000 timesteps per model, save=True).
mc.learn()


In [None]:
# Train PPO, DQN and A2C on Taxi, then on CliffWalking, using learn()'s
# defaults (100000 timesteps per model, save=True).
tx.learn()
cw.learn()


In [15]:
# Train the three LunarLander models with learn()'s defaults
# (100000 timesteps per model, save=True); PPO is trained first.
ll.learn()


Logging to Training\LunarLander-v2\Logs\PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.6     |
|    ep_rew_mean     | -168     |
| time/              |          |
|    fps             | 236      |
|    iterations      | 1        |
|    time_elapsed    | 8        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 94.6        |
|    ep_rew_mean          | -182        |
| time/                   |             |
|    fps                  | 190         |
|    iterations           | 2           |
|    time_elapsed         | 21          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011812858 |
|    clip_fraction        | 0.0396      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   



In [16]:
# Swap the in-memory CartPole models for the ones saved on disk, then run
# one rendered evaluation episode per algorithm.
cp.load()
cp.evaluate()



Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [18]:
# Swap the in-memory Acrobot models for the ones saved on disk, then run
# one rendered evaluation episode per algorithm.
ab.load()
ab.evaluate()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [17]:
# Swap the in-memory LunarLander models for the ones saved on disk, then run
# one rendered evaluation episode per algorithm.
ll.load()
ll.evaluate()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [1]:
!tensorboard --logdir="./"

^C
