In [None]:
import os

import gym
from stable_baselines3 import A2C, DQN, PPO, HER
from stable_baselines3.common.env_util import make_atari_env, make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import VecFrameStack

In [None]:
# Build a single LunarLander environment with its physics options spelled out.
# Options are passed as ordinary keyword arguments; the `name: type = value`
# form is only valid in a function signature and is a SyntaxError in a call.
env = gym.make(
    "LunarLander-v2",
    continuous=False,       # discrete action space
    gravity=-10.0,
    enable_wind=False,      # wind/turbulence values below only matter when True
    wind_power=15.0,
    turbulence_power=1.5,
)

In [None]:
# Reset the environment; as the cell's last expression, the value returned by
# reset() is what the notebook displays.
env.reset()

In [None]:
# Rebind `env` to a vectorised LunarLander environment for Stable-Baselines3.
# LunarLander is a Box2D task, not an Atari game, so the generic
# `make_vec_env` helper is used here: `make_atari_env` applies Atari-specific
# preprocessing wrappers that do not fit this environment.
env = make_vec_env('LunarLander-v2', n_envs=1, seed=0)
# n_stack=1 keeps a single observation per step (no frame history is added).
env = VecFrameStack(env, n_stack=1)

In [None]:
# Output locations, all under the relative 'Training' directory:
# the TensorBoard log directory followed by one save path per algorithm.
Log_path, a2c_path, dqn_path, ppo_path = (
    os.path.join('Training', leaf)
    for leaf in ('Logs', 'A2C_model', 'DQN_Model', 'PPO_Model')
)

In [None]:
# Train an A2C agent on `env` and save it to `a2c_path`.
# LunarLander observations are a flat feature vector, so the MLP policy is
# used; 'CnnPolicy' is meant for image observations.
# NOTE: device='cuda' requires a CUDA-capable GPU; use device='auto' to let
# Stable-Baselines3 fall back to the CPU when none is available.
model = A2C('MlpPolicy', env, verbose=1, tensorboard_log=Log_path, device='cuda')
model.learn(total_timesteps=25000, log_interval=100)
model.save(a2c_path)

In [None]:
# Remove the `model` name from the namespace; the A2C model was saved to
# `a2c_path` in the previous cell, and `model` is bound again for DQN below.
del model

In [None]:
# Train a DQN agent on `env` and save it to `dqn_path`.
# LunarLander observations are a flat feature vector, so the MLP policy is
# used; 'CnnPolicy' is meant for image observations.
# NOTE: device='cuda' requires a CUDA-capable GPU; use device='auto' to let
# Stable-Baselines3 fall back to the CPU when none is available.
model = DQN('MlpPolicy', env, verbose=1, tensorboard_log=Log_path, device='cuda')
model.learn(total_timesteps=25000, log_interval=100)
model.save(dqn_path)

In [None]:
# Remove the `model` name from the namespace; the DQN model was saved to
# `dqn_path` in the previous cell, and `model` is bound again for PPO below.
del model

In [None]:
# Train a PPO agent on `env` and save it to `ppo_path`.
# LunarLander observations are a flat feature vector, so the MLP policy is
# used; 'CnnPolicy' is meant for image observations.
# NOTE: device='cuda' requires a CUDA-capable GPU; use device='auto' to let
# Stable-Baselines3 fall back to the CPU when none is available.
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=Log_path, device='cuda')
model.learn(total_timesteps=25000, log_interval=100)
model.save(ppo_path)

In [None]:
# Remove the `model` name from the namespace; the PPO model was saved to
# `ppo_path` in the previous cell.
del model