# Escolher algum ambiente

Foi escolhido como ambiente de estudo o '**Intersection**', disponível em: http://highway-env.farama.org/environments/intersection/

In [3]:
import gymnasium as gym

# Create the "intersection-v1" task with array-based rendering
# (render_mode="rgb_array").
# NOTE(review): gym.make can only resolve this id if the highway-env package
# has registered its environments; depending on the installed versions an
# explicit `import highway_env` may be needed first -- confirm.
env = gym.make(
    "intersection-v1",
    render_mode="rgb_array",
)

# Bare last expression so the notebook displays the environment's
# configuration dictionary as the cell output.
env.unwrapped.config

{'observation': {'type': 'Kinematics',
  'vehicles_count': 5,
  'features': ['presence',
   'x',
   'y',
   'vx',
   'vy',
   'long_off',
   'lat_off',
   'ang_off']},
 'action': {'type': 'ContinuousAction',
  'steering_range': [-1.0471975511965976, 1.0471975511965976],
  'longitudinal': True,
  'lateral': True,
  'dynamical': True},
 'simulation_frequency': 15,
 'policy_frequency': 1,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'screen_width': 600,
 'screen_height': 600,
 'centering_position': [0.5, 0.6],
 'scaling': 7.15,
 'show_trajectories': False,
 'render_agent': True,
 'offscreen_rendering': False,
 'manual_control': False,
 'real_time_rendering': False,
 'duration': 13,
 'destination': 'o1',
 'controlled_vehicles': 1,
 'initial_vehicle_count': 10,
 'spawn_probability': 0.6,
 'collision_reward': -5,
 'high_speed_reward': 1,
 'arrived_reward': 1,
 'reward_speed_range': [7.0, 9.0],
 'normalize_reward': False,
 'offroad_terminal': False}

In [4]:
# Smoke test: play a single episode using uniformly sampled random actions.
# Both the environment and the action-space sampler are seeded so that the
# rollout is repeatable after a kernel restart.
env.action_space.seed(42)
env.reset(seed=42)

try:
    # The range is only a safety cap; the loop exits as soon as the episode
    # is terminated or truncated.
    for _ in range(1_000_000):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()  # the returned frame is not stored
        if terminated or truncated:
            break
finally:
    # Release the environment even if step()/render() raises.
    env.close()

# Escolher algoritmos

## DDPG (baseline)

In [5]:
from stable_baselines3 import DDPG

# Train the DDPG baseline on a fresh environment instance.
env = gym.make("intersection-v1", render_mode='rgb_array')

# A fixed seed makes this stochastic training run repeatable, so reruns and
# algorithm comparisons are not dominated by random variation.
# NOTE(review): no action noise is configured here, so exploration relies on
# the library defaults -- confirm this is intended for the baseline.
model = DDPG("MlpPolicy", env, verbose=1, seed=42)

# log_interval=50 prints one statistics table every 50 episodes
# (see the `episodes` row of the logged tables).
model.learn(total_timesteps=5000, log_interval=50, progress_bar=True)
model.save("ddpg_intersection")

# The trained policy is persisted on disk; release the training environment
# instead of leaving it open when `env` is rebound later.
env.close()

2024-02-26 14:58:45.990700: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-26 14:58:46.051938: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-26 14:58:46.051981: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-26 14:58:46.053853: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-26 14:58:46.064829: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructio

Output()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 7.22     |
|    ep_rew_mean     | 1.16     |
| time/              |          |
|    episodes        | 50       |
|    fps             | 4        |
|    time_elapsed    | 81       |
|    total_timesteps | 361      |
| train/             |          |
|    actor_loss      | -0.902   |
|    critic_loss     | 0.813    |
|    learning_rate   | 0.001    |
|    n_updates       | 256      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.59     |
|    ep_rew_mean     | 1.13     |
| time/              |          |
|    episodes        | 100      |
|    fps             | 4        |
|    time_elapsed    | 207      |
|    total_timesteps | 859      |
| train/             |          |
|    actor_loss      | -1.21    |
|    critic_loss     | 0.504    |
|    learning_rate   | 0.001    |
|    n_updates       | 757      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.69     |
|    ep_rew_mean     | 1.13     |
| time/              |          |
|    episodes        | 150      |
|    fps             | 4        |
|    time_elapsed    | 318      |
|    total_timesteps | 1330     |
| train/             |          |
|    actor_loss      | -1.15    |
|    critic_loss     | 0.629    |
|    learning_rate   | 0.001    |
|    n_updates       | 1220     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 10       |
|    ep_rew_mean     | 1.61     |
| time/              |          |
|    episodes        | 200      |
|    fps             | 4        |
|    time_elapsed    | 441      |
|    total_timesteps | 1859     |
| train/             |          |
|    actor_loss      | -1.22    |
|    critic_loss     | 0.756    |
|    learning_rate   | 0.001    |
|    n_updates       | 1754     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 10.4     |
|    ep_rew_mean     | 1.97     |
| time/              |          |
|    episodes        | 250      |
|    fps             | 4        |
|    time_elapsed    | 569      |
|    total_timesteps | 2370     |
| train/             |          |
|    actor_loss      | -1.35    |
|    critic_loss     | 0.652    |
|    learning_rate   | 0.001    |
|    n_updates       | 2260     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.29     |
|    ep_rew_mean     | 1.4      |
| time/              |          |
|    episodes        | 300      |
|    fps             | 4        |
|    time_elapsed    | 666      |
|    total_timesteps | 2788     |
| train/             |          |
|    actor_loss      | -1.46    |
|    critic_loss     | 0.586    |
|    learning_rate   | 0.001    |
|    n_updates       | 2678     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.23     |
|    ep_rew_mean     | 1.11     |
| time/              |          |
|    episodes        | 350      |
|    fps             | 4        |
|    time_elapsed    | 774      |
|    total_timesteps | 3293     |
| train/             |          |
|    actor_loss      | -1.43    |
|    critic_loss     | 0.733    |
|    learning_rate   | 0.001    |
|    n_updates       | 3191     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.54     |
|    ep_rew_mean     | 1.11     |
| time/              |          |
|    episodes        | 400      |
|    fps             | 4        |
|    time_elapsed    | 874      |
|    total_timesteps | 3742     |
| train/             |          |
|    actor_loss      | -1.5     |
|    critic_loss     | 0.767    |
|    learning_rate   | 0.001    |
|    n_updates       | 3639     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.66     |
|    ep_rew_mean     | 1.21     |
| time/              |          |
|    episodes        | 450      |
|    fps             | 4        |
|    time_elapsed    | 988      |
|    total_timesteps | 4259     |
| train/             |          |
|    actor_loss      | -1.57    |
|    critic_loss     | 0.69     |
|    learning_rate   | 0.001    |
|    n_updates       | 4155     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.98     |
|    ep_rew_mean     | 1.15     |
| time/              |          |
|    episodes        | 500      |
|    fps             | 4        |
|    time_elapsed    | 1091     |
|    total_timesteps | 4740     |
| train/             |          |
|    actor_loss      | -1.48    |
|    critic_loss     | 0.701    |
|    learning_rate   | 0.001    |
|    n_updates       | 4637     |
---------------------------------


In [17]:
# Reload the saved DDPG policy and run one deterministic evaluation episode.
model = DDPG.load("ddpg_intersection")

env = gym.make("intersection-v1", render_mode='rgb_array')
# Seeded reset so the evaluated scenario is the same on every run.
obs, info = env.reset(seed=42)

# Track the outcome of the episode; without this the cell yields no result.
episode_return = 0.0
episode_length = 0

try:
    while True:
        env.render()  # the returned frame is not stored
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        episode_return += float(reward)
        episode_length += 1
        if terminated or truncated:
            break
finally:
    # Release the environment even if prediction or stepping raises.
    env.close()

# Last expression: shown as the cell output.
{"algorithm": "DDPG", "episode_return": episode_return, "episode_length": episode_length}

## SAC (state of the art)

In [11]:
from stable_baselines3 import SAC

# Train the SAC agent on a fresh environment instance.
env = gym.make("intersection-v1", render_mode='rgb_array')

# A fixed seed makes this stochastic training run repeatable, so reruns and
# algorithm comparisons are not dominated by random variation.
model = SAC("MlpPolicy", env, verbose=1, seed=42)

# log_interval=50 prints one statistics table every 50 episodes
# (see the `episodes` row of the logged tables).
model.learn(total_timesteps=5000, log_interval=50, progress_bar=True)
model.save("sac_intersection")

# The trained policy is persisted on disk; release the training environment
# instead of leaving it open when `env` is rebound later.
env.close()

Output()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5.98     |
|    ep_rew_mean     | -0.219   |
| time/              |          |
|    episodes        | 50       |
|    fps             | 3        |
|    time_elapsed    | 85       |
|    total_timesteps | 299      |
| train/             |          |
|    actor_loss      | -2.79    |
|    critic_loss     | 1.4      |
|    ent_coef        | 0.945    |
|    ent_coef_loss   | -0.184   |
|    learning_rate   | 0.0003   |
|    n_updates       | 198      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 6.39     |
|    ep_rew_mean     | 0.0546   |
| time/              |          |
|    episodes        | 100      |
|    fps             | 3        |
|    time_elapsed    | 171      |
|    total_timesteps | 639      |
| train/             |          |
|    actor_loss      | -3.82    |
|    critic_loss     | 0.95     |
|    ent_coef        | 0.852    |
|    ent_coef_loss   | -0.537   |
|    learning_rate   | 0.0003   |
|    n_updates       | 538      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 6.65     |
|    ep_rew_mean     | 0.108    |
| time/              |          |
|    episodes        | 150      |
|    fps             | 3        |
|    time_elapsed    | 259      |
|    total_timesteps | 964      |
| train/             |          |
|    actor_loss      | -4.14    |
|    critic_loss     | 1.69     |
|    ent_coef        | 0.772    |
|    ent_coef_loss   | -0.869   |
|    learning_rate   | 0.0003   |
|    n_updates       | 863      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 6.76     |
|    ep_rew_mean     | 0.0667   |
| time/              |          |
|    episodes        | 200      |
|    fps             | 3        |
|    time_elapsed    | 345      |
|    total_timesteps | 1315     |
| train/             |          |
|    actor_loss      | -4.03    |
|    critic_loss     | 1.66     |
|    ent_coef        | 0.695    |
|    ent_coef_loss   | -1.19    |
|    learning_rate   | 0.0003   |
|    n_updates       | 1214     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 7.72     |
|    ep_rew_mean     | 0.314    |
| time/              |          |
|    episodes        | 250      |
|    fps             | 3        |
|    time_elapsed    | 456      |
|    total_timesteps | 1736     |
| train/             |          |
|    actor_loss      | -4.19    |
|    critic_loss     | 1.82     |
|    ent_coef        | 0.613    |
|    ent_coef_loss   | -1.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 1635     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.51     |
|    ep_rew_mean     | 0.349    |
| time/              |          |
|    episodes        | 300      |
|    fps             | 3        |
|    time_elapsed    | 571      |
|    total_timesteps | 2166     |
| train/             |          |
|    actor_loss      | -4.36    |
|    critic_loss     | 1.32     |
|    ent_coef        | 0.54     |
|    ent_coef_loss   | -1.91    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2065     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.61     |
|    ep_rew_mean     | 0.16     |
| time/              |          |
|    episodes        | 350      |
|    fps             | 3        |
|    time_elapsed    | 695      |
|    total_timesteps | 2597     |
| train/             |          |
|    actor_loss      | -4.66    |
|    critic_loss     | 1.34     |
|    ent_coef        | 0.477    |
|    ent_coef_loss   | -2.3     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2496     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.74     |
|    ep_rew_mean     | 0.333    |
| time/              |          |
|    episodes        | 400      |
|    fps             | 3        |
|    time_elapsed    | 808      |
|    total_timesteps | 3040     |
| train/             |          |
|    actor_loss      | -3.76    |
|    critic_loss     | 1.31     |
|    ent_coef        | 0.419    |
|    ent_coef_loss   | -2.69    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2939     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.03     |
|    ep_rew_mean     | 0.38     |
| time/              |          |
|    episodes        | 450      |
|    fps             | 3        |
|    time_elapsed    | 931      |
|    total_timesteps | 3500     |
| train/             |          |
|    actor_loss      | -4.07    |
|    critic_loss     | 1.44     |
|    ent_coef        | 0.366    |
|    ent_coef_loss   | -3       |
|    learning_rate   | 0.0003   |
|    n_updates       | 3399     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.67     |
|    ep_rew_mean     | 0.115    |
| time/              |          |
|    episodes        | 500      |
|    fps             | 3        |
|    time_elapsed    | 1040     |
|    total_timesteps | 3907     |
| train/             |          |
|    actor_loss      | -3.16    |
|    critic_loss     | 1.66     |
|    ent_coef        | 0.325    |
|    ent_coef_loss   | -3.43    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3806     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.68     |
|    ep_rew_mean     | 0.0224   |
| time/              |          |
|    episodes        | 550      |
|    fps             | 3        |
|    time_elapsed    | 1164     |
|    total_timesteps | 4368     |
| train/             |          |
|    actor_loss      | -2.8     |
|    critic_loss     | 1.61     |
|    ent_coef        | 0.284    |
|    ent_coef_loss   | -3.71    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4267     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9.22     |
|    ep_rew_mean     | 0.113    |
| time/              |          |
|    episodes        | 600      |
|    fps             | 3        |
|    time_elapsed    | 1291     |
|    total_timesteps | 4829     |
| train/             |          |
|    actor_loss      | -2.5     |
|    critic_loss     | 1.31     |
|    ent_coef        | 0.248    |
|    ent_coef_loss   | -4.08    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4728     |
---------------------------------


In [21]:
# Reload the saved SAC policy and run one deterministic evaluation episode.
model = SAC.load("sac_intersection")

env = gym.make("intersection-v1", render_mode='rgb_array')
# Seeded reset so the evaluated scenario is the same on every run.
obs, info = env.reset(seed=42)

# Track the outcome of the episode; without this the cell yields no result.
episode_return = 0.0
episode_length = 0

try:
    while True:
        env.render()  # the returned frame is not stored
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        episode_return += float(reward)
        episode_length += 1
        if terminated or truncated:
            break
finally:
    # Release the environment even if prediction or stepping raises.
    env.close()

# Last expression: shown as the cell output.
{"algorithm": "SAC", "episode_return": episode_return, "episode_length": episode_length}