# Highway Environment

Let's look at the highway environment https://github.com/Farama-Foundation/HighwayEnv

Set up the environment by adding the submodule to this folder.

Setup the environment by calling 

```
cd HighwayEnv
conda activate q-learning
python setup develop
```

In [2]:
import gymnasium as gym
import numpy as np
from pprint import pprint

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


Let's define the gymnasium environment, and see what we can do in it.

We define observations according to the definition in https://highway-env.readthedocs.io/en/latest/observations/index.html

We have the observation parameters 
- `presence` (Whether vehicle is visible in view)
- `x`
- `y`
- `vx`
- `vy`
- `cos_h`
- `sin_h`

In [9]:
env = gym.make('highway-v0', render_mode='rgb_array')

config = {
    "observation": {
        "type": "Kinematics",
        "vehicles_count": 15,
        "features": ["presence", "x", "y", "vx", "vy"],
        "features_range": {
            "x": [-100, 100],
            "y": [-100, 100],
            "vx": [-20, 20],
            "vy": [-20, 20]
        },
        "absolute": False,
        "order": "sorted",
        "normalize": False
    },
    "duration": 20,
    "vehicles_count": 20,
    "collision_reward": -1,
    "high_speed_reward": 0.4
}

env.configure(config)

pprint(env.config)

{'action': {'type': 'DiscreteMetaAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'controlled_vehicles': 1,
 'duration': 20,
 'ego_spacing': 2,
 'high_speed_reward': 0.4,
 'initial_lane_id': None,
 'lane_change_reward': 0,
 'lanes_count': 4,
 'manual_control': False,
 'normalize_reward': True,
 'observation': {'absolute': False,
                 'features': ['presence', 'x', 'y', 'vx', 'vy'],
                 'features_range': {'vx': [-20, 20],
                                    'vy': [-20, 20],
                                    'x': [-100, 100],
                                    'y': [-100, 100]},
                 'normalize': False,
                 'order': 'sorted',
                 'type': 'Kinematics',
                 'vehicles_count': 15},
 'offroad_terminal': False,
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_sp

  logger.warn(


In [10]:
state, info = env.reset()
state

array([[  1.       , 177.39053  ,   4.       ,  25.       ,   0.       ],
       [  1.       ,  20.5727   ,   0.       ,  -3.343128 ,   0.       ],
       [  1.       ,  40.825012 ,   0.       ,  -3.3453565,   0.       ],
       [  1.       ,  62.15614  ,   0.       ,  -3.365775 ,   0.       ],
       [  1.       ,  81.18384  ,  -4.       ,  -3.7396436,   0.       ],
       [  1.       , 103.0921   ,  -4.       ,  -2.3286119,   0.       ],
       [  1.       , 123.91152  ,  -4.       ,  -3.0356705,   0.       ],
       [  1.       , 142.46057  ,   8.       ,  -3.0500135,   0.       ],
       [  1.       , 164.62222  ,   0.       ,  -2.1595054,   0.       ],
       [  1.       , 185.62967  ,   8.       ,  -2.391198 ,   0.       ],
       [  0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.       ,   0.       ,   0.

Lets see, which actions we can perform

In [110]:
print(list(env.action_type.actions_indexes.keys()))

['LANE_LEFT', 'IDLE', 'LANE_RIGHT', 'FASTER', 'SLOWER']


In [111]:
def get_action(obs):
    distances_to_vehicles = np.linalg.norm(obs[:,1:2],axis=1)
    if distances_to_vehicles[obs[:,0] > 0].min() < 0.15:
        action = env.action_type.actions_indexes["SLOWER"]
        print(f"SLOWER:\t{distances_to_vehicles[obs[:,0] > 0].min()}")
    else:
        action = env.action_type.actions_indexes["FASTER"]
        print(f"FASTER:\t{distances_to_vehicles[obs[:,0] > 0].min()}")
    return action

In [112]:
obs, info = env.reset()
print("Action\tDistance to closest vehicle")
for _ in range(30):
    obs, reward, done, truncated, info = env.step(get_action(obs))
    env.render()
env.close()

FASTER: 0.22860531508922577
FASTER: 0.16660159826278687
SLOWER: 0.06791802495718002
SLOWER: 0.015495242550969124
SLOWER: 0.05618380382657051
SLOWER: 0.07327571511268616
SLOWER: 0.08571264147758484
SLOWER: 0.09643597900867462
FASTER: 0.2049071490764618
FASTER: 0.20152637362480164
FASTER: 0.15337350964546204
SLOWER: 0.08153873682022095
SLOWER: 0.030872169882059097
SLOWER: 0.025245536118745804
SLOWER: 0.04354032874107361
SLOWER: 0.06577606499195099
SLOWER: 0.08851192146539688
SLOWER: 0.11114273965358734
SLOWER: 0.13355320692062378
SLOWER: 0.14697128534317017
FASTER: 0.15184777975082397
SLOWER: 0.13167503476142883
SLOWER: 0.11607901751995087
SLOWER: 0.1176508441567421
SLOWER: 0.12221099436283112
SLOWER: 0.1273345947265625
SLOWER: 0.13260595500469208
SLOWER: 0.13795307278633118
SLOWER: 0.14336296916007996
SLOWER: 0.1488330215215683
