In [8]:
import pickle
import random

import gymnasium as gym
import highway_env
import numpy as np

def is_in_visited_states(state, visited_states, threshold=0.0001):
    """Look up ``state`` among ``visited_states`` by Euclidean distance.

    Args:
        state: Array-like observation to search for.
        visited_states: Iterable of array-like observations to compare against.
        threshold: A stored state matches when the norm of its difference to
            ``state`` is strictly below this value.

    Returns:
        ``(True, index)`` for the first stored state within ``threshold``,
        otherwise ``(False, None)`` (including when ``visited_states`` is empty).
    """
    # Convert the query once instead of on every iteration of the scan.
    target = np.asarray(state)
    for i, visited_state in enumerate(visited_states):
        if np.linalg.norm(target - np.asarray(visited_state)) < threshold:
            return True, i  # first match wins
    return False, None


# Environment configuration passed to gym.make() below.
# NOTE(review): "lanes_count" / "initial_lane_id" also appear inside "observation",
# and "order" / "absolute" also appear at the top level. Presumably each key is
# only honoured at one of the two levels — confirm against the highway-env docs.
# Values are left exactly as they were so the observations stay comparable with
# the stored visited states.
config = {
    "observation": {
        "type": "Kinematics",
        "features": ["x", "y", "vx", "vy"],
        "features_range": {
            "x": [-100, 100],
            "y": [-100, 100],
            "vx": [-20, 20],
            "vy": [-20, 20]
        },
        "absolute": False,
        "lanes_count": 2,
        "vehicles_count": 2,
        "initial_lane_id": 0
    },
    "order": "sorted",
    "lanes_count": 2,
    "vehicles_count": 1,
    "collision_reward": -20,  # -10
    "high_speed_reward": 0,  # 0
    "lane_change_reward": 0,  # 0
    "right_lane_reward": 0,
    "on_road_reward": -20,
    "initial_lane_id": 0,
    "absolute": False
}

# Create the environment (render_mode="human" opens a viewer window).
env = gym.make('highway-v0', render_mode="human", config=config)


def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # NOTE(security): unpickling can execute arbitrary code. Only load files
    # that were produced by a trusted run of this project.
    with open(path, "rb") as f:
        return pickle.load(f)


# Load the previously saved artifacts. The names mirror the file names;
# presumably they were written by an earlier training run — TODO confirm.
state_policy = _load_pickle("state_policy.pkl")
state_V = _load_pickle("state_V.pkl")
total_rewards = _load_pickle("total_rewards.pkl")
action_length = _load_pickle("action_length.pkl")
visited_states = _load_pickle("visited_states.pkl")

# state_policy is indexed with the position of a match in visited_states,
# so the two collections must line up one-to-one.
assert len(state_policy) == len(visited_states), (
    "state_policy and visited_states must have the same length"
)

In [2]:
# Report the size of every loaded collection first, then dump their contents.
# The two tuples deliberately use different orders to keep the printed
# sequence unchanged.
for collection in (visited_states, state_V, state_policy, total_rewards, action_length):
    print(len(collection))
for collection in (state_policy, state_V, total_rewards, visited_states, action_length):
    print(collection)

3997
3997
3997
4005
4005
[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [9]:
# Roll out a single episode: use the stored action when the (rounded)
# observation matches a visited state, otherwise sample a random action.
test_action_length = 0  # number of environment steps taken
reward1 = 0  # cumulative reward over the episode
state, _ = env.reset()
done = False

while not done:
    # Round to two decimals before the lookup.
    # presumably visited_states were stored with the same rounding — TODO confirm
    s1 = np.round(state, 2)
    print("state:", s1)
    found, index = is_in_visited_states(s1, visited_states)
    if found:
        action = state_policy[index]
        print("|", index, "|")
    else:
        # Unseen observation: no stored action, fall back to a random one.
        action = env.action_space.sample()
        print("?")
    # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
    n_state, reward, terminated, truncated, info = env.step(action)
    test_action_length += 1
    print(action, reward)
    reward1 += reward
    state = n_state
    # The episode ends on either termination or truncation.
    done = terminated or truncated
print(test_action_length, reward1)


state: [[ 1.    0.    1.    0.  ]
 [ 0.28  0.   -0.17  0.  ]]
| 2345 |
0 0.0
state: [[ 1.    0.    1.    0.  ]
 [ 0.25  0.   -0.17  0.  ]]
| 293 |
0 0.0
state: [[ 1.    0.    1.    0.  ]
 [ 0.21  0.   -0.17  0.  ]]
| 175 |
0 0.0
state: [[ 1.    0.    1.    0.  ]
 [ 0.18  0.   -0.17  0.  ]]
| 2346 |
0 0.0
state: [[ 1.    0.    1.    0.  ]
 [ 0.15  0.   -0.17  0.  ]]
| 2347 |
0 0.0
state: [[ 1.    0.    1.    0.  ]
 [ 0.11  0.   -0.17  0.  ]]
| 2348 |
0 0.0
state: [[ 1.    0.    1.    0.  ]
 [ 0.08  0.   -0.17  0.  ]]
| 2349 |
2 0.0
state: [[ 1.    0.03  1.    0.08]
 [ 0.05 -0.03 -0.17 -0.08]]
| 2802 |
1 0.0
state: [[ 1.    0.04  1.    0.01]
 [ 0.01 -0.04 -0.17 -0.01]]
| 3424 |
1 0.0
state: [[ 1.    0.04  1.    0.  ]
 [-0.02 -0.04 -0.17 -0.  ]]
| 2352 |
1 0.0
state: [[ 1.    0.04  1.    0.  ]
 [-0.05 -0.04 -0.17 -0.  ]]
| 2353 |
0 0.0
state: [[ 1.    0.01  1.   -0.08]
 [ 0.    0.    0.    0.  ]]
| 7 |
0 0.0
state: [[ 1.    0.    1.   -0.01]
 [ 0.    0.    0.    0.  ]]
| 8 |
0 0.0
state: 