# 1. Import Dependencies

In [9]:

import gymnasium as gym  # Updated to gymnasium
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
import os

# Test Environment
environment_name = "CarRacing-v3"

# Create environment. With render_mode="human" the environment displays itself
# while stepping, so no explicit env.render() call is needed in the loop below.
env = gym.make(environment_name, render_mode="human")

episodes = 5
try:
    # Baseline: drive with random actions and report the summed reward per episode.
    for episode in range(1, episodes + 1):
        state, info = env.reset()  # gymnasium's reset returns (observation, info)
        done = False
        score = 0

        while not done:
            action = env.action_space.sample()
            n_state, reward, terminated, truncated, info = env.step(action)
            # gymnasium reports the end of an episode through two separate flags.
            done = terminated or truncated
            score += reward

        print(f"Episode: {episode} Score: {score}")

    # Example actions and observations (sampled while the environment is still open)
    print(env.action_space.sample())
    print(env.observation_space.sample())
finally:
    # Release the render window even if the rollout is interrupted or fails.
    env.close()

# Train Model
# Output locations: TensorBoard logs and the saved model both live under "Training".
log_path = os.path.join("Training", "Logs")
ppo_path = os.path.join("Training", "Saved Models", "PPO_Driving_model")

# Training environment: created without a render_mode, so no window is opened.
env = gym.make(environment_name)

# PPO with the image ("CnnPolicy") policy; verbose=1 prints training progress.
model = PPO(
    "CnnPolicy",
    env,
    verbose=1,
    tensorboard_log=log_path,
)
model.learn(total_timesteps=40000)

# Save Model
model.save(ppo_path)

# Evaluate and Test
# The training `env` was created without a render_mode, so it cannot display
# anything. Use a separate environment with render_mode="human" here; it shows
# itself while stepping, so neither render=True nor env.render() is required.
eval_env = gym.make(environment_name, render_mode="human")
try:
    # evaluate_policy returns (mean_reward, std_reward) over the evaluation episodes.
    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
    print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")

    # Test the trained model on one more episode, stepping the environment manually.
    obs, info = eval_env.reset()
    done = False
    while not done:
        action, _states = model.predict(obs)
        obs, rewards, terminated, truncated, info = eval_env.step(action)
        done = terminated or truncated
finally:
    # Close both environments only once nothing will step them any more.
    eval_env.close()
    env.close()


Episode: 1 Score: -35.897435897436424
Episode: 2 Score: -31.899641577061487
Episode: 3 Score: -35.483870967742504
Episode: 4 Score: -22.48062015503873
Episode: 5 Score: -30.40293040293085
[0.60948205 0.53257376 0.18287168]
[[[ 11   1  76]
  [222  40 180]
  [134 186 235]
  ...
  [135 221 161]
  [184 107  92]
  [ 20 181  92]]

 [[176 161 197]
  [123 181 176]
  [177 165 228]
  ...
  [ 27 109  63]
  [119 151  55]
  [ 62 226 248]]

 [[ 73 214  54]
  [229   8 231]
  [245  36 155]
  ...
  [  1 176 142]
  [245 104 135]
  [125 228 160]]

 ...

 [[189 209  30]
  [ 16 183 121]
  [140 225 228]
  ...
  [ 10 203 138]
  [ 30  61 190]
  [238 247  74]]

 [[171 157  90]
  [201 114 168]
  [180 125  59]
  ...
  [122 150   7]
  [231 164 170]
  [101 190 215]]

 [[238 186  43]
  [ 18 246  27]
  [ 10 251  45]
  ...
  [230 187  89]
  [ 41  41  91]
  [ 77 149 249]]]
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


[W1225 03:42:30.439533792 NNPACK.cpp:61] Could not initialize NNPACK! Reason: Unsupported hardware.


Logging to Training/Logs/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -59.6    |
| time/              |          |
|    fps             | 77       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -52.2       |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 2           |
|    time_elapsed         | 71          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.006541088 |
|    clip_fraction        | 0.0443      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.22       |
|    explained_variance   | -0.00775    |

  gym.logger.warn(


In [11]:
from stable_baselines3 import PPO
import gymnasium as gym
import os

# Define the environment name
environment_name = "CarRacing-v3"

# Specify the path to the saved model
ppo_path = os.path.join('Training', 'Saved Models', 'PPO_Driving_model.zip')

# Load the saved model first, so a missing or corrupt file fails before any
# render window has been opened.
model = PPO.load(ppo_path)

# Create the environment. With render_mode="human" the environment displays
# itself while stepping, so no explicit env.render() call is needed.
env = gym.make(environment_name, render_mode="human")

# Test the model for a single episode
try:
    obs, info = env.reset()
    done = False
    score = 0.0
    while not done:
        # Use the model to predict actions
        action, _states = model.predict(obs)
        obs, rewards, terminated, truncated, info = env.step(action)
        score += rewards
        done = terminated or truncated  # the episode ends on either flag
    print(f"Episode score: {score}")
finally:
    # Close the environment even if the rollout is interrupted or fails.
    env.close()
