## Import Dependencies

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os
import Box2D
import matplotlib.pyplot as plt

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


## Test Environment

In [2]:
environment_name = "CarRacing-v3"   # Updated version
env = gym.make(environment_name, render_mode="human")  # Add render_mode if you want visuals

In [3]:
env.reset()

(array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], dtype=uint8),
 {})

In [4]:
env.action_space

Box([-1.  0.  0.], 1.0, (3,), float32)

In [5]:
env.observation_space

Box(0, 255, (96, 96, 3), uint8)

In [6]:
env.render()

In [7]:
env.close()

In [8]:
env = gym.make("CarRacing-v3", render_mode="human")
episodes = 5
for episode in range(1, episodes + 1):
    obs = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
 
        
        score += reward
    print("Episode:{} Score:{}".format(episode, score))
env.close()

Episode:1 Score:-37.3040752351103
Episode:2 Score:-32.1428571428576
Episode:3 Score:-31.034482758621042
Episode:4 Score:-33.55481727574802
Episode:5 Score:-38.11074918566835


In [9]:
env.close()

## Train Model

In [10]:
env = gym.make(environment_name)
env = DummyVecEnv([lambda: env])

In [11]:
log_path = os.path.join("Training","Logs")
model = PPO("CnnPolicy",env,verbose = 1, tensorboard_log = log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [12]:
model.learn(total_timesteps=100000)

Logging to Training\Logs\PPO_16
-----------------------------
| time/              |      |
|    fps             | 22   |
|    iterations      | 1    |
|    time_elapsed    | 91   |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 18          |
|    iterations           | 2           |
|    time_elapsed         | 223         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008080028 |
|    clip_fraction        | 0.0792      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.00489     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.378       |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00727    |
|    std                  | 0.983       |
|    value_loss           | 0.751       |
--

<stable_baselines3.ppo.ppo.PPO at 0x1c7d68eecf0>

## Save Model

In [23]:
ppo_path = os.path.join("Training","Saved Model","PPO_Driving_Model1")

In [24]:
model.save(ppo_path)

In [30]:
del model

In [31]:
model = PPO.load(ppo_path, env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


## Evaluate and Test

In [33]:
# evaluate_policy(model,env,n_eval_episodes = 10, render = True)




test_env = gym.make("CarRacing-v3", render_mode="human")  # visuals ke liye fresh env
episodes = 5
for episode in range(1, episodes + 1):
    obs, _ = test_env.reset()
    done = False

    while not done:
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = test_env.step(action)
        done = terminated or truncated
        score += reward
    print("Episode:{} Score:{}".format(episode, score))

test_env.close()

Episode:1 Score:3030.230632607725
Episode:2 Score:3892.777449087163
Episode:3 Score:4737.414127287745
Episode:4 Score:5428.622918496249
Episode:5 Score:5534.703999576965
