In [5]:
# !pip install gym[box2d] pyglet 

# Important Libraries

In [2]:
import gymnasium as gym 
from stable_baselines3 import PPO
from stable_baselines3.common.logger import configure
from stable_baselines3.common.vec_env import VecTransposeImage
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold




# Creating Environment

In [16]:
env_name="CarRacing-v2"
env=gym.make(env_name)

In [73]:
print(f"Environment_ID    : {env.spec.id}")
print(f"Action_Space      : {env.action_space}")
print(f"Observation_space : {env.observation_space} \nIt is an image with 0-255 brightness and shape of {env.reset()[0].shape}")  
print(f"Reward_range      : {env.reward_range}")
print(f"Render_mode       : {env.render_mode}")
print(f"Shape_of_Reset    : {env.reset()[0].shape}")
print(f"Reward_threshold  : {env.spec.reward_threshold}")
print(f"Max_episode_steps : {env.spec.max_episode_steps}")

Environment_ID    : CarRacing-v2
Action_Space      : Box([-1.  0.  0.], 1.0, (3,), float32)
Observation_space : Box(0, 255, (96, 96, 3), uint8) 
It is an image with 0-255 brightness and shape of (96, 96, 3)
Reward_range      : (-inf, inf)
Render_mode       : None
Shape_of_Reset    : (96, 96, 3)
Reward_threshold  : 900
Max_episode_steps : 1000


# BASELINE 

In [21]:

# env=gym.make("CarRacing-v2",render_mode="human")  # Render
env=gym.make("CarRacing-v2")
num_episodes=2
total_score=[]
for episode in range(num_episodes):
    state=env.reset()
    done=False
    score=0
    while not done:
#         env.render()
        action=env.action_space.sample()
        next_state,reward,done,info,_=env.step(action)
        state=next_state
        score+=reward
    total_score.append(score)
    print(f"Episode {episode} Score :- {score}")
env.close()
print(f"Baseline of model after {num_episodes} Episodes")
print(f"Average Score after {num_episodes} episodes : {sum(total_score)/num_episodes} ")
print(f"Our Model should score above {sum(total_score)/num_episodes}")

Episode 0 Score :- -437.0000000000054
Episode 1 Score :- -876.3157894737926
Baseline of model after 2 Episodes
Average Score after 2 episodes : -656.657894736899 
Our Model should score above -656.657894736899


In [78]:
env.close()

# Training Model

In [29]:
log_path=r"C:\Users\abdul\Downloads\PROJECTS\RL_PROJECTS\Self_Driving_Car_RL\Training\Logs"
save_path=r"C:\Users\abdul\Downloads\PROJECTS\RL_PROJECTS\Self_Driving_Car_RL\Training\Models"

In [25]:
env=gym.make("CarRacing-v2")

In [26]:
# set up logger
new_logger = configure(log_path, ["stdout", "csv", "tensorboard"])

Logging to C:\Users\abdul\Downloads\PROJECTS\RL_PROJECTS\Self_Driving_Car_RL\Training\Logs


In [27]:
env=gym.make("CarRacing-v2")
model=PPO("CnnPolicy",env=env,verbose=1,tensorboard_log=log_path,)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [30]:
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=900, verbose=1)
eval_callback = EvalCallback(env, 
                             callback_on_new_best=stop_callback, 
                             eval_freq=10000, 
                             best_model_save_path=save_path, 
                             verbose=1)

In [33]:
model.set_logger(new_logger)
model.learn(total_timesteps=10000,callback=eval_callback)

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -58.9       |
| time/                   |             |
|    fps                  | 27          |
|    iterations           | 1           |
|    time_elapsed         | 73          |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.015192537 |
|    clip_fraction        | 0.161       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.14       |
|    explained_variance   | 0.0442      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0804      |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0155     |
|    std                  | 0.955       |
|    value_loss           | 0.336       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

<stable_baselines3.ppo.ppo.PPO at 0x1b693d80190>

# TensorBoard Visualization

In [None]:
# tensorboard --logdir C:\Users\abdul\Downloads\PROJECTS\RL_PROJECTS\Self_Driving_Car_RL\Training\Logs

# Saving Model

In [66]:
model.save(save_path)

In [35]:
del model

# Loading Model Trained on 2M tirals

In [2]:
import gymnasium as gym 
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecTransposeImage
path=r"C:\Users\abdul\Downloads\PROJECTS\RL_PROJECTS\Self_Driving_Car_RL\Training\PPO_2m_Driving_model.zip"
env=gym.make("CarRacing-v2")
env=Monitor(env)
env=DummyVecEnv([lambda:env])
env=VecTransposeImage(env)
model=PPO.load(path,env=env,print_system_info=True)

== CURRENT SYSTEM INFO ==
- OS: Windows-10-10.0.22631-SP0 10.0.22631
- Python: 3.9.18
- Stable-Baselines3: 2.2.1
- PyTorch: 2.1.1+cpu
- GPU Enabled: False
- Numpy: 1.26.2
- Cloudpickle: 3.0.0
- Gymnasium: 0.28.1
- OpenAI Gym: 0.26.1

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-10-10.0.22631-SP0 10.0.22631
- Python: 3.9.18
- Stable-Baselines3: 2.2.1
- PyTorch: 2.1.1+cpu
- GPU Enabled: False
- Numpy: 1.26.2
- Cloudpickle: 3.0.0
- Gymnasium: 0.28.1
- OpenAI Gym: 0.26.1



In [10]:
model.save(path)

In [5]:
model.learn(total_timesteps=20000,reset_num_timesteps=False)

Logging to Training\Logs\PPO_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 998      |
|    ep_rew_mean     | 725      |
| time/              |          |
|    fps             | 72       |
|    iterations      | 1        |
|    time_elapsed    | 28       |
|    total_timesteps | 47104    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 998        |
|    ep_rew_mean          | 731        |
| time/                   |            |
|    fps                  | 59         |
|    iterations           | 2          |
|    time_elapsed         | 69         |
|    total_timesteps      | 49152      |
| train/                  |            |
|    approx_kl            | 0.10053707 |
|    clip_fraction        | 0.527      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.77      |
|    explained_variance   | 0.646      |
|    learning_

<stable_baselines3.ppo.ppo.PPO at 0x26d7d494130>

# Evaluation

In [68]:
env=gym.make("CarRacing-v2",render_mode="human")

In [69]:
mean_reward, std_reward=evaluate_policy(model,env=env,n_eval_episodes=2,render=False,return_episode_rewards=True)
print("Mean Reward :- ",mean_reward)
print("Std Reward :- ",std_reward)



Mean Reward :-  [860.1593554615974, 759.9348765835166]
Std Reward :-  [1000, 1000]


In [77]:
env.close()

# Testing

In [6]:
# env=gym.make("CarRacing-v2")
env=gym.make("CarRacing-v2",render_mode="human") #render

In [None]:
num_episodes=1
total_score=[]
for episode in range(num_episodes):
    score=0
    state=env.reset()[0]
    done=False
    score=0
    while not done:
#         env.render()
        action = model.predict(state)[0]
        next_state,reward,done,info,_ = env.step(action)
        score += reward
        state = next_state
    total_score.append(score)
    print(f"Episode {episode+1} Score :- {score}")
print(f"Average Score after {num_episodes} episodes :- {sum(total_score)/num_episodes} ")

In [8]:
env.close()