<a href="https://colab.research.google.com/github/Shreyas0744/Deep-Reinforcement-Learning-for-Robust-Vehicle-Control/blob/main/DRL_for_Robust_Vehicle_Control.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install high-performance RL libraries
!pip install stable-baselines3[extra] shimmy gymnasium

import gymnasium as gym
import numpy as np
import torch
from stable_baselines3 import SAC
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor

Collecting shimmy
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.7.1-py3-none-any.whl.metadata (4.8 kB)
Downloading Shimmy-2.0.0-py3-none-any.whl (30 kB)
Downloading stable_baselines3-2.7.1-py3-none-any.whl (188 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.0/188.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: shimmy, stable-baselines3
Successfully installed shimmy-2.0.0 stable-baselines3-2.7.1


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [2]:
# THE M.TECH EDGE: Custom Sensor Noise Wrapper
# This simulates the 'Robustness' needed for rain/fog in real-world automobile control
class SensorNoiseWrapper(gym.ObservationWrapper):
    def __init__(self, env, noise_level=0.05):
        super().__init__(env)
        self.noise_level = noise_level

    def observation(self, obs):
        # Add Gaussian noise to simulate faulty sensors/weather interference
        noise = np.random.normal(0, self.noise_level, obs.shape)
        return (obs + noise).astype(np.float32)

# Setup the Continuous Control Environment
# We use 'Pendulum-v1' as a mathematical proxy for steering/torque control
def make_robust_env():
    env = gym.make("Pendulum-v1", render_mode="rgb_array")
    env = Monitor(env)
    env = SensorNoiseWrapper(env, noise_level=0.1) # Simulate high-noise environment
    return env

env = make_robust_env()

In [4]:
# Initialize SAC (Soft Actor-Critic)
# SAC is chosen because it maximizes Entropy (Exploration) + Reward
# Perfect for non-linear mechanical systems
model = SAC(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=3e-4,
    buffer_size=50000,
    batch_size=256,
    tau=0.005,      # Soft update coefficient
    gamma=0.99,     # Discount factor
    ent_coef='auto' # Automatic entropy tuning (Modern RL approach)
)

# Train the Robust Control Policy
print("Starting training in noisy environment...")
model.learn(total_timesteps=20000, progress_bar=True)

Output()

Using cpu device
Wrapping the env in a DummyVecEnv.
Starting training in noisy environment...
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 200       |
|    ep_rew_mean     | -1.58e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 38        |
|    time_elapsed    | 20        |
|    total_timesteps | 800       |
| train/             |           |
|    actor_loss      | 24.8      |
|    critic_loss     | 3.15      |
|    ent_coef        | 0.812     |
|    ent_coef_loss   | -0.333    |
|    learning_rate   | 0.0003    |
|    n_updates       | 699       |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 200       |
|    ep_rew_mean     | -1.52e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 35        |
|    time_elapsed    | 45        |
|    total_timesteps | 1600    

<stable_baselines3.sac.sac.SAC at 0x7f086efe20c0>

In [5]:
# Evaluate and Save
# In a real M.Tech project, you would save this to GitHub to show 'Sim-to-Real' potential
model.save("robust_vehicle_sac")
print("Model trained and saved.")

# Visualizing the Control Strategy
obs, info = env.reset()
for _ in range(200):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
print("Deployment test complete.")

Model trained and saved.
Deployment test complete.
