In [None]:
#!pip install torch torchvision torchaudio gymnasium pandas numpy matplotlib tqdm mpmath==1.2.1

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt


In [None]:
# Load the training table and make it fully numeric in one pass:
#   * a 'timestamp' column is removed when present (no error when absent);
#   * every remaining column is parsed as numeric, unparseable cells become NaN;
#   * NaNs (original gaps and failed parses alike) are replaced by 0.
# Note that non-numeric columns are NOT dropped; they end up as all-zero columns.
data = (
    pd.read_csv("train_data.csv")
    .drop(columns=['timestamp'], errors='ignore')
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)

print("Shape:", data.shape)
data.head()


In [None]:
class TrafficEnv(gym.Env):
    """Dataset-replay environment for traffic-signal control.

    Each call to ``step`` advances one row through ``data``. The observation
    is every column of the current row except the last one. The reward is
    derived from the row's ``waiting_time``, ``queue_length`` and
    ``emergency_detected`` values (each treated as 0 when the column is
    missing) together with the chosen action. The action only affects the
    reward; it does not change which row is visited next.

    Args:
        data: DataFrame of numeric columns with at least two rows.

    Raises:
        ValueError: if ``data`` has fewer than two rows, because no
            transition could ever be produced.
    """

    # Number of rows kept in reserve after a random start so that an episode
    # is not over almost immediately (used only when the dataset is big enough).
    MIN_EPISODE_STEPS = 50

    def __init__(self, data):
        super().__init__()
        if len(data) < 2:
            raise ValueError("TrafficEnv needs at least two rows of data")

        self.data = data.reset_index(drop=True)
        self.max_index = len(self.data) - 1
        self.current_step = 0

        # Define spaces
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(len(data.columns)-1,), dtype=np.float32)
        self.action_space = spaces.Discrete(4)  # 4 signal phases

    def reset(self, seed=None, options=None):
        """Start a new episode at a random row.

        Args:
            seed: optional seed forwarded to ``gym.Env.reset``; it seeds
                ``self.np_random``, which is what picks the start row, so a
                seeded reset is reproducible.
            options: accepted for API compatibility and ignored.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        # Exclusive upper bound for the start row. Clamped to 1 so that short
        # datasets start at row 0 instead of raising on an empty range.
        upper = max(1, self.max_index - self.MIN_EPISODE_STEPS)
        self.current_step = int(self.np_random.integers(0, upper))

        obs = self.data.iloc[self.current_step, :-1].to_numpy(dtype=np.float32)
        return obs, {}

    def step(self, action):
        """Advance to the next row and score ``action`` against it.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes True once the last row has been reached;
            ``truncated`` is always False and ``info`` is always empty.
        """
        self.current_step += 1
        # Plain bool/float rather than NumPy scalars, as the Gymnasium API expects.
        terminated = bool(self.current_step >= self.max_index)

        row = self.data.iloc[self.current_step]
        reward = float(self.compute_reward(
            row.get('waiting_time', 0),
            row.get('queue_length', 0),
            row.get('emergency_detected', 0),
            action,
        ))
        next_obs = row.iloc[:-1].to_numpy(dtype=np.float32)

        return next_obs, reward, terminated, False, {}

    def compute_reward(self, wait_time, queue_len, emergency_detected, action):
        """Return the negative weighted congestion cost, plus an emergency bonus.

        The cost is ``0.7 * wait_time + 0.3 * queue_len``. A bonus of 20 is
        added when an emergency is flagged and the agent chose action 0.
        """
        reward = - (0.7 * wait_time + 0.3 * queue_len)
        if emergency_detected and action == 0:  # example: NS-green helps emergency
            reward += 20
        return reward

    def render(self):
        """Rendering is not implemented for this environment."""
        pass


In [None]:
# Disabled earlier draft of the reward function. It is wrapped in a bare
# string literal, so nothing here is defined or called. Unlike
# TrafficEnv.compute_reward, this draft takes no action argument and grants
# the +20 bonus whenever an emergency is detected.
'''def compute_reward(wait_time, queue_len, emergency_detected):

    reward = - (0.7 * wait_time + 0.3 * queue_len)
    if emergency_detected:
        reward += 20  # positive reward for prioritizing emergency
    return reward'''


In [None]:
class PPOAgent(nn.Module):
    """Actor-critic network pair with a shared Adam optimizer.

    The actor maps a state to a probability vector over ``action_dim``
    discrete actions; the critic maps a state to a single value estimate.
    Both are one-hidden-layer MLPs with 64 ReLU units.

    Args:
        state_dim: number of features in a state vector.
        action_dim: number of discrete actions.
        lr: learning rate of the Adam optimizer covering actor and critic.
    """

    def __init__(self, state_dim, action_dim, lr=3e-4):
        super().__init__()
        self.actor = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim),
            nn.Softmax(dim=-1)
        )
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def act(self, state):
        """Sample an action for ``state`` from the current policy.

        Args:
            state: array-like of ``state_dim`` floats.

        Returns:
            ``(action, log_prob)``: the sampled action as a Python int and its
            log-probability under the sampling policy as a tensor that carries
            no autograd graph.
        """
        state = torch.as_tensor(state, dtype=torch.float32)
        # The returned log-prob is the "old policy" term of the PPO ratio and
        # must be a constant. If it kept its graph, exp(new - old) would have
        # an identically zero gradient whenever both come from the same weights.
        with torch.no_grad():
            probs = self.actor(state)
        dist = torch.distributions.Categorical(probs)
        action = dist.sample()
        return action.item(), dist.log_prob(action)

    def evaluate(self, state, action):
        """Score ``action`` in ``state`` under the current (differentiable) policy.

        Args:
            state: float tensor whose last dimension is ``state_dim``.
            action: long tensor of action indices matching ``state``'s batch shape.

        Returns:
            ``(log_prob, value, entropy)``: the action log-probability, the
            critic's output for ``state``, and the policy entropy.
        """
        probs = self.actor(state)
        dist = torch.distributions.Categorical(probs)
        log_prob = dist.log_prob(action)
        entropy = dist.entropy()
        return log_prob, self.critic(state), entropy


In [None]:
env = TrafficEnv(data)
# Size the networks from the spaces the environment declares, so the agent
# cannot drift out of sync with the env if its observation or action layout
# changes. int() strips NumPy integer types before they reach nn.Linear.
agent = PPOAgent(
    state_dim=int(env.observation_space.shape[0]),
    action_dim=int(env.action_space.n),
)

def train_ppo(episodes=200, gamma=0.99, clip_epsilon=0.2):
    """Train the module-level ``agent`` on the module-level ``env``.

    One optimizer step is taken per environment transition. The loss combines
    the clipped surrogate policy term, a squared one-step TD error for the
    critic (weight 0.5) and an entropy bonus (weight 0.01).

    Because each update is made right after the action was sampled with the
    same weights, the probability ratio evaluates to 1 at update time, so the
    clipping never changes the value of the objective; the update behaves
    like a one-step advantage actor-critic step.

    Args:
        episodes: number of episodes to run.
        gamma: discount factor used in the TD target.
        clip_epsilon: half-width of the PPO ratio clipping interval.

    Returns:
        List with the undiscounted total reward of each episode.
    """
    reward_history = []

    for episode in tqdm(range(episodes)):
        state, _ = env.reset()
        done = False
        total_reward = 0.0

        while not done:
            action, old_log_prob = agent.act(state)
            next_state, reward, terminated, truncated, _ = env.step(action)
            reward = float(reward)
            # Stop on either signal; only a true termination removes the bootstrap.
            done = bool(terminated) or bool(truncated)

            state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
            next_state_tensor = torch.as_tensor(next_state, dtype=torch.float32).unsqueeze(0)
            action_tensor = torch.tensor([action], dtype=torch.long)

            value = agent.critic(state_tensor)
            # The TD target is a constant: no gradient through V(s'), and no
            # bootstrapping past a terminal state.
            with torch.no_grad():
                next_value = agent.critic(next_state_tensor)
                if terminated:
                    next_value = torch.zeros_like(next_value)
                td_target = reward + gamma * next_value
            advantage = (td_target - value).squeeze(-1)

            new_log_prob, _, entropy = agent.evaluate(state_tensor, action_tensor)

            # The old log-prob must be a constant; otherwise the ratio's
            # gradient cancels exactly and the actor gets no policy signal.
            ratio = (new_log_prob - old_log_prob.detach()).exp()
            policy_advantage = advantage.detach()
            surr1 = ratio * policy_advantage
            surr2 = torch.clamp(ratio, 1 - clip_epsilon, 1 + clip_epsilon) * policy_advantage
            loss = -torch.min(surr1, surr2) + 0.5 * advantage.pow(2) - 0.01 * entropy

            agent.optimizer.zero_grad()
            loss.mean().backward()
            agent.optimizer.step()

            total_reward += reward
            state = next_state

        reward_history.append(total_reward)
        # tqdm.write prints the same message without breaking the progress bar.
        tqdm.write(f"Episode {episode+1} | Total Reward: {total_reward:.2f}")

    return reward_history

# Initial training run: 200 episodes. `rewards` holds one total reward per episode.
rewards = train_ppo(episodes=200)


In [None]:
# Persist only the weights (actor and critic parameters), not the optimizer,
# to a file in the current working directory.
torch.save(obj=agent.state_dict(), f="ppo_traffic_model.pt")
print("Model saved successfully!")


In [None]:
# Learning curve: total reward of each training episode, in order.
plt.plot(rewards)
plt.gca().set(
    xlabel="Episode",
    ylabel="Total Reward",
    title="PPO Training Progress for Traffic Signal Control",
)
plt.show()


In [None]:
!pip install stable_baselines3

# Re-initialize the PPOAgent model structure
loaded_agent = PPOAgent(state_dim=len(data.columns)-1, action_dim=4)

# Load the state dictionary into the agent
loaded_agent.load_state_dict(torch.load("/content/ppo_traffic_model.pt"))
loaded_agent.eval() # Set the model to evaluation mode

print("Model loaded successfully!")

In [None]:
# train_ppo() optimizes the module-level `agent`, not `loaded_agent`. Copy the
# reloaded weights into `agent` first so that training really continues from
# the saved checkpoint, whatever state `agent` is in when this cell runs.
agent.load_state_dict(loaded_agent.state_dict())

print("Continuing training for 200 more episodes...")
new_rewards = train_ppo(episodes=200)
# NOTE: extends `rewards` in place, so re-running this cell appends again.
rewards.extend(new_rewards)
print("Training complete.")

In [None]:
# Learning curve over both training runs (initial plus continued episodes).
plt.plot(rewards)
plt.gca().set(
    xlabel="Episode",
    ylabel="Total Reward",
    title="PPO Training Progress for Traffic Signal Control (Extended)",
)
plt.show()