In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from collections import deque
import random

# Load and prepare the data
def load_traffic_data(data_path):
    # In a real implementation, you'd load from a file
    df = pd.read_csv(data_path)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df

# Define the VSL environment
class VSLEnvironment:
    def __init__(self, data):
        self.data = data
        self.current_step = 0
        self.speed_limits = [60, 80, 100, 120, 140]  # Available speed limits in km/h
        
        # Define state space features
        self.state_features = ['vehicle_count', 'average_speed_kmh', 'vehicle_density_vpkm']
        
        # Normalize the features for better learning
        self.feature_means = data[self.state_features].mean()
        self.feature_stds = data[self.state_features].std()
        
    def reset(self):
        self.current_step = 0
        return self._get_state()
    
    def _get_state(self):
        if self.current_step >= len(self.data):
            return None
        
        current_data = self.data.iloc[self.current_step][self.state_features]
        # Normalize the state
        normalized_state = (current_data - self.feature_means) / self.feature_stds
        return normalized_state.values.astype(np.float32)  # Ensure float32 type
    
    def step(self, action):
        if self.current_step >= len(self.data) - 1:
            return None, 0, True, {}
        
        # Get current traffic conditions
        current_data = self.data.iloc[self.current_step]
        
        # Apply the selected speed limit
        selected_speed_limit = self.speed_limits[action]
        
        # Advance to the next step
        self.current_step += 1
        next_data = self.data.iloc[self.current_step]
        
        # Calculate reward based on traffic efficiency and safety
        reward = self._calculate_reward(current_data, next_data, selected_speed_limit)
        
        # Get new state
        new_state = self._get_state()
        
        # Check if episode is done
        done = self.current_step >= len(self.data) - 1
        
        return new_state, reward, done, {'speed_limit': selected_speed_limit}
    
    def _calculate_reward(self, current_data, next_data, speed_limit):
        # Traffic efficiency component: Reward higher flow rates
        flow_reward = next_data['flow_rate_vph'] / 1000  # Normalize
        
        # Safety component: Penalize if speed is much higher than the limit
        speed_compliance = max(0, 1 - max(0, next_data['average_speed_kmh'] - speed_limit) / 50)
        
        # Stability component: Reward lower variations in speed
        speed_stability = 1 / (1 + abs(next_data['average_speed_kmh'] - current_data['average_speed_kmh']))
        
        # Occupancy component: Reward balanced lane utilization
        occupancy_vars = np.var([
            next_data['occupancy_lane_1'], 
            next_data['occupancy_lane_2'], 
            next_data['occupancy_lane_3']
        ])
        occupancy_balance = 1 / (1 + occupancy_vars/1000)
        
        # Combined reward
        reward = (0.4 * flow_reward + 
                  0.3 * speed_compliance + 
                  0.2 * speed_stability + 
                  0.1 * occupancy_balance)
        
        return float(reward)  # Ensure float type

# Implement Deep Q-Network agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
    
    def _build_model(self):
        # Neural Net for Deep-Q learning
        model = keras.Sequential()
        model.add(layers.Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(layers.Dense(24, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(np.array([state], dtype=np.float32), verbose=0)
        return np.argmax(act_values[0])
    
    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        
        minibatch = random.sample(self.memory, batch_size)
        
        # Process states and next_states to ensure they're float32
        states = np.array([i[0] for i in minibatch], dtype=np.float32)
        actions = np.array([i[1] for i in minibatch], dtype=np.int32)
        rewards = np.array([i[2] for i in minibatch], dtype=np.float32)
        next_states = np.array([i[3] for i in minibatch], dtype=np.float32)
        dones = np.array([i[4] for i in minibatch], dtype=np.bool_)
        
        # Check if any next_state is None (end of episode)
        # This step is crucial to avoid None values in the array
        mask = ~np.array([ns is None for ns in next_states])
        if not np.any(mask):
            return  # If all next_states are None, exit
            
        # Calculate target values for fitting
        targets = np.zeros((len(minibatch), self.action_size), dtype=np.float32)
        
        # Predict Q-values for current states
        target_f = self.model.predict(states, verbose=0)
        
        # Predict Q-values for next states where available
        if np.any(~dones):
            # Filter out None next_states 
            valid_next_states = next_states[~dones]
            if len(valid_next_states) > 0:
                next_qs_array = self.model.predict(valid_next_states, verbose=0)
                next_qs = np.zeros(len(dones))
                next_qs[~dones] = np.max(next_qs_array, axis=1)
        
        for i, (_, action, reward, _, done) in enumerate(minibatch):
            target = reward
            if not done:
                target += self.gamma * next_qs[i]
            target_f[i][action] = target
            
        # Train the model
        self.model.fit(states, target_f, epochs=1, verbose=0)
        
        # Decay epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    
    def load(self, name):
        self.model.load_weights(name)
    
    def save(self, name):
        self.model.save_weights(name)

# Main training function
def train_vsl_agent(data, episodes=100, batch_size=32):
    env = VSLEnvironment(data)
    state_size = len(env.state_features)
    action_size = len(env.speed_limits)
    agent = DQNAgent(state_size, action_size)
    
    # Dictionary to store results
    results = {
        'episode': [],
        'average_reward': [],
        'speed_limits': []
    }
    
    for episode in range(episodes):
        total_reward = 0
        state = env.reset()
        speed_limits_applied = []
        done = False
        
        while not done:
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            
            if next_state is None:  # End of data
                break
                
            speed_limits_applied.append(info['speed_limit'])
            total_reward += reward
            
            agent.remember(state, action, reward, next_state, done)
            state = next_state
        
        # Train the agent on a batch of experiences
        if len(agent.memory) >= batch_size:
            agent.replay(batch_size)
        
        # Store results
        results['episode'].append(episode)
        results['average_reward'].append(total_reward / len(data))
        results['speed_limits'].append(speed_limits_applied)
        
        print(f"Episode: {episode+1}/{episodes}, Avg Reward: {total_reward/len(data):.4f}")
    
    # Save the trained model
    agent.save("models/vsl_model.weights.h5")
    return results, agent

# Evaluate the trained agent
def evaluate_vsl_agent(data, agent):
    env = VSLEnvironment(data)
    state = env.reset()
    recommended_speeds = []
    actual_speeds = []
    flow_rates = []
    timestamps = []
    done = False
    
    while not done:
        action = agent.act(state)
        speed_limit = env.speed_limits[action]
        recommended_speeds.append(speed_limit)
        
        # Store actual values for comparison
        current_data = data.iloc[env.current_step]
        actual_speeds.append(current_data['average_speed_kmh'])
        flow_rates.append(current_data['flow_rate_vph'])
        timestamps.append(current_data['timestamp'])
        
        next_state, reward, done, _ = env.step(action)
        if next_state is None:
            break
        state = next_state
    
    return timestamps, recommended_speeds, actual_speeds, flow_rates

# Visualize results
def visualize_results(timestamps, recommended_speeds, actual_speeds, flow_rates):
    plt.figure(figsize=(14, 8))
    
    # Plot recommended speed limits and actual speeds
    plt.subplot(2, 1, 1)
    plt.plot(timestamps, recommended_speeds, 'b-', label='Recommended Speed Limit')
    plt.plot(timestamps, actual_speeds, 'r-', label='Actual Average Speed')
    plt.ylabel('Speed (km/h)')
    plt.title('VSL Recommendations vs. Actual Speeds')
    plt.legend()
    plt.grid(True)
    
    # Plot flow rates
    plt.subplot(2, 1, 2)
    plt.plot(timestamps, flow_rates, 'g-', label='Flow Rate')
    plt.ylabel('Flow Rate (vph)')
    plt.xlabel('Time')
    plt.legend()
    plt.grid(True)
    
    plt.tight_layout()
    plt.savefig('vsl_results.png')
    plt.close()

# Run the complete pipeline
def run_vsl_pipeline():
    # Load data
    data = load_traffic_data("traffic_data_2.csv")
    
    # Train the agent
    results, agent = train_vsl_agent(data, episodes=50)
    
    # Evaluate the agent
    timestamps, recommended_speeds, actual_speeds, flow_rates = evaluate_vsl_agent(data, agent)
    
    # Visualize results
    visualize_results(timestamps, recommended_speeds, actual_speeds, flow_rates)
    
    return results

if __name__ == "__main__":
    results = run_vsl_pipeline()
    print("VSL implementation completed. Results saved to 'vsl_results.png'")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode: 1/50, Avg Reward: 0.3892
Episode: 2/50, Avg Reward: 0.3719
Episode: 3/50, Avg Reward: 0.3840
Episode: 4/50, Avg Reward: 0.3910
Episode: 5/50, Avg Reward: 0.3899
Episode: 6/50, Avg Reward: 0.3729
Episode: 7/50, Avg Reward: 0.3946
Episode: 8/50, Avg Reward: 0.3915
Episode: 9/50, Avg Reward: 0.4099
Episode: 10/50, Avg Reward: 0.3899
Episode: 11/50, Avg Reward: 0.4470
Episode: 12/50, Avg Reward: 0.3879
Episode: 13/50, Avg Reward: 0.3669
Episode: 14/50, Avg Reward: 0.3938
Episode: 15/50, Avg Reward: 0.3805
Episode: 16/50, Avg Reward: 0.4230
Episode: 17/50, Avg Reward: 0.4280
Episode: 18/50, Avg Reward: 0.4089
Episode: 19/50, Avg Reward: 0.3659
Episode: 20/50, Avg Reward: 0.3577
Episode: 21/50, Avg Reward: 0.4486
Episode: 22/50, Avg Reward: 0.3561
Episode: 23/50, Avg Reward: 0.4189
Episode: 24/50, Avg Reward: 0.4322
Episode: 25/50, Avg Reward: 0.3819
Episode: 26/50, Avg Reward: 0.4194
Episode: 27/50, Avg Reward: 0.3954
Episode: 28/50, Avg Reward: 0.3929
Episode: 29/50, Avg Reward: 0