In [None]:
sdkfksj

In [1]:
import os
import sys
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import matplotlib.pyplot as plt
import time
import json
import traci

In [2]:
# Make the Python tools bundled with SUMO importable.
if 'SUMO_HOME' not in os.environ:
    # SUMO_HOME is not set as an environment variable: fall back to a
    # hard-coded install location and export it for the rest of the session.
    sumo_path = r"C:\Program Files (x86)\Eclipse\Sumo"  # change to your own path
    os.environ['SUMO_HOME'] = sumo_path

# At this point SUMO_HOME is always defined, so the tools directory can be
# derived from it in a single place.
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
sys.path.append(tools)

In [3]:
class TrafficSimEnv:
    """SUMO traffic simulation environment for RL agents.

    The environment drives a SUMO simulation through TraCI and exposes a
    ``reset`` / ``step`` interface. Observations are fixed-length float
    vectors of ``state_size`` entries; an action is one green-phase index
    per traffic light.
    """

    def __init__(self, sumo_config, max_steps=3600, yellow_duration=3, state_size=30):
        """
        Initialize the environment.

        Args:
            sumo_config: Path to SUMO configuration file
            max_steps: Maximum number of steps per episode
            yellow_duration: Number of simulation steps spent in the yellow
                phase when leaving a green phase
            state_size: Length of the observation vector returned by
                ``_get_state`` (shorter raw observations are zero-padded)
        """
        self.sumo_config = sumo_config
        self.max_steps = max_steps
        self.yellow_duration = yellow_duration
        self.current_step = 0
        self.sumo_running = False

        # Traffic light IDs
        self.traffic_lights = ["A", "B"]

        # State and action space dimensions.
        # The raw observation has 3 values per incoming lane plus 2 values per
        # traffic light, so its length depends on the loaded network;
        # _pad_state() brings it to exactly state_size entries.
        self.state_size = state_size
        self.action_size = 4  # 4 possible green phases per intersection

    def reset(self):
        """
        Reset the environment and start a new episode.

        Returns:
            state: Initial state of the new episode
        """
        # Close previous simulation if still running. close() also clears the
        # running flag, so a failing traci.start() below does not leave a
        # stale "running" marker behind.
        self.close()

        # Start SUMO simulation
        if 'SUMO_HOME' in os.environ:
            sumo_binary = os.path.join(os.environ['SUMO_HOME'], 'bin', 'sumo')
        else:
            sumo_binary = "sumo"

        sumo_cmd = [sumo_binary, "-c", self.sumo_config,
                    "--no-step-log", "true", "--no-warnings", "true"]
        traci.start(sumo_cmd)
        self.sumo_running = True

        self.current_step = 0

        return self._get_state()

    def step(self, action):
        """
        Take a step in the environment.

        Note that applying an action that leaves a green phase advances the
        simulation by ``yellow_duration`` extra steps; those steps are not
        counted in ``current_step``.

        Args:
            action: List of actions [action_A, action_B] for each traffic light

        Returns:
            next_state: New state after action
            reward: Reward for the action
            done: Whether the episode is finished
            info: Additional information (empty dict for now)
        """
        # Apply the action in the simulation
        self._apply_action(action)

        # Advance simulation one step
        traci.simulationStep()
        self.current_step += 1

        # Get new state and calculate reward
        next_state = self._get_state()
        reward = self._calculate_reward()

        # Episode ends on the step budget or when no vehicles remain expected
        done = (self.current_step >= self.max_steps or
                traci.simulation.getMinExpectedNumber() <= 0)

        return next_state, reward, done, {}

    def _pad_state(self, features):
        """
        Bring a raw feature list to exactly ``state_size`` entries.

        Args:
            features: Sequence of numeric features

        Returns:
            Float array of length ``state_size``; missing entries are zero

        Raises:
            ValueError: If more than ``state_size`` features were collected
        """
        count = len(features)
        if count > self.state_size:
            raise ValueError(
                f"Observation has {count} features but state_size is "
                f"{self.state_size}; create the environment with a larger state_size."
            )
        state = np.zeros(self.state_size, dtype=float)
        state[:count] = features
        return state

    def _get_state(self):
        """
        Get the current state of the environment.

        Per incoming lane: halting vehicle count, lane waiting time and the
        number of vehicles that are not halting. Per traffic light: the green
        phase index and the time elapsed in the current phase.

        Returns:
            state: Array of length ``state_size`` representing the current state
        """
        state = []
        now = traci.simulation.getTime()

        # For each traffic light (A and B)
        for tl_id in self.traffic_lights:
            for lane in self._get_incoming_lanes(tl_id):
                # Queue length in the lane
                queue_length = traci.lane.getLastStepHaltingNumber(lane)
                state.append(queue_length)

                # Waiting time reported by TraCI for the lane
                state.append(traci.lane.getWaitingTime(lane))

                # Vehicles on the lane that are still moving. The halting
                # count is used instead of vehicle.isStopped(), which per the
                # TraCI docs refers to scheduled stops, not to queueing.
                state.append(traci.lane.getLastStepVehicleNumber(lane) - queue_length)

            # Current traffic light phase, mapped to 0,1,2,3 (4 green phases,
            # each followed by its yellow phase)
            state.append(traci.trafficlight.getPhase(tl_id) // 2)

            # Time elapsed since the last phase change. getNextSwitch() is an
            # absolute simulation time (per the TraCI docs), so it is first
            # converted to the time remaining in the phase.
            remaining = traci.trafficlight.getNextSwitch(tl_id) - now
            state.append(traci.trafficlight.getPhaseDuration(tl_id) - remaining)

        return self._pad_state(state)

    def _get_incoming_lanes(self, tl_id):
        """
        Get all incoming lanes for a traffic light.

        Args:
            tl_id: Traffic light ID

        Returns:
            List of unique lane IDs, in the order TraCI reports them
        """
        links = traci.trafficlight.getControlledLinks(tl_id)
        # Each entry is a sequence of (in_lane, out_lane, via_lane) tuples and
        # may be empty; dict.fromkeys de-duplicates while keeping order.
        return list(dict.fromkeys(link[0][0] for link in links if link))

    def _apply_action(self, action):
        """
        Apply the selected action to the environment.

        All traffic lights that have to leave a green phase are switched to
        yellow together, so the yellow wait is paid once per step rather than
        once per intersection.

        Args:
            action: List of actions [action_A, action_B] for each traffic light
        """
        pending = []
        needs_yellow = False

        for i, tl_id in enumerate(self.traffic_lights):
            # Convert action to target phase (0,1,2,3 -> 0,2,4,6 which are
            # green phases). int() turns NumPy integers into plain ints.
            target_phase = int(action[i]) * 2
            current_phase = traci.trafficlight.getPhase(tl_id)

            if current_phase == target_phase:
                continue

            pending.append((tl_id, target_phase))

            # If we're in a green phase, we need to go through yellow first
            if current_phase % 2 == 0:  # Phases 0,2,4,6 are green
                traci.trafficlight.setPhase(tl_id, current_phase + 1)
                needs_yellow = True

        if needs_yellow:
            # Let the yellow phase run before switching to the new green
            for _ in range(self.yellow_duration):
                traci.simulationStep()

        # Now go to the target green phases
        for tl_id, target_phase in pending:
            traci.trafficlight.setPhase(tl_id, target_phase)

    def _calculate_reward(self):
        """
        Calculate the reward based on waiting time and queue length.

        Returns:
            reward: Negative waiting time per halting vehicle minus 0.1 per
                halting vehicle
        """
        total_waiting_time = 0
        total_vehicles = 0

        for tl_id in self.traffic_lights:
            for lane in self._get_incoming_lanes(tl_id):
                total_waiting_time += traci.lane.getWaitingTime(lane)
                total_vehicles += traci.lane.getLastStepHaltingNumber(lane)

        # Avoid division by zero
        if total_vehicles > 0:
            avg_waiting_time = total_waiting_time / total_vehicles
        else:
            avg_waiting_time = 0

        # Reward is negative of waiting time and vehicle count
        waiting_time_penalty = -avg_waiting_time
        vehicles_penalty = -total_vehicles * 0.1

        return waiting_time_penalty + vehicles_penalty

    def close(self):
        """Close the SUMO simulation if one is running."""
        # getattr keeps this safe on instances created without __init__
        if getattr(self, 'sumo_running', False):
            traci.close()
            self.sumo_running = False

In [4]:
class DQNAgent:
    """Deep Q-Network Agent for traffic signal control.

    One independent Q-network is kept per intersection (A and B); both
    receive the same state vector and the same reward.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001):
        """
        Initialize the DQN agent.

        Args:
            state_size: Dimension of state space
            action_size: Dimension of action space per intersection
            learning_rate: Learning rate for the neural network
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # Discount factor
        self.epsilon = 1.0   # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = learning_rate

        # Build separate models for each intersection
        self.model_A = self._build_model()
        self.model_B = self._build_model()

    def _build_model(self):
        """
        Build a neural network for Q-function approximation.

        Returns:
            Compiled Keras model
        """
        model = Sequential([
            tf.keras.Input(shape=(self.state_size,)),
            Dense(64, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ])

        # The optimizer argument is `learning_rate`; the old `lr` alias is
        # rejected by current Keras versions.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))

        return model

    def remember(self, state, action, reward, next_state, done):
        """
        Store experience in replay memory.

        Args:
            state: Current state
            action: Action taken
            reward: Reward received
            next_state: Next state
            done: Whether the episode is done
        """
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """
        Choose action according to epsilon-greedy policy.

        Args:
            state: Current state

        Returns:
            Selected actions for each intersection [action_A, action_B],
            as plain Python ints
        """
        # Exploration: random action
        if np.random.rand() <= self.epsilon:
            return [int(np.random.randint(self.action_size)),
                    int(np.random.randint(self.action_size))]

        # Exploitation: predict Q-values and choose best action.
        # verbose=0 keeps Keras from printing a progress bar on every step.
        state_batch = np.reshape(state, (1, -1))
        act_values_A = self.model_A.predict(state_batch, verbose=0)
        act_values_B = self.model_B.predict(state_batch, verbose=0)

        return [int(np.argmax(act_values_A[0])), int(np.argmax(act_values_B[0]))]

    def replay(self, batch_size):
        """
        Train the agent with experiences from replay memory.

        A minibatch is sampled and each model is trained with one batched
        fit, instead of predicting and fitting one sample at a time.

        Args:
            batch_size: Number of samples to use for training
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        states = np.vstack([np.reshape(s, (1, -1)) for s in states])
        next_states = np.vstack([np.reshape(s, (1, -1)) for s in next_states])
        actions = np.asarray(actions, dtype=int)      # one column per intersection
        rewards = np.asarray(rewards, dtype=float)
        # 0.0 for terminal transitions so the bootstrap term is dropped there
        not_done = 1.0 - np.asarray(dones, dtype=float)
        rows = np.arange(batch_size)

        for column, model in enumerate((self.model_A, self.model_B)):
            # Q-values of the next state give the bootstrap target
            next_q = model.predict(next_states, verbose=0)

            # Start from the current predictions so that only the Q-value of
            # the action actually taken contributes to the loss
            targets = model.predict(states, verbose=0)
            targets[rows, actions[:, column]] = (
                rewards + self.gamma * not_done * np.amax(next_q, axis=1)
            )

            model.fit(states, targets, batch_size=batch_size, epochs=1, verbose=0)

        # Update exploration rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def save(self, directory="models"):
        """
        Save the agent models.

        Args:
            directory: Directory to save models in (created if missing)
        """
        os.makedirs(directory, exist_ok=True)

        self.model_A.save(os.path.join(directory, "model_A.h5"))
        self.model_B.save(os.path.join(directory, "model_B.h5"))

    def load(self, directory="models"):
        """
        Load the agent models.

        Args:
            directory: Directory to load models from
        """
        self.model_A = tf.keras.models.load_model(os.path.join(directory, "model_A.h5"))
        self.model_B = tf.keras.models.load_model(os.path.join(directory, "model_B.h5"))

In [5]:
def train_agent(sumo_config, episodes=100, batch_size=32, model_dir="models",
                stats_dir="stats", agent_class=None):
    """
    Train the RL agent.

    Args:
        sumo_config: Path to SUMO configuration file
        episodes: Number of episodes to train for
        batch_size: Batch size for training
        model_dir: Directory to save models
        stats_dir: Directory to save training statistics
        agent_class: Agent class to instantiate; it is called with
            ``state_size`` and ``action_size`` keyword arguments.
            Defaults to ``DQNAgent`` when None.

    Returns:
        Trained agent
    """
    # Create directories if they don't exist
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(stats_dir, exist_ok=True)

    if agent_class is None:
        agent_class = DQNAgent

    # Initialize environment and agent
    env = TrafficSimEnv(sumo_config)
    agent = agent_class(state_size=env.state_size, action_size=env.action_size)

    # Statistics tracking
    stats = {
        'episode_rewards': [],
        'average_waiting_times': [],
        'average_vehicles': [],
        'training_times': []
    }
    stats_path = os.path.join(stats_dir, 'training_stats.json')

    def save_progress():
        """Persist the models, the statistics file and the progress plot."""
        agent.save(model_dir)
        with open(stats_path, 'w') as f:
            json.dump(stats, f)
        plot_training_progress(stats, stats_dir)

    try:
        # Training loop
        for e in range(episodes):
            start_time = time.time()

            # Reset environment
            state = env.reset()
            total_reward = 0
            total_waiting_time = 0
            total_vehicles = 0
            step_count = 0

            done = False
            while not done:
                # Select action
                action = agent.act(state)

                # Take action and observe result
                next_state, reward, done, _ = env.step(action)

                # Store experience in memory
                agent.remember(state, action, reward, next_state, done)

                # Accumulate the per-step traffic metrics that feed the
                # 'average_waiting_times' / 'average_vehicles' statistics
                for tl_id in env.traffic_lights:
                    for lane in env._get_incoming_lanes(tl_id):
                        total_waiting_time += traci.lane.getWaitingTime(lane)
                        total_vehicles += traci.lane.getLastStepHaltingNumber(lane)

                # Move to next state
                state = next_state
                total_reward += reward
                step_count += 1

                # Train agent on batch from memory
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)

            # Calculate statistics for this episode
            episode_time = time.time() - start_time

            # Save episode statistics (float() keeps the values JSON-serializable)
            stats['episode_rewards'].append(float(total_reward))
            stats['average_waiting_times'].append(float(total_waiting_time) / max(1, step_count))
            stats['average_vehicles'].append(float(total_vehicles) / max(1, step_count))
            stats['training_times'].append(episode_time)

            # Print progress
            print(f"Episode: {e+1}/{episodes}, Reward: {total_reward:.2f}, "
                  f"Epsilon: {agent.epsilon:.2f}, Time: {episode_time:.2f}s")

            # Checkpoint every 10 episodes
            if (e + 1) % 10 == 0:
                save_progress()

        # Save final agent, statistics and plot
        save_progress()
    finally:
        # Always shut SUMO down, even when training fails part-way
        env.close()

    return agent

def plot_training_progress(stats, stats_dir):
    """
    Draw the training curves and write them to ``training_progress.png``.

    Args:
        stats: Dictionary of training statistics
        stats_dir: Directory to save plots
    """
    # (statistics key, panel title, y-axis label) for each side-by-side panel
    panels = (
        ('episode_rewards', 'Episode Rewards', 'Reward'),
        ('average_waiting_times', 'Average Waiting Time', 'Time'),
        ('average_vehicles', 'Average Vehicles', 'Count'),
    )

    plt.figure(figsize=(12, 4))
    for position, (key, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.plot(stats[key])
        plt.title(title)
        plt.xlabel('Episode')
        plt.ylabel(y_label)

    plt.tight_layout()
    output_path = os.path.join(stats_dir, 'training_progress.png')
    plt.savefig(output_path)
    plt.close()

In [6]:
# Paths to the configuration files
sumo_config_path = "./sumo_config/config.sumocfg"  # change this to the correct path

# Agent variants to try, in order. Each entry holds: the announcement printed
# before training, a callable that resolves the agent class (resolved lazily so
# a missing class is reported inside the try block), the success message, and
# a formatter for the failure message.
training_attempts = (
    (
        "נסיון אימון עם הסוכן המקורי (DQNAgent)...",
        lambda: DQNAgent,  # original agent (uses lr)
        "האימון הצליח עם הסוכן המקורי!",
        lambda e: f"שגיאה עם הסוכן המקורי: {e}",
    ),
    (
        "\nנסיון אימון עם הסוכן המתוקן (DQNAgentFixed)...",
        lambda: DQNAgentFixed,  # fixed agent (uses learning_rate)
        "האימון הצליח עם הסוכן המתוקן!",
        lambda e: f"שגיאה עם הסוכן המתוקן: {e}",
    ),
    (
        "\nנסיון אימון עם הסוכן הפשוט (DQNAgentSimple)...",
        lambda: DQNAgentSimple,  # simple agent without parameters
        "האימון הצליח עם הסוכן הפשוט!",
        lambda e: f"שגיאה עם הסוכן הפשוט: {e}",
    ),
)

for start_message, resolve_agent_class, success_message, describe_failure in training_attempts:
    try:
        print(start_message)
        agent = train_agent(
            sumo_config=sumo_config_path,
            agent_class=resolve_agent_class(),
            episodes=5,  # small number of episodes, just for testing
            batch_size=32,
            model_dir="models",
            stats_dir="stats"
        )
        print(success_message)
        break  # stop at the first variant that trains successfully
    except Exception as e:
        print(describe_failure(e))
else:
    # Reached only when every variant failed
    print("\nכל הניסיונות נכשלו. בדוק את הודעות השגיאה ותקן בהתאם.")

NameError: name 'os' is not defined

In [None]:
def evaluate_agent(sumo_config, agent_class=None, model_dir="models", num_episodes=5, results_dir="results"):
    """
    Evaluate a trained agent.

    Args:
        sumo_config: Path to SUMO configuration file
        agent_class: Class of agent to use (if None, the known agent classes
            that exist in this module are tried in order)
        model_dir: Directory containing trained models
        num_episodes: Number of episodes to evaluate
        results_dir: Directory to save evaluation results

    Returns:
        Dictionary with per-episode rewards, per-step waiting times and
        vehicle counts, and per-episode throughput

    Raises:
        RuntimeError: If agent_class is None and none of the known agent
            classes could be created and loaded from model_dir
    """
    # Create results directory if it doesn't exist
    os.makedirs(results_dir, exist_ok=True)

    # Initialize environment
    env = TrafficSimEnv(sumo_config)

    # Determine which agent class to use
    if agent_class is None:
        # (class name, message printed when that class loads successfully)
        candidates = (
            ("DQNAgent", "Using original DQNAgent for evaluation"),
            ("DQNAgentFixed", "Using DQNAgentFixed for evaluation"),
            ("DQNAgentSimple", "Using DQNAgentSimple for evaluation"),
        )
        agent = None
        last_error = None
        for class_name, message in candidates:
            # Look the class up by name so variants that are not defined in
            # this module are skipped instead of raising NameError
            candidate_class = globals().get(class_name)
            if candidate_class is None:
                continue
            try:
                candidate = candidate_class(state_size=env.state_size,
                                            action_size=env.action_size)
                candidate.load(model_dir)
            except Exception as err:
                last_error = err
                continue
            agent = candidate
            print(message)
            break
        if agent is None:
            raise RuntimeError(
                f"Could not load a trained agent from {model_dir!r}"
            ) from last_error
    else:
        # Use the specified agent class
        agent = agent_class(state_size=env.state_size, action_size=env.action_size)
        agent.load(model_dir)

    # Disable exploration for evaluation
    agent.epsilon = 0

    # Statistics tracking
    results = {
        'episode_rewards': [],
        'waiting_times': [],
        'vehicle_counts': [],
        'throughput': []
    }

    try:
        # Evaluation loop
        for e in range(num_episodes):
            # Reset environment
            state = env.reset()
            episode_reward = 0
            step_waiting_times = []
            step_vehicle_counts = []
            vehicles_completed = 0

            done = False
            while not done:
                # Select action (greedy policy)
                action = agent.act(state)

                # Take action
                next_state, reward, done, _ = env.step(action)

                # Track statistics
                episode_reward += reward

                # Calculate waiting time and vehicle count for this step
                waiting_time = 0
                vehicle_count = 0

                for tl_id in env.traffic_lights:
                    for lane in env._get_incoming_lanes(tl_id):
                        waiting_time += traci.lane.getWaitingTime(lane)
                        vehicle_count += traci.lane.getLastStepHaltingNumber(lane)

                step_waiting_times.append(waiting_time)
                step_vehicle_counts.append(vehicle_count)

                # getArrivedNumber() covers only the latest simulation step
                # (per the TraCI docs), so it is accumulated over the episode.
                # Arrivals during any extra steps taken inside env.step() are
                # not observed here.
                vehicles_completed += traci.simulation.getArrivedNumber()

                # Move to next state
                state = next_state

            # Save episode results (float() keeps the reward JSON-serializable)
            results['episode_rewards'].append(float(episode_reward))
            results['waiting_times'].append(step_waiting_times)
            results['vehicle_counts'].append(step_vehicle_counts)
            results['throughput'].append(vehicles_completed)

            # Print progress
            avg_waiting_time = np.mean(step_waiting_times) if step_waiting_times else 0
            avg_vehicle_count = np.mean(step_vehicle_counts) if step_vehicle_counts else 0

            print(f"Episode: {e+1}/{num_episodes}, Reward: {episode_reward:.2f}, "
                  f"Avg Waiting Time: {avg_waiting_time:.2f}, "
                  f"Avg Vehicles: {avg_vehicle_count:.2f}, "
                  f"Throughput: {vehicles_completed}")

        # Save evaluation results
        with open(os.path.join(results_dir, 'evaluation_results.json'), 'w') as f:
            json.dump(results, f)

        # Plot evaluation results
        plot_evaluation_results(results, results_dir)
    finally:
        # Always shut SUMO down, even when evaluation fails part-way
        env.close()

    return results

def plot_evaluation_results(results, results_dir):
    """
    Render the evaluation figure and write a short text summary.

    Produces ``evaluation_results.png`` (a 2x2 grid of panels) and
    ``evaluation_summary.txt`` inside ``results_dir``.

    Args:
        results: Dictionary of evaluation results
        results_dir: Directory to save plots
    """
    # Summary statistics; episodes with empty per-step series are left out of
    # the two "mean of per-episode means" values
    avg_reward = np.mean(results['episode_rewards'])
    avg_waiting_time = np.mean(
        [np.mean(series) for series in results['waiting_times'] if series]
    )
    avg_vehicle_count = np.mean(
        [np.mean(series) for series in results['vehicle_counts'] if series]
    )
    avg_throughput = np.mean(results['throughput'])

    def decorate(average, title, x_label, y_label):
        """Add the red average line, titles and legend to the current panel."""
        plt.axhline(y=average, color='r', linestyle='-', label=f'Avg: {average:.2f}')
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.legend()

    def bar_panel(position, values, average, title, y_label):
        """One bar per episode."""
        plt.subplot(2, 2, position)
        plt.bar(range(len(values)), values)
        decorate(average, title, 'Episode', y_label)

    def trace_panel(position, all_series, average, title, y_label):
        """One faint line per episode; only the first line gets a legend entry."""
        plt.subplot(2, 2, position)
        for i, series in enumerate(all_series):
            if not series:  # nothing recorded for this episode
                continue
            plt.plot(series, alpha=0.3, label=f'Episode {i+1}' if i == 0 else "")
        decorate(average, title, 'Step', y_label)

    plt.figure(figsize=(10, 8))
    bar_panel(1, results['episode_rewards'], avg_reward, 'Episode Rewards', 'Reward')
    trace_panel(2, results['waiting_times'], avg_waiting_time, 'Waiting Times', 'Time')
    trace_panel(3, results['vehicle_counts'], avg_vehicle_count, 'Vehicle Counts', 'Count')
    bar_panel(4, results['throughput'], avg_throughput,
              'Throughput (Completed Vehicles)', 'Count')

    plt.tight_layout()
    plt.savefig(os.path.join(results_dir, 'evaluation_results.png'))
    plt.close()

    # Summary text file
    summary_lines = [
        f"Average Reward: {avg_reward:.2f}\n",
        f"Average Waiting Time: {avg_waiting_time:.2f}\n",
        f"Average Vehicle Count: {avg_vehicle_count:.2f}\n",
        f"Average Throughput: {avg_throughput:.2f}\n",
    ]
    with open(os.path.join(results_dir, 'evaluation_summary.txt'), 'w') as f:
        f.writelines(summary_lines)

In [None]:
# Run the evaluation on the trained model.
# Note: the system will try to detect the matching agent type automatically.
# Any failure is caught and reported as a printed message instead of a traceback.
try:
    evaluation_results = evaluate_agent(
        sumo_config=sumo_config_path,
        agent_class=None,  # try to detect automatically
        model_dir="models",
        num_episodes=3,  # small number of episodes for the evaluation
        results_dir="results"
    )
    print("ההערכה הושלמה בהצלחה!")
except Exception as e:
    print(f"שגיאה בהערכה: {e}")