In [None]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import random
from collections import defaultdict
import copy
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

np.random.seed(42)
torch.manual_seed(42)
random.seed(42)

from tqdm.notebook import tqdm


In [None]:
class NetworkTopology:
    """Random connected network graph with a simple per-step packet simulator.

    Nodes carry ``bandwidth``, ``latency``, ``queue_size`` and
    ``processing_power`` attributes; edges carry ``bandwidth``, ``latency``,
    ``packet_loss``, ``throughput``, ``utilization`` and ``congestion``.
    Packets are forwarded hop by hop: at every node the next hop is taken from
    ``routing_table[node][destination][1]``.
    """

    def __init__(self, num_nodes, connectivity_prob=0.3, seed=None):
        """Build the random graph, make it connected and reset all counters.

        Args:
            num_nodes: Number of nodes; node ids are ``0 .. num_nodes - 1``.
            connectivity_prob: Probability of creating an edge for each
                unordered node pair.
            seed: Optional seed. NOTE: when given, it reseeds the *global*
                ``random`` and ``numpy.random`` generators, not private ones.
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        self.num_nodes = num_nodes
        self.G = nx.Graph()

        # Add nodes with attributes
        for i in range(num_nodes):
            self.G.add_node(
                i,
                bandwidth=random.randint(10, 100),          # Mbps
                latency=random.uniform(1, 10),              # ms
                queue_size=random.randint(50, 200),         # packets
                processing_power=random.uniform(0.5, 2.0),  # processing capability
            )

        # Create edges with probability connectivity_prob
        for i in range(num_nodes):
            for j in range(i + 1, num_nodes):
                if random.random() < connectivity_prob:
                    self._add_random_link(i, j)

        # Ensure the graph is connected: attach every other component to the
        # first one with a single random link.
        if not nx.is_connected(self.G):
            components = list(nx.connected_components(self.G))
            for i in range(1, len(components)):
                node1 = random.choice(list(components[0]))
                node2 = random.choice(list(components[i]))
                self._add_random_link(node1, node2)

        # Initialize traffic matrix and routing table
        self.initialize_traffic_matrix()
        self.initialize_routing_table()

        # Current network state
        self.current_time = 0
        self.packet_queue = defaultdict(list)  # {node_id: [packets]}
        self.delivered_packets = 0
        self.dropped_packets = 0
        self.total_delay = 0

    def _add_random_link(self, u, v):
        """Add edge ``u``-``v`` with randomly drawn link attributes.

        The attributes are drawn in a fixed order (latency, packet_loss,
        throughput, utilization, congestion) so that a seeded run always
        produces the same topology.
        """
        self.G.add_edge(
            u, v,
            # A link cannot be faster than its slower endpoint.
            bandwidth=min(self.G.nodes[u]['bandwidth'], self.G.nodes[v]['bandwidth']),
            latency=random.uniform(5, 20),         # ms
            packet_loss=random.uniform(0, 0.05),   # 0-5% packet loss
            throughput=random.randint(100, 1000),  # Mbps
            utilization=random.uniform(0.1, 0.5),  # 10-50% utilization
            congestion=random.uniform(0, 0.3),     # congestion level
        )

    def initialize_traffic_matrix(self):
        """Draw a random traffic demand (5-50) for every ordered node pair.

        The diagonal stays zero. NOTE(review): the original comment calls the
        unit "packets per second", but ``step`` divides the rate by 60 -
        confirm the intended unit.
        """
        self.traffic_matrix = np.zeros((self.num_nodes, self.num_nodes))
        for i in range(self.num_nodes):
            for j in range(self.num_nodes):
                if i != j:
                    self.traffic_matrix[i, j] = random.randint(5, 50)

    def initialize_routing_table(self):
        """Fill ``routing_table[i][j]`` with the latency-shortest path i -> j.

        Each entry is the full node list ``[i, ..., j]``, or ``None`` when no
        path exists. There is no ``routing_table[i][i]`` entry.
        """
        self.routing_table = {}
        for i in range(self.num_nodes):
            self.routing_table[i] = {}
            for j in range(self.num_nodes):
                if i != j:
                    try:
                        # Edge attribute 'latency' is the path weight.
                        path = nx.shortest_path(self.G, source=i, target=j,
                                                weight='latency')
                        self.routing_table[i][j] = path
                    except nx.NetworkXNoPath:
                        self.routing_table[i][j] = None

    def update_routing_table(self, new_routing_table):
        """Replace the routing table (used by the agents)."""
        self.routing_table = new_routing_table

    def generate_packet(self, source, destination, size=1024):
        """Return a new packet dict located at ``source``.

        Args:
            source: Node id where the packet is created.
            destination: Node id the packet must reach.
            size: Packet size in bytes.
        """
        return {
            'id': f"pkt_{source}_{destination}_{self.current_time}_{random.randint(0, 1000)}",
            'source': source,
            'destination': destination,
            'size': size,  # bytes
            'creation_time': self.current_time,
            'current_node': source,
            'next_hop': None,
            'hops': 0,
            'delay': 0
        }

    def step(self, time_step=1):
        """Advance the simulation by ``time_step`` and return the metrics dict.

        Per step: (1) new packets are generated per ordered pair from a
        Poisson draw, (2) every node tail-drops queue overflow and then
        forwards up to ``int(processing_power * 10)`` packets, (3) link
        utilization and congestion decay.

        Nodes are handled in ascending id order, so a packet forwarded to a
        higher-numbered node can be processed again within the same step.

        A packet whose routing entry is missing, empty, shorter than two
        nodes, or whose next hop is not adjacent is counted as dropped.
        """
        self.current_time += time_step

        # Generate new packets according to traffic matrix
        for source in range(self.num_nodes):
            for destination in range(self.num_nodes):
                if source != destination:
                    num_packets = np.random.poisson(
                        self.traffic_matrix[source, destination] * time_step / 60)
                    for _ in range(num_packets):
                        packet = self.generate_packet(source, destination)
                        self.packet_queue[source].append(packet)

        # Process packets at each node
        for node in range(self.num_nodes):
            processing_power = self.G.nodes[node]['processing_power']
            queue_size = self.G.nodes[node]['queue_size']

            # Tail drop: keep the oldest `queue_size` packets.
            if len(self.packet_queue[node]) > queue_size:
                dropped = len(self.packet_queue[node]) - queue_size
                self.dropped_packets += dropped
                self.packet_queue[node] = self.packet_queue[node][:queue_size]

            # Process packets based on node's processing power
            packets_to_process = min(int(processing_power * 10), len(self.packet_queue[node]))

            for _ in range(packets_to_process):
                if not self.packet_queue[node]:
                    break

                packet = self.packet_queue[node].pop(0)
                current = packet['current_node']

                # If packet reached destination
                if current == packet['destination']:
                    self.delivered_packets += 1
                    delay = self.current_time - packet['creation_time']
                    self.total_delay += delay
                    continue

                # Find next hop using routing table. A missing or degenerate
                # route means the packet cannot make progress: account for it
                # as a drop instead of losing it silently or crashing.
                route = self.routing_table.get(current, {}).get(packet['destination'])
                if not route or len(route) < 2:
                    self.dropped_packets += 1
                    continue

                next_hop = route[1]  # Next node in the path
                edge_data = self.G.get_edge_data(current, next_hop)
                if edge_data is None:
                    # Routing entry points at a non-neighbour.
                    self.dropped_packets += 1
                    continue

                # Check for packet loss
                if random.random() < edge_data['packet_loss']:
                    self.dropped_packets += 1
                    continue

                # Update packet information
                packet['current_node'] = next_hop
                packet['hops'] += 1
                packet['delay'] += edge_data['latency']

                # Add to next node's queue
                self.packet_queue[next_hop].append(packet)

                # Update link utilization; saturation raises congestion.
                edge_data['utilization'] += packet['size'] / (edge_data['bandwidth'] * 1024 * 1024)
                if edge_data['utilization'] > 1:
                    edge_data['utilization'] = 1
                    edge_data['congestion'] += 0.1
                    if edge_data['congestion'] > 1:
                        edge_data['congestion'] = 1

        # Decay link utilization and congestion over time
        for u, v, data in self.G.edges(data=True):
            data['utilization'] *= 0.95  # 5% decay per time step
            data['congestion'] *= 0.9    # 10% decay per time step

        return self.get_performance_metrics()

    def get_performance_metrics(self):
        """Return link averages and cumulative end-to-end counters as a dict.

        ``avg_*`` link values are plain means over all edges (0 when the graph
        has no edges). ``packet_delivery_ratio`` is delivered / (delivered +
        dropped) and ``avg_end_to_end_delay`` is total delay / delivered; both
        are 0 while nothing has been delivered.
        """
        link_keys = ('latency', 'packet_loss', 'throughput', 'utilization', 'congestion')
        averages = dict.fromkeys(link_keys, 0)

        num_edges = self.G.number_of_edges()
        if num_edges > 0:
            for u, v, data in self.G.edges(data=True):
                for key in link_keys:
                    averages[key] += data[key]
            for key in link_keys:
                averages[key] /= num_edges

        # Avoid division by zero before any packet has finished.
        total_packets = max(1, self.delivered_packets + self.dropped_packets)
        packet_delivery_ratio = self.delivered_packets / total_packets

        avg_end_to_end_delay = 0
        if self.delivered_packets > 0:
            avg_end_to_end_delay = self.total_delay / self.delivered_packets

        return {
            'avg_latency': averages['latency'],
            'avg_packet_loss': averages['packet_loss'],
            'avg_throughput': averages['throughput'],
            'avg_utilization': averages['utilization'],
            'avg_congestion': averages['congestion'],
            'packet_delivery_ratio': packet_delivery_ratio,
            'avg_end_to_end_delay': avg_end_to_end_delay,
            'delivered_packets': self.delivered_packets,
            'dropped_packets': self.dropped_packets
        }

    def visualize_network(self):
        """Draw the topology: node size and edge width follow bandwidth,
        edge colour follows congestion, edge labels show latency."""
        plt.figure(figsize=(12, 8))

        # Fixed layout seed so repeated drawings look the same.
        pos = nx.spring_layout(self.G, seed=42)

        # Draw nodes
        node_sizes = [self.G.nodes[n]['bandwidth'] * 5 for n in self.G.nodes()]
        nx.draw_networkx_nodes(self.G, pos, node_size=node_sizes, node_color='skyblue', alpha=0.8)

        # Draw edges with width based on bandwidth and color based on congestion
        edge_widths = [self.G[u][v]['bandwidth'] / 20 for u, v in self.G.edges()]
        edge_colors = [self.G[u][v]['congestion'] for u, v in self.G.edges()]
        nx.draw_networkx_edges(self.G, pos, width=edge_widths, edge_color=edge_colors,
                               edge_cmap=plt.cm.RdYlGn_r, alpha=0.7)

        # Add labels
        nx.draw_networkx_labels(self.G, pos, font_size=10)

        # Add edge labels (simplified to show just latency)
        edge_labels = {(u, v): f"{d['latency']:.1f}ms" for u, v, d in self.G.edges(data=True)}
        nx.draw_networkx_edge_labels(self.G, pos, edge_labels=edge_labels, font_size=8)

        plt.title("Network Topology Visualization")
        plt.axis('off')
        plt.tight_layout()
        plt.show()

    def reset(self):
        """Reset counters, queues, link load and traffic; return the metrics.

        The graph structure and the current routing table are kept as they
        are; utilization, congestion and the traffic matrix are re-drawn.
        """
        # Reset traffic and queues
        self.current_time = 0
        self.packet_queue = defaultdict(list)
        self.delivered_packets = 0
        self.dropped_packets = 0
        self.total_delay = 0

        # Reset edge attributes
        for u, v, data in self.G.edges(data=True):
            data['utilization'] = random.uniform(0.1, 0.5)
            data['congestion'] = random.uniform(0, 0.3)

        # Re-initialize traffic matrix
        self.initialize_traffic_matrix()

        # Return initial observation
        return self.get_performance_metrics()


In [None]:
class BaselineAgent:
    """Static baseline: installs latency-shortest-path routes and never acts."""

    def __init__(self, network_topology):
        """Keep a reference to the network whose routing table is managed."""
        self.network = network_topology

    def get_action(self, state):
        """Return ``None`` for every state: routing is pre-computed."""
        return None

    def initialize_routing_table(self):
        """Compute all-pairs latency-shortest paths and push them to the network.

        ``self.routing_table[i][j]`` becomes the node list from ``i`` to ``j``
        (``None`` when unreachable); the same table is then handed to
        ``self.network.update_routing_table``.
        """
        graph = self.network.G
        node_count = self.network.num_nodes

        def link_latency(_u, _v, attrs):
            # Edge weight callback: cost of a link is its latency.
            return attrs['latency']

        table = self.routing_table = {}
        for origin in range(node_count):
            routes = table[origin] = {}
            for target in range(node_count):
                if origin == target:
                    continue
                try:
                    routes[target] = nx.shortest_path(
                        graph,
                        source=origin,
                        target=target,
                        weight=link_latency,
                    )
                except nx.NetworkXNoPath:
                    routes[target] = None  # No path found

        # Update the network's routing table
        self.network.update_routing_table(self.routing_table)

In [None]:
class NetworkEnvironment:
    """Episode wrapper around a network simulator for routing agents.

    An action is a tuple ``(action_type, src, dst, next_hop)`` that asks to
    route traffic ``src -> dst`` through the neighbour ``next_hop``.
    """

    def __init__(self, network_topology, max_steps=100):
        """Store the network, set the episode length and reset once.

        Args:
            network_topology: Simulator object; this class uses its ``G``,
                ``routing_table``, ``reset``, ``step``,
                ``update_routing_table`` and ``get_performance_metrics``.
            max_steps: Number of steps after which ``step`` reports ``done``.
        """
        self.network = network_topology
        self.max_steps = max_steps
        self.current_step = 0
        self.reset()

    @staticmethod
    def _trace_route(routing_table, src, dst, next_hop):
        """Follow per-hop routing entries from ``next_hop`` until ``dst``.

        Returns the node list ``[src, next_hop, ..., dst]``, or ``None`` when
        an entry is missing/degenerate or the walk would revisit a node
        (which would be a forwarding loop).
        """
        path = [src, next_hop]
        visited = {src, next_hop}
        node = next_hop
        while node != dst:
            route = routing_table.get(node, {}).get(dst)
            if not route or len(route) < 2:
                return None
            node = route[1]
            if node in visited:
                return None
            visited.add(node)
            path.append(node)
        return path

    def action_to_routing_table(self, action, src, dst, next_hop):
        """Return a deep copy of the routing table with the action applied.

        The entry ``[src][dst]`` is replaced only when all of these hold:
        ``src`` differs from ``dst`` and ``next_hop``; a route ``src -> dst``
        currently exists; ``next_hop`` is adjacent to ``src``; and following
        the per-hop entries from ``next_hop`` reaches ``dst`` without coming
        back to an already visited node. ``next_hop == dst`` is accepted and
        yields the direct route ``[src, dst]``. Otherwise the copy is
        returned unchanged.

        Args:
            action: Action type chosen by the agent; not used here.
            src: Source node id.
            dst: Destination node id.
            next_hop: Proposed first hop from ``src``.
        """
        new_routing_table = copy.deepcopy(self.network.routing_table)

        # Invalid or meaningless request: no change.
        if src == dst or src == next_hop:
            return new_routing_table

        # Only re-route pairs that already have a route.
        if not new_routing_table.get(src, {}).get(dst):
            return new_routing_table

        # The first hop must be a physical neighbour of the source.
        if not self.network.G.has_edge(src, next_hop):
            return new_routing_table

        # Packets are forwarded hop by hop, so validate the route they would
        # really take; this rejects next hops that send traffic back to src.
        new_path = self._trace_route(new_routing_table, src, dst, next_hop)
        if new_path is not None:
            new_routing_table[src][dst] = new_path

        return new_routing_table

    def reset(self):
        """Start a new episode: zero the step counter, reset the network and
        return the initial state vector."""
        self.current_step = 0
        self.network.reset()
        return self._get_state()

    def _get_state(self):
        """Build the flat ``float32`` observation vector.

        Layout: 5 values per edge (bandwidth/100, latency/20, packet_loss,
        utilization, congestion), then 2 values per node (queue_size/200,
        processing_power), then 3 global values (delivery ratio,
        end-to-end delay/100, average congestion).
        """
        state = []

        # Edge features
        for u, v, data in list(self.network.G.edges(data=True)):
            state.extend([
                data['bandwidth'] / 100,  # Normalize
                data['latency'] / 20,     # Normalize
                data['packet_loss'],
                data['utilization'],
                data['congestion']
            ])

        # Node features
        for node, data in self.network.G.nodes(data=True):
            state.extend([
                data['queue_size'] / 200,  # Normalize
                data['processing_power']
            ])

        # Global metrics
        metrics = self.network.get_performance_metrics()
        state.extend([
            metrics['packet_delivery_ratio'],
            metrics['avg_end_to_end_delay'] / 100,  # Normalize
            metrics['avg_congestion']
        ])

        return np.array(state, dtype=np.float32)

    def step(self, action_tuple):
        """Apply an optional routing action, simulate one step and score it.

        Args:
            action_tuple: ``(action_type, src, dst, next_hop)`` or ``None``
                to keep the current routing table (static routing).

        Returns:
            ``(state, reward, done, info)`` where ``info`` is the metrics
            dict produced by the network after the step.
        """
        if action_tuple is not None:  # For RL agents
            action_type, src, dst, next_hop = action_tuple
            new_routing_table = self.action_to_routing_table(action_type, src, dst, next_hop)
            self.network.update_routing_table(new_routing_table)

        # Metrics are sampled right before and right after the simulation
        # step so the reward reflects the change caused by this step.
        metrics_before = self.network.get_performance_metrics()
        metrics_after = self.network.step()

        reward = self._calculate_reward(metrics_before, metrics_after)

        self.current_step += 1
        done = self.current_step >= self.max_steps

        info = metrics_after

        return self._get_state(), reward, done, info

    def _calculate_reward(self, metrics_before, metrics_after):
        """Weighted sum of delivery-ratio, latency and throughput changes.

        NOTE(review): 'avg_latency' and 'avg_throughput' come from the
        metrics dict; if the network reports them as averages of link
        attributes that a simulation step never modifies, those two terms
        are always 0 and the reward is the delivery-ratio change only -
        confirm whether 'avg_end_to_end_delay' was intended.
        """
        delivery_improvement = (
            metrics_after['packet_delivery_ratio'] - metrics_before['packet_delivery_ratio']
        ) * 200
        latency_improvement = (
            (metrics_before['avg_latency'] - metrics_after['avg_latency'])
            / max(1, metrics_before['avg_latency']) * 5000
        )
        throughput_improvement = (
            (metrics_after['avg_throughput'] - metrics_before['avg_throughput']) / 1000 * 30
        )

        return delivery_improvement + latency_improvement + throughput_improvement

    def render(self):
        """Print the current step and the headline metrics."""
        metrics = self.network.get_performance_metrics()
        print(f"Step: {self.current_step}")
        print(f"Packet Delivery Ratio: {metrics['packet_delivery_ratio']:.4f}")
        print(f"Average End-to-End Delay: {metrics['avg_end_to_end_delay']:.2f}ms")
        print(f"Average Throughput: {metrics['avg_throughput']:.2f}Mbps")
        print(f"Delivered/Dropped Packets: {metrics['delivered_packets']}/{metrics['dropped_packets']}")
        print("-" * 50)


In [None]:
class ActorNetwork(nn.Module):
    """Policy network with a shared trunk and four softmax heads.

    The heads give probabilities over the action type (3 choices) and over
    source, destination and next-hop node (``num_nodes`` choices each).
    """

    def __init__(self, num_nodes, input_dim, hidden_dim=256):
        """Create the trunk and the heads.

        Args:
            num_nodes: Size of each node-selection head.
            input_dim: Length of the state vector.
            hidden_dim: Width of the two hidden layers.
        """
        super().__init__()

        self.num_nodes = num_nodes

        def softmax_head(num_choices):
            # Linear projection followed by a softmax over the last dim.
            return nn.Sequential(
                nn.Linear(hidden_dim, num_choices),
                nn.Softmax(dim=-1),
            )

        # Layers are created in a fixed order (trunk, action, src, dst,
        # next hop) so seeded initialisation stays reproducible.
        self.shared = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )
        self.action_head = softmax_head(3)
        self.src_head = softmax_head(self.num_nodes)
        self.dst_head = softmax_head(self.num_nodes)
        self.next_hop_head = softmax_head(self.num_nodes)

    def forward(self, x):
        """Return ``(action_probs, src_probs, dst_probs, next_hop_probs)``."""
        features = self.shared(x)
        return (
            self.action_head(features),
            self.src_head(features),
            self.dst_head(features),
            self.next_hop_head(features),
        )

class CriticNetwork(nn.Module):
    """State-value network: two hidden ReLU layers and one scalar output."""

    def __init__(self, input_dim, hidden_dim=256):
        """Build the MLP.

        Args:
            input_dim: Length of the state vector.
            hidden_dim: Width of both hidden layers.
        """
        super().__init__()

        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.critic = nn.Sequential(*layers)

    def forward(self, x):
        """Return the value estimate; the last dimension has size 1."""
        return self.critic(x)

class PPOAgent:
    """PPO agent with a four-part categorical action and a separate critic.

    An action is ``(action_type, src, dst, next_hop)``; the four components
    are sampled independently, so the joint log-probability is the sum of the
    four component log-probabilities.
    """

    def __init__(self, state_dim, hidden_dim=256, lr_actor=0.0003, lr_critic=0.001,
                 gamma=0.99, gae_lambda=0.95, clip_epsilon=0.2, epochs=10, device='cpu', num_nodes=10):
        """Create networks, optimizers and empty rollout buffers.

        Args:
            state_dim: Length of the state vector.
            hidden_dim: Hidden width of actor and critic.
            lr_actor: Adam learning rate of the actor.
            lr_critic: Adam learning rate of the critic.
            gamma: Discount factor.
            gae_lambda: GAE smoothing factor.
            clip_epsilon: PPO ratio clipping range.
            epochs: Optimisation passes over the buffer per ``update``.
            device: Torch device for networks and tensors.
            num_nodes: Number of nodes, i.e. size of the node-selection heads.
        """
        self.actor = ActorNetwork(num_nodes, state_dim, hidden_dim).to(device)
        self.critic = CriticNetwork(state_dim, hidden_dim).to(device)

        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr_critic)

        self.num_nodes = num_nodes
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.clip_epsilon = clip_epsilon
        self.epochs = epochs
        self.device = device

        # Memory buffers
        self.states = []
        self.actions = []
        self.log_probs = []
        self.rewards = []
        self.dones = []
        self.values = []

        # Training metrics
        self.actor_losses = []
        self.critic_losses = []
        self.episode_rewards = []

    def get_action(self, state):
        """Sample an action for a single state.

        Args:
            state: One state vector (optionally with a leading batch
                dimension of size 1).

        Returns:
            ``((action_type, src, dst, next_hop), log_prob, value)`` where the
            tuple holds Python ints, ``log_prob`` is a 0-d tensor on
            ``self.device`` and ``value`` is a Python float.
        """
        state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)

        with torch.no_grad():
            action_probs, src_probs, dst_probs, next_hop_probs = self.actor(state_tensor)
            value = self.critic(state_tensor)

        # Drop an optional leading batch dim; view() fails loudly if the
        # input was not a single state.
        action_dist = Categorical(action_probs.squeeze(0).view(3))
        src_dist = Categorical(src_probs.squeeze(0).view(self.num_nodes))
        dst_dist = Categorical(dst_probs.squeeze(0).view(self.num_nodes))
        next_hop_dist = Categorical(next_hop_probs.squeeze(0).view(self.num_nodes))

        # Keep the samples as tensors: they already live on the right device,
        # so log_prob() works for non-CPU devices too.
        action_type = action_dist.sample()
        src = src_dist.sample()
        dst = dst_dist.sample()
        next_hop = next_hop_dist.sample()

        log_prob = (action_dist.log_prob(action_type) +
                    src_dist.log_prob(src) +
                    dst_dist.log_prob(dst) +
                    next_hop_dist.log_prob(next_hop))

        action = (action_type.item(), src.item(), dst.item(), next_hop.item())
        return action, log_prob, value.item()

    def remember(self, state, action, log_prob, reward, done, value):
        """Store experience in memory"""
        self.states.append(state)
        self.actions.append(action)
        self.log_probs.append(log_prob)
        self.rewards.append(reward)
        self.dones.append(done)
        self.values.append(value)

    def clear_memory(self):
        """Clear memory buffers"""
        self.states = []
        self.actions = []
        self.log_probs = []
        self.rewards = []
        self.dones = []
        self.values = []

    def compute_gae(self, next_value):
        """Compute Generalized Advantage Estimation over the stored rollout.

        Args:
            next_value: Value estimate of the state following the last stored
                transition (ignored when that transition is terminal).

        Returns:
            ``(advantages, returns)`` as two lists in rollout order, where
            ``returns[i] = advantages[i] + values[i]``.
        """
        values = self.values + [next_value]
        advantages = []
        returns = []
        gae = 0

        # Walk backwards, collecting in reverse; a `done` flag cuts both the
        # bootstrap term and the running advantage at episode boundaries.
        for i in reversed(range(len(self.rewards))):
            not_done = 1 - self.dones[i]
            delta = self.rewards[i] + self.gamma * values[i + 1] * not_done - values[i]
            gae = delta + self.gamma * self.gae_lambda * not_done * gae
            advantages.append(gae)
            returns.append(gae + values[i])

        advantages.reverse()
        returns.reverse()
        return advantages, returns

    def update(self, next_value):
        """Run ``self.epochs`` PPO passes over the buffer, then clear it.

        Args:
            next_value: Bootstrap value for the state after the last stored
                transition.

        Returns:
            ``(avg_actor_loss, avg_critic_loss)`` averaged over the epochs.

        Raises:
            ValueError: If no experience has been stored.
        """
        if not self.states:
            raise ValueError("PPOAgent.update() called with no stored experience")

        states = torch.as_tensor(np.array(self.states), dtype=torch.float32, device=self.device)
        # One log-probability per stored step, kept out of the autograd graph.
        old_log_probs = torch.stack(self.log_probs).to(self.device).detach().reshape(-1)

        advantages, returns = self.compute_gae(next_value)
        advantages = torch.tensor(advantages, dtype=torch.float32, device=self.device)
        returns = torch.tensor(returns, dtype=torch.float32, device=self.device)

        # Normalize advantages (std of a single element is undefined).
        if len(advantages) > 1:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        # Split the stored action tuples into one index tensor per component.
        action_types, srcs, dsts, next_hops = (
            torch.tensor(component, device=self.device)
            for component in zip(*self.actions)
        )

        total_actor_loss = 0
        total_critic_loss = 0

        for _ in range(self.epochs):
            action_probs, src_probs, dst_probs, next_hop_probs = self.actor(states)
            # squeeze(-1), not squeeze(): keeps shape [T] even when T == 1 so
            # it always matches `returns`.
            values = self.critic(states).squeeze(-1)

            action_dist = Categorical(action_probs)
            src_dist = Categorical(src_probs)
            dst_dist = Categorical(dst_probs)
            next_hop_dist = Categorical(next_hop_probs)

            new_log_probs = (action_dist.log_prob(action_types) +
                             src_dist.log_prob(srcs) +
                             dst_dist.log_prob(dsts) +
                             next_hop_dist.log_prob(next_hops))

            # Entropy bonus (for exploration)
            entropy = (action_dist.entropy().mean() +
                       src_dist.entropy().mean() +
                       dst_dist.entropy().mean() +
                       next_hop_dist.entropy().mean())

            ratio = torch.exp(new_log_probs - old_log_probs)

            # Clipped surrogate objective
            surr1 = ratio * advantages
            surr2 = torch.clamp(ratio, 1.0 - self.clip_epsilon, 1.0 + self.clip_epsilon) * advantages

            # Actor loss (-min because we're minimizing)
            actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy

            # Critic loss (MSE)
            critic_loss = F.mse_loss(values, returns)

            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.actor.parameters(), 0.5)
            self.actor_optimizer.step()

            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 0.5)
            self.critic_optimizer.step()

            total_actor_loss += actor_loss.item()
            total_critic_loss += critic_loss.item()

        avg_actor_loss = total_actor_loss / self.epochs
        avg_critic_loss = total_critic_loss / self.epochs
        self.actor_losses.append(avg_actor_loss)
        self.critic_losses.append(avg_critic_loss)

        self.clear_memory()

        return avg_actor_loss, avg_critic_loss


In [None]:
def train_agent(env, agent, num_episodes=300, max_steps=100, update_frequency=20):
    """Train ``agent`` on ``env`` and leave the best routing table installed.

    Every transition is stored in the agent's memory; ``agent.update`` runs
    after every ``update_frequency`` episodes and once more at the end if
    experience from trailing episodes is still pending. The routing table at
    the end of the highest-reward episode is re-applied to the network before
    returning.

    Args:
        env: Environment with ``reset()``, ``step(action)`` and ``network``.
        agent: Agent with ``get_action``, ``remember``, ``update`` and an
            ``episode_rewards`` list.
        num_episodes: Number of training episodes.
        max_steps: Upper bound on steps per episode (the environment may
            signal ``done`` earlier).
        update_frequency: Number of episodes between agent updates.

    Returns:
        ``(agent, metrics)`` where ``metrics`` has the per-episode lists
        ``'rewards'``, ``'delivery_ratios'`` and ``'avg_latencies'``.
    """
    episode_rewards = []
    delivery_ratios = []
    avg_latencies = []

    best_reward = float('-inf')
    best_routing = None

    pending_transitions = 0  # stored since the last agent.update()
    next_value = 0

    for episode in tqdm(range(num_episodes)):
        state = env.reset()
        episode_reward = 0
        done = False
        info = None

        for _ in range(max_steps):
            action_tuple, log_prob, value = agent.get_action(state)
            next_state, reward, done, info = env.step(action_tuple)

            agent.remember(state, action_tuple, log_prob, reward, done, value)
            pending_transitions += 1

            episode_reward += reward
            state = next_state

            if done:
                break

        if info is None:
            # Episode ran zero steps: report the metrics of the reset network.
            info = env.network.get_performance_metrics()

        # Track best routing configuration
        if episode_reward > best_reward:
            best_reward = episode_reward
            best_routing = copy.deepcopy(env.network.routing_table)
            print(f"New best reward: {best_reward:.2f} at episode {episode+1}")

        delivery_ratios.append(info['packet_delivery_ratio'])
        avg_latencies.append(info['avg_end_to_end_delay'])

        # Bootstrap value for the state after the last stored transition;
        # zero when the episode really terminated.
        next_value = 0 if done else agent.get_action(state)[2]

        if (episode + 1) % update_frequency == 0 and pending_transitions > 0:
            actor_loss, critic_loss = agent.update(next_value)
            pending_transitions = 0
            print(f"Episode {episode+1}: Reward={episode_reward:.2f}, Actor Loss={actor_loss:.4f}, Critic Loss={critic_loss:.4f}")

        episode_rewards.append(episode_reward)
        agent.episode_rewards.append(episode_reward)

    # Use experience from episodes after the last scheduled update instead of
    # leaving it (with stale log-probabilities) in the agent's memory.
    if pending_transitions > 0:
        actor_loss, critic_loss = agent.update(next_value)
        print(f"Final update: Actor Loss={actor_loss:.4f}, Critic Loss={critic_loss:.4f}")

    # Apply best routing at the end
    if best_routing is not None:
        env.network.update_routing_table(best_routing)

    return agent, {
        'rewards': episode_rewards,
        'delivery_ratios': delivery_ratios,
        'avg_latencies': avg_latencies
    }

def evaluate_routing(env, num_iterations=50, max_steps=100):
    """Score the environment's current routing table without changing it.

    Runs ``num_iterations`` episodes, stepping with ``None`` (no routing
    action) for at most ``max_steps`` steps each, and averages the episode
    reward and the metrics reported by the last step of every episode.

    Returns:
        Dict with ``avg_reward``, ``std_reward``, ``avg_delivery_ratio``,
        ``avg_latency`` (mean end-to-end delay), ``avg_throughput`` and
        ``avg_packet_loss``.
    """
    # Output key -> key of the per-step info dict it is averaged from.
    info_sources = {
        'avg_delivery_ratio': 'packet_delivery_ratio',
        'avg_latency': 'avg_end_to_end_delay',
        'avg_throughput': 'avg_throughput',
        'avg_packet_loss': 'avg_packet_loss',
    }
    samples = {name: [] for name in info_sources}
    total_rewards = []

    for _ in tqdm(range(num_iterations)):
        env.reset()
        episode_reward = 0

        for _step in range(max_steps):
            # Use current routing (no action)
            _, reward, done, info = env.step(None)
            episode_reward += reward
            if done:
                break

        total_rewards.append(episode_reward)
        for name, source_key in info_sources.items():
            samples[name].append(info[source_key])

    summary = {
        'avg_reward': np.mean(total_rewards),
        'std_reward': np.std(total_rewards),
    }
    for name in info_sources:
        summary[name] = np.mean(samples[name])
    return summary

def plot_training_metrics(ppo_metrics):
    """Plot the PPO training curves in a 2x2 grid and save the figure.

    Panels: raw episode rewards, packet delivery ratio, average end-to-end
    delay, and a 10-episode moving average of the rewards. The figure is
    written to ``training_comparison.png`` and then shown.

    Args:
        ppo_metrics: Dict with the per-episode lists ``'rewards'``,
            ``'delivery_ratios'`` and ``'avg_latencies'``.
    """
    window_size = 10
    kernel = np.ones(window_size) / window_size
    ppo_smooth = np.convolve(ppo_metrics['rewards'], kernel, mode='valid')

    # (series, title, y-axis label) for subplot positions 1..4
    panels = [
        (ppo_metrics['rewards'], 'Episode Rewards During Training', 'Reward'),
        (ppo_metrics['delivery_ratios'], 'Packet Delivery Ratio During Training', 'Delivery Ratio'),
        (ppo_metrics['avg_latencies'], 'Average End-to-End Delay During Training', 'Delay (ms)'),
        (ppo_smooth, f'Smoothed Rewards (Window={window_size})', 'Smoothed Reward'),
    ]

    plt.figure(figsize=(15, 10))
    for position, (series, title, y_label) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.plot(series, label='Standard PPO')
        plt.title(title)
        plt.xlabel('Episode')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.savefig('training_comparison.png')
    plt.show()

def plot_comparative_metrics(baseline_metrics, ppo_metrics):
    """Bar-plot the PPO improvement over the baseline, in percent.

    For each metric the relative change ``(ppo - baseline) / |baseline|`` is
    shown; for latency and packet loss (lower is better) the sign is flipped
    so that a positive bar always means "PPO is better". A metric whose
    baseline is exactly 0 has no defined relative change and is plotted as 0.
    The figure is written to ``improvement_comparison.png`` and then shown.

    Args:
        baseline_metrics: Evaluation dict of the baseline routing.
        ppo_metrics: Evaluation dict of the PPO routing; both dicts need the
            keys ``avg_reward``, ``avg_delivery_ratio``, ``avg_latency``,
            ``avg_throughput`` and ``avg_packet_loss``.
    """
    metrics = ['avg_reward', 'avg_delivery_ratio', 'avg_latency', 'avg_throughput', 'avg_packet_loss']
    labels = ['Average Reward', 'Delivery Ratio', 'Latency (ms)', 'Throughput (Mbps)', 'Packet Loss']
    lower_is_better = {'avg_latency', 'avg_packet_loss'}

    def improvement_pct(metric):
        base = baseline_metrics[metric]
        if base == 0:
            # Relative change is undefined for a zero baseline.
            return 0.0
        change = (ppo_metrics[metric] - base) / abs(base) * 100
        return -change if metric in lower_is_better else change

    ppo_improvement = [improvement_pct(m) for m in metrics]

    plt.figure(figsize=(12, 6))

    x = np.arange(len(metrics))
    width = 0.35

    # Single series: centre each bar on its tick label.
    plt.bar(x, ppo_improvement, width, label='Standard PPO')

    plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    plt.xticks(x, labels)
    plt.ylabel('Improvement over Baseline (%)')
    plt.title('Performance Improvement Comparison')
    plt.legend()

    plt.tight_layout()
    plt.savefig('improvement_comparison.png')
    plt.show()


In [None]:
# Build a seeded random topology, wrap it in the RL environment and draw it.
network = NetworkTopology(
    num_nodes=16,
    connectivity_prob=0.4,
    seed=42,
)
env = NetworkEnvironment(network_topology=network, max_steps=100)

# Show the generated graph before any traffic is simulated.
network.visualize_network()

In [None]:



# --- Baseline: static latency-weighted shortest-path routing ---
print("Evaluating baseline (shortest path routing)...")
baseline_agent = BaselineAgent(network)
baseline_agent.initialize_routing_table()  # installs the shortest-path table on `network`
baseline_metrics = evaluate_routing(env, num_iterations=50, max_steps=100)
print("\nBaseline Metrics:")
for key, value in baseline_metrics.items():
    print(f"{key}: {value:.4f}")

# Snapshot of the shortest-path table so it can be restored after training.
original_routing = copy.deepcopy(network.routing_table)

# --- Standard PPO agent ---
print("\nTraining standard PPO agent...")
state_dim = len(env._get_state())
ppo_hyperparameters = dict(
    hidden_dim=256,
    lr_actor=0.0003,
    lr_critic=0.001,
    gamma=0.99,
    gae_lambda=0.95,
    clip_epsilon=0.2,
    epochs=10,
    device='cpu',
)
ppo_agent = PPOAgent(
    state_dim=state_dim,
    num_nodes=network.num_nodes,
    **ppo_hyperparameters,
)

trained_ppo, ppo_metrics = train_agent(env, ppo_agent, num_episodes=300, max_steps=100)

# train_agent leaves its best routing table installed on the network, so a
# no-action evaluation now scores that table.
print("\nEvaluating standard PPO agent...")
ppo_eval_metrics = evaluate_routing(env)
print("\nStandard PPO Metrics:")
for key, value in ppo_eval_metrics.items():
    print(f"{key}: {value:.4f}")

# Keep the PPO table, then put the shortest-path table back on the network.
ppo_routing = copy.deepcopy(network.routing_table)
network.update_routing_table(original_routing)



In [None]:
# Plot training metrics
plot_training_metrics(ppo_metrics)

# Plot comparative metrics
plot_comparative_metrics(baseline_metrics, ppo_eval_metrics)

# Print final comparison table (baseline vs. PPO, with relative improvement)
print("\n==== FINAL COMPARISON ====")
print("Metric                  | Baseline      | Standard PPO  | Improvement")
print("------------------------|---------------|---------------|-------------")

metrics_to_show = [
    ('avg_reward', 'Average Reward'),
    ('avg_delivery_ratio', 'Delivery Ratio'),
    ('avg_latency', 'Latency (ms)'),
    ('avg_throughput', 'Throughput (Mbps)'),
    ('avg_packet_loss', 'Packet Loss')
]

for metric, label in metrics_to_show:
    baseline = baseline_metrics[metric]
    ppo = ppo_eval_metrics[metric]

    # Relative improvement in percent. Dividing by |baseline| keeps the sign
    # meaningful when the baseline is negative (e.g. a negative reward); a
    # zero baseline has no defined relative change and is reported as 0.
    if baseline == 0:
        ppo_imp = 0.0
    elif metric in ['avg_latency', 'avg_packet_loss']:  # Lower is better
        ppo_imp = (baseline - ppo) / abs(baseline) * 100
    else:  # Higher is better
        ppo_imp = (ppo - baseline) / abs(baseline) * 100

    print(f"{label:23} | {baseline:13.4f} | {ppo:13.4f} | {ppo_imp:11.2f}%")

# Re-draw the topology; edge colours reflect link congestion after the runs.
# (The drawing does not depict routing tables.)
network.visualize_network()
