#### Simple Multi-Agent System MAS using GNN + RL

In [1]:
import numpy as np
import networkx as nx
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, SAGEConv
from torch_geometric.utils import from_networkx

num_nodes = 100
num_edges = 35
in_channels = 16
hidden_channels = 32
out_channels = 25
time_steps = 100_000

In [2]:
G = nx.gnm_random_graph(num_nodes, num_edges)

np.random.seed(42)
water_levels = np.random.uniform(low=0.5, high=2.0, size=(time_steps, num_nodes))
inflow_rates = np.random.uniform(low=0.1, high=0.5, size=(time_steps, num_nodes))
outflow_rates = np.random.uniform(low=0.1, high=0.5, size=(time_steps, num_nodes))
valve_positions = np.random.uniform(low=0.0, high=1.0, size=(time_steps, num_nodes))

flow_capacities = np.random.uniform(low=1.0, high=3.0, size=(num_edges,))
distances = np.random.uniform(low=0.5, high=5.0, size=(num_edges,))

data = {
    'time_step': np.repeat(np.arange(time_steps), num_nodes),
    'node': np.tile(np.arange(num_nodes), time_steps),
    'water_level': water_levels.flatten(),
    'inflow_rate': inflow_rates.flatten(),
    'outflow_rate': outflow_rates.flatten(),
    'valve_position': valve_positions.flatten(),
}
df_nodes = pd.DataFrame(data)

edge_list = np.array(G.edges())
df_edges = pd.DataFrame({
    'edge_index': np.arange(num_edges),
    'source_node': edge_list[:, 0],
    'target_node': edge_list[:, 1],
    'flow_capacity': flow_capacities,
    'distance': distances,
})

display(df_nodes.head())
display(df_edges.head())

Unnamed: 0,time_step,node,water_level,inflow_rate,outflow_rate,valve_position
0,0,0,1.06181,0.470615,0.252714,0.773222
1,0,1,1.926071,0.252984,0.453712,0.02287
2,0,2,1.597991,0.448588,0.280232,0.135256
3,0,3,1.397988,0.404589,0.455681,0.547153
4,0,4,0.734028,0.23153,0.260071,0.112734


Unnamed: 0,edge_index,source_node,target_node,flow_capacity,distance
0,0,1,10,2.231751,1.086002
1,1,2,88,2.050334,2.98607
2,2,2,90,1.094709,1.904464
3,3,5,54,1.661716,4.91384
4,4,8,27,1.825759,3.517917


In [3]:
# NOTE Conver to Tensor Data
G = nx.gnm_random_graph(num_nodes, num_edges)
# Water Level
node_features = torch.tensor(np.random.uniform(low=0.5, high=2.0, size=(num_nodes, in_channels)), dtype=torch.float)
water_data = from_networkx(G)
water_data.x = node_features

##### Simple GNN Architecture
$$
h_v^{(k+1)} = \sigma \left( \sum_{u \in \mathcal{N}(v)} \frac{1}{\sqrt{d_u d_v}} W^{(k)} h_u^{(k)} \right)
$$

where $ h_v^{(k)} $ represents the node features at layer $ k $, and $ W^{(k)} $ is the weight matrix.

In [4]:
# Graph Convolutional Network
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.conv4 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.3)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.3)
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.3)
        x = self.conv4(x, edge_index)
        return x
    
class GSage(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GSage, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        # x = F.relu(x)
        x = F.sigmoid(x)
        x = self.conv2(x, edge_index)
        x = F.sigmoid(x)
        x = self.conv3(x, edge_index)
        return x

##### Reinforcement Learning (Policy Network)
The policy network in the RL model maps the state to an action:

$$
\text{Action} = \text{PolicyNetwork}(State)
$$

In [5]:
class PolicyNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(PolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return torch.tanh(self.fc3(x))

# NOTE Reward Systems
def environment_step(state, action):
    # NOTE Simulate the effect of the action on the environment
    adjusted_outflow = state[:, 2] + action.squeeze()
    adjusted_outflow = torch.clamp(adjusted_outflow, 0, 1)

    new_water_level = state[:, 0] + state[:, 1] - adjusted_outflow
    new_water_level = torch.clamp(new_water_level, 0, 2)
    
    new_state = torch.stack((new_water_level, state[:, 1], adjusted_outflow, state[:, 3]), dim=1)
    overflow_pen = overflow_penalty(new_state)
    stability = stability_reward(new_state)
    reward = overflow_pen + stability
    return new_state, reward

def overflow_penalty(state): # Negative reward if water level exceeds a threshold
    return torch.sum(torch.clamp(state[:, 0] - 1.5, min=0))

def stability_reward(state): # Reward for maintaining stable water levels
    return -torch.var(state[:, 0])


##### GNN + RL Combined

In [6]:
gnn = GCN(in_channels=in_channels, hidden_channels=hidden_channels, out_channels=out_channels)
# gnn = GSage(in_channels=in_channels, hidden_channels=hidden_channels, out_channels=out_channels)
policy_network = PolicyNetwork(input_dim=out_channels, output_dim=1)

num_episodes = 100 # NOTE Test with 100
optimizer = torch.optim.Adam(
    list(gnn.parameters()) + list(policy_network.parameters()), 
    lr=1e-3, weight_decay=1e-3
)
best_loss = float("inf")
cnt_grad_explosion = 0
for episode in range(num_episodes):
    node_representations = gnn(water_data)
    graph_representation = torch.mean(node_representations, dim=0) # Create Graph Representation
    
    action = policy_network(graph_representation)
    new_state, reward = environment_step(node_representations, action)  # Environment update

    loss = -reward.mean()  # Negative reward to maximize reward
    if abs(loss) == 0: cnt_grad_explosion += 1

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if loss < best_loss: best_loss = loss
    print(f'Episode {episode}: Loss = {loss.item()}')

print(f'Final Loss: {loss.item()} | Grad Explosion {(cnt_grad_explosion/num_episodes)*100:.5f}%')

Episode 0: Loss = -0.45930910110473633
Episode 1: Loss = -0.458012193441391
Episode 2: Loss = -0.4414289891719818
Episode 3: Loss = -0.46000224351882935
Episode 4: Loss = -0.46000000834465027
Episode 5: Loss = -0.0
Episode 6: Loss = 3.719362575793639e-05
Episode 7: Loss = -0.9208080768585205
Episode 8: Loss = -0.0
Episode 9: Loss = -0.0
Episode 10: Loss = -0.46000000834465027
Episode 11: Loss = -0.0
Episode 12: Loss = -0.46000000834465027
Episode 13: Loss = -0.0
Episode 14: Loss = -0.0
Episode 15: Loss = -0.0
Episode 16: Loss = -0.46000000834465027
Episode 17: Loss = -0.0
Episode 18: Loss = -0.0
Episode 19: Loss = -0.0
Episode 20: Loss = -0.46000000834465027
Episode 21: Loss = -0.0
Episode 22: Loss = -0.0
Episode 23: Loss = 0.00027018110267817974
Episode 24: Loss = -0.0
Episode 25: Loss = -0.0
Episode 26: Loss = -0.0
Episode 27: Loss = -0.0
Episode 28: Loss = -0.0
Episode 29: Loss = -0.0
Episode 30: Loss = -0.0
Episode 31: Loss = -0.0
Episode 32: Loss = -0.46000000834465027
Episode 33:

In the GAT layer, the node representations are updated using attention mechanisms:

$$
h_v' = \sum_{u \in \mathcal{N}(v)} \alpha_{vu} W h_u
$$

where $ \alpha_{vu} $ is the attention coefficient between node $ v $ and node $ u $.