# Generate training data

256 × 169 samples
### Change demand
Timestep = 1 h over 1 week -> 169 timesteps in total.
At every timestep, the nodal demand changes.
### Change topology
Randomly remove a pair of pipes while preserving the connectivity between every node in the network and the source -> 256 topologies.

### Try single timestep

In [1]:
import wntr
import networkx as nx
import scipy.sparse as sp
import numpy as np 
import random
import tqdm
import os 
import matplotlib.pyplot as plt
import itertools
from collections import Counter 
import networkx as nx
import copy
import pandas as pd
import tensorflow_gnn as tfgnn
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [2]:
# To use standard datasets that fit in memory.
from tensorflow_gnn.experimental.in_memory import datasets

# Implementations of example GNN models.
from tensorflow_gnn.experimental.in_memory import models

# Converts `tfgnn.GraphTensor` to (`tfgnn.GraphTensor`, `tf.Tensor`)
# with second item containing task labels.
from tensorflow_gnn.experimental.in_memory import reader_utils

# For on-the-fly subgraph sampling.
from tensorflow_gnn.sampler import sampling_spec_builder

In [3]:
def get_removable_links(inp_file):
    """Find pipes, and pairs of pipes, whose removal keeps the network connected.

    Args:
        inp_file: Path of the EPANET ``.inp`` file to load with WNTR.

    Returns:
        A tuple ``(removable_links, removable_pairs)``:
        ``removable_links`` is the list of WNTR pipe objects that join two
        junctions which both have graph degree > 1 (i.e. not dead ends);
        ``removable_pairs`` is the list of 2-tuples of those pipes that can
        be removed together while the undirected network graph stays
        connected.
    """
    wn = wntr.network.WaterNetworkModel(inp_file)
    # to_graph() takes optional node/link weights; the model itself is not a
    # valid argument, so call it without any.
    G = wn.to_graph()

    # Keep only pipes between two junctions that are not dead ends.
    removable_links = [
        link
        for _, link in wn.links()
        if (link.link_type == 'Pipe' and
            link.start_node.node_type == 'Junction' and
            link.end_node.node_type == 'Junction' and
            G.degree[link.start_node.name] > 1 and
            G.degree[link.end_node.name] > 1)
    ]

    # Connectivity depends only on topology, so each candidate pair is tested
    # on a copy of the undirected graph instead of deep-copying the whole
    # water network model for every pair.
    G_undirected = G.to_undirected()
    removable_pairs = []
    for link1, link2 in itertools.combinations(removable_links, 2):
        Gr = G_undirected.copy()
        for link in (link1, link2):
            # WNTR keys every graph edge by its link name, which
            # disambiguates parallel links between the same two nodes.
            Gr.remove_edge(link.start_node_name, link.end_node_name, key=link.name)
        if nx.is_connected(Gr):
            removable_pairs.append((link1, link2))

    return removable_links, removable_pairs


In [4]:
# Name of the benchmark network; the EPANET input file is expected next to
# the notebook as "<network>.inp".
network = 'ASnet2'
inp_file = f'{network}.inp'
wn = wntr.network.WaterNetworkModel(inp_file)

# Candidate pipes / pipe pairs that may be removed to vary the topology.
removable_links, removable_pairs = get_removable_links(inp_file)

# Problem dimensions taken from the unmodified network.
num_nodes, num_links = wn.num_nodes, wn.num_links

# Number of time steps kept per simulation. The full-horizon alternative
# would be:
#   int(wn.options.time.duration / wn.options.time.report_timestep + 1)
num_time = 5

In [5]:
# Number of simulated scenarios (one topology per scenario).
n_sims = 256

# Node feature vector:  V_i = [I_i^d, q_i, I_i^m, H_i^*]
# Edge feature vector:  E_p = [i, j, c_p]
# Pre-allocate the data tensors, indexed by (simulation, row, feature).
edge_rows = num_links * num_time
node_rows = num_nodes * num_time
A = np.zeros((n_sims, edge_rows, 3))  # edge table
B = np.zeros((n_sims, node_rows, 6))  # node table
U = np.zeros((n_sims, node_rows, 1))  # target table

# Randomly chosen pairs of removable links, one draw per scenario.
links2remove = random.sample(removable_pairs, n_sims)

## Measurement when fully-supervised
measurement_fully = []
# Names of the monitored nodes.
monitor = ['5', '11', '32', '37', '44']

In [6]:
# Run one hydraulic simulation per scenario and fill the edge table A, the
# node table B and the target table U.
for sim in range(n_sims):
    # Reload the model so every scenario starts from the unmodified network.
    wn = wntr.network.WaterNetworkModel(inp_file)
    wn.options.hydraulic.demand_model = 'DD'  # demand-driven analysis

    # Tag every node with a consecutive integer id; the edge table refers to
    # nodes through this id.
    for node_idx, (_, node) in enumerate(wn.nodes()):
        node.id = node_idx

    # Scenario 0 keeps the full topology; every other scenario drops one
    # sampled pair of links.
    if sim != 0:
        for link in links2remove[sim - 1]:
            # remove_link() is documented to take a link *name*. The sampled
            # entries may be Link objects that belong to another model
            # instance, so resolve them to their name first.
            wn.remove_link(getattr(link, 'name', link))

    # One row per link. The row index advances exactly once per link so the
    # rows stay contiguous (pipes used to advance it twice, leaving zero rows
    # between them and pushing links past the first num_links rows).
    for row, (_, link) in enumerate(wn.links()):
        A[sim, row, 0] = link.start_node.id
        A[sim, row, 1] = link.end_node.id
        if link.link_type == 'Pipe':
            # Reciprocal of 10.667 * L / (C^1.852 * D^4.871).
            # NOTE(review): this is the Hazen-Williams (SI) resistance form;
            # confirm the .inp file uses Hazen-Williams roughness.
            A[sim, row, 2] = 1/(10.667*link.length/link.roughness**1.852/link.diameter**4.871)
        elif link.link_type == 'Pump':
            A[sim, row, 2] = 1
        elif link.link_type == 'Valve':
            A[sim, row, 2] = 2
        else:
            print('Other types of links exist.')

    results = wntr.sim.EpanetSimulator(wn).run_sim(version=2.0)
    head = results.node['head']
    # Negative demands are clipped to zero.
    demand = np.maximum(results.node['demand'], 0)

    ### Produce data for multiple time steps
    #### Time step
    index_values = head.index.values
    # Re-seeding on every iteration makes each scenario draw the same time
    # steps (as long as the result index is the same). Note that this also
    # resets NumPy's global random state.
    np.random.seed(42)
    # Draw num_time distinct time steps so the row count always matches the
    # pre-allocated arrays.
    Time_step = np.random.choice(index_values, size=num_time, replace=False)

    # Each sampled time step repeated once per node (time-major layout).
    Time_indicator = np.repeat(Time_step, num_nodes)

    #### Demand
    demand_s = demand.loc[Time_step].values.flatten()
    #### Head
    head_s = head.loc[Time_step].values.flatten()
    #### Node indicator (1-based position of the node, repeated per time step)
    Node_indicator = np.tile(np.arange(1, num_nodes + 1), num_time)
    # Measurement when fully-supervised.
    # NOTE(review): this rebinds the module-level list to the node indicator
    # array and is not read in this loop - confirm it is still needed.
    measurement_fully = Node_indicator
    #### Junction indicator (1 for junctions, 0 for every other node type)
    Nd_single = np.array([1 if node.node_type == 'Junction' else 0 for _, node in wn.nodes()])
    Nd = np.tile(Nd_single, num_time)
    #### Measurement indicator (1 for junctions that are NOT monitored,
    #### 0 for monitored nodes and for non-junction nodes)
    Nh_single = np.array([1 if node.node_type == 'Junction' and node.name not in monitor else 0 for _, node in wn.nodes()])
    Nh = np.tile(Nh_single, num_time)

    ### Node
    B[sim, :, 0] = Time_indicator
    B[sim, :, 1] = Node_indicator
    B[sim, :, 2] = Nd
    B[sim, :, 3] = demand_s
    B[sim, :, 4] = Nh
    # Head is exposed only where the measurement indicator is 0.
    B[sim, :, 5] = (1 - Nh) * head_s

    ### Target: full head vector
    U[sim, :, 0] = head_s

In [7]:
### Edge
# Take the first num_links edge rows of every simulation and repeat them
# num_time times along the row axis, giving one copy of the edge list per
# sampled time step.
A_re = np.tile(A[:, :num_links, :], (1, num_time, 1))


### Change 3-d array to DataFrame

In [8]:
# Collapse the (simulation, row, feature) arrays into 2-D tables of shape
# (simulation * row, feature) and wrap each one in a DataFrame.

# Node table
B_2d = B.reshape(-1, B.shape[-1])
node_df = pd.DataFrame(B_2d)

# Edge table
A_2d = A_re.reshape(-1, A_re.shape[-1])
edge_df = pd.DataFrame(A_2d)

# Target table
U_2d = U.reshape(-1, U.shape[-1])
graph_df = pd.DataFrame(U_2d)

In [9]:
# Give the positional columns of each table a descriptive name (in place).
for frame, names in (
    (graph_df, {0: 'head'}),
    (edge_df, {0: 'source', 1: 'target', 2: 'loss_co'}),
    (node_df, {0:'Time_indicator', 1: 'Node_indicator', 2:'Junction_in',3: 'demand', 4: 'Measurement_in',5:'measurement'}),
):
    frame.rename(columns=names, inplace=True)

## Scale the data

In [10]:
# Edge table: standardise the loss coefficient (zero mean, unit variance).
column_to_scale = 'loss_co'
edge_df[column_to_scale] = StandardScaler().fit_transform(edge_df[[column_to_scale]])

# Node table: standardise demand and measurement together.
# NOTE(review): the scaler is fitted on the full table before the train/test
# split, and the zeros written for unmeasured nodes are scaled as well -
# confirm that both are intended.
columns_to_scale = ['demand', 'measurement']
scaler = StandardScaler()
node_df[columns_to_scale] = scaler.fit_transform(node_df[columns_to_scale])

Unnamed: 0,Time_indicator,Node_indicator,Junction_in,demand,Measurement_in,measurement
0,496800.0,1.0,1.0,-0.410048,1.0,-0.365148
1,496800.0,2.0,1.0,0.155513,1.0,-0.365148
2,496800.0,3.0,1.0,1.581834,1.0,-0.365148
3,496800.0,4.0,1.0,2.881058,1.0,-0.365148
4,496800.0,5.0,1.0,-0.244421,0.0,2.737187
5,496800.0,6.0,1.0,1.87288,1.0,-0.365148
6,496800.0,7.0,1.0,-0.263777,1.0,-0.365148
7,496800.0,8.0,1.0,0.875395,1.0,-0.365148
8,496800.0,9.0,1.0,-0.161813,1.0,-0.365148
9,496800.0,10.0,1.0,0.248173,1.0,-0.365148


In [11]:
# Row positions at which the edge and node tables are cut: everything before
# the cut is training data, everything from the cut onwards is test data.
edge_split = n_sims * num_links * (num_time - 1)
node_split = n_sims * num_nodes * (num_time - 1)

train_edge_df, test_edge_df = edge_df[:edge_split], edge_df[edge_split:]
train_node_df, test_node_df = node_df[:node_split], node_df[node_split:]

In [12]:
def create_graph_tensor(node_df, edge_df):
    """Pack a node table and an edge table into one ``tfgnn.GraphTensor``.

    Args:
        node_df: DataFrame with the columns 'Junction_in', 'demand',
            'Measurement_in' and 'measurement'; one row per node.
        edge_df: DataFrame with the columns 'source', 'target' and
            'loss_co'; one row per edge. 'source'/'target' are used as
            integer indices into the node set.

    Returns:
        A GraphTensor with a single node set "node" and a single edge set
        "link". Every feature is stored as a column vector of shape (rows, 1).
    """
    def as_column(frame, name, dtype):
        # One DataFrame column as a (rows, 1) array of the requested dtype.
        return np.array(frame[name], dtype=dtype).reshape(len(frame), 1)

    # 'Node_indicator' and 'Time_indicator' are not passed on as features.
    node_features = {
        'Junction_in': as_column(node_df, 'Junction_in', 'int32'),
        'demand': as_column(node_df, 'demand', 'float32'),
        'Measurement_in': as_column(node_df, 'Measurement_in', 'int32'),
        'measurement': as_column(node_df, 'measurement', 'float32'),
    }
    nodes = tfgnn.NodeSet.from_fields(sizes=[len(node_df)], features=node_features)

    adjacency = tfgnn.Adjacency.from_indices(
        source=("node", np.array(edge_df['source'], dtype='int32')),
        target=("node", np.array(edge_df['target'], dtype='int32')),
    )
    links = tfgnn.EdgeSet.from_fields(
        sizes=[len(edge_df)],
        features={'loss_co': as_column(edge_df, 'loss_co', 'float32')},
        adjacency=adjacency,
    )

    return tfgnn.GraphTensor.from_pieces(
        node_sets={"node": nodes},
        edge_sets={"link": links},
    )


In [13]:
# Graph tensors for the complete data set and for the train / test parts.
full_tensor, train_tensor, test_tensor = (
    create_graph_tensor(node_df, edge_df),
    create_graph_tensor(train_node_df, train_edge_df),
    create_graph_tensor(test_node_df, test_edge_df),
)

Split off the 'measurement' node feature (the scaled, masked head) as our target (label).

In [14]:
def node_batch_merge(graph):
    """Split a batched graph into ``(graph without label, label)``.

    The batch is first merged into a single graph with components. The node
    feature 'measurement' is then removed from the node set and returned
    separately as the label.

    Args:
        graph: Batched ``tfgnn.GraphTensor`` with node set "node" and edge
            set "link".

    Returns:
        Tuple ``(new_graph, label)``.
    """
    merged = graph.merge_batch_to_components()
    node_feats = merged.node_sets['node'].get_features_dict()
    link_feats = merged.edge_sets['link'].get_features_dict()

    # Take the target out of the inputs so the model cannot read it.
    label = node_feats.pop('measurement')
    # Debug output: shows the label tensor when the function is traced.
    print(label)

    stripped = merged.replace_features(
        node_sets={'node': node_feats},
        edge_sets={'link': link_feats},
    )
    return stripped, label


     
def create_dataset(graph, function):
    """Wrap one graph tensor in a batched ``tf.data.Dataset`` and map it.

    Args:
        graph: Graph tensor that becomes the single element of the dataset.
        function: Callable applied to every batch through ``Dataset.map``.

    Returns:
        The mapped dataset.
    """
    # from_tensors() yields a one-element dataset, so batch(32) produces a
    # single batch holding that one graph.
    batched = tf.data.Dataset.from_tensors(graph).batch(32)
    return batched.map(function)

### Train dataset

In [15]:
# (graph, label) datasets for the complete data and for the train / test parts.
full_node_dataset, train_node_dataset, test_node_dataset = (
    create_dataset(full_tensor, node_batch_merge),
    create_dataset(train_tensor, node_batch_merge),
    create_dataset(test_tensor, node_batch_merge),
)

Tensor("Reshape_5:0", shape=(None, 1), dtype=float32)
Tensor("Reshape_5:0", shape=(None, 1), dtype=float32)
Tensor("Reshape_5:0", shape=(None, 1), dtype=float32)


# Model Building

## Build model inputs

In [16]:
# The dataset yields (graph, label) pairs; only the graph spec defines the
# model input.
graph_spec, label_spec = train_node_dataset.element_spec
input_graph = tf.keras.layers.Input(type_spec=graph_spec)

In [17]:
def set_initial_node_state(node_set, node_set_name):
    """Build the initial hidden state of a node set.

    Each of 'Junction_in', 'demand' and 'Measurement_in' goes through its own
    32-unit ReLU dense layer and the results are concatenated.
    'Node_indicator' and 'measurement' are not used as inputs.

    Args:
        node_set: Mapping from feature name to feature tensor.
        node_set_name: Name of the node set (unused).

    Returns:
        The concatenated embedding tensor.
    """
    embedded = [
        tf.keras.layers.Dense(32, activation="relu")(node_set[feature_name])
        for feature_name in ('Junction_in', 'demand', 'Measurement_in')
    ]
    return tf.keras.layers.Concatenate()(embedded)


def set_initial_edge_state(edge_set, edge_set_name):
    """Build the initial hidden state of an edge set.

    The 'loss_co' feature goes through a 32-unit ReLU dense layer; the result
    is passed through ``Concatenate`` as a one-element list.

    Args:
        edge_set: Mapping from feature name to feature tensor.
        edge_set_name: Name of the edge set (unused).

    Returns:
        The embedding tensor.
    """
    loss_embedding = tf.keras.layers.Dense(32, activation="relu")(edge_set['loss_co'])
    return tf.keras.layers.Concatenate()([loss_embedding])




In [18]:
# Replace the raw node / edge features by their initial hidden states.
feature_mapper = tfgnn.keras.layers.MapFeatures(
    node_sets_fn=set_initial_node_state,
    edge_sets_fn=set_initial_edge_state,
)
graph = feature_mapper(input_graph)



In [19]:
def dense_layer(units=32, l2_reg=0.1, dropout=0.25, activation='relu'):
    """Return a regularised dense block: ``Dense`` followed by ``Dropout``.

    Args:
        units: Output width of the dense layer.
        l2_reg: L2 factor applied to both the kernel and the bias.
        dropout: Dropout rate applied after the dense layer.
        activation: Activation of the dense layer.

    Returns:
        A ``tf.keras.Sequential`` model containing the two layers.
    """
    regularizer = tf.keras.regularizers.l2(l2_reg)
    return tf.keras.Sequential([
        # The activation argument used to be accepted but silently ignored,
        # which left every block purely linear.
        tf.keras.layers.Dense(units,
                              activation=activation,
                              kernel_regularizer=regularizer,
                              bias_regularizer=regularizer),
        tf.keras.layers.Dropout(dropout)
    ])


### Compile

In [20]:
# Number of message-passing rounds stacked on top of each other.
graph_updates = 5
for step in range(graph_updates):
    # Messages travel along "link" edges towards their target node, include
    # the edge hidden state and are mean-pooled per receiving node.
    link_conv = tfgnn.keras.layers.SimpleConv(
        message_fn=dense_layer(16),
        reduce_type="mean",
        sender_edge_feature=tfgnn.HIDDEN_STATE,
        receiver_tag=tfgnn.TARGET,
    )
    # The new node state is computed from the concatenated inputs.
    next_state = tfgnn.keras.layers.NextStateFromConcat(dense_layer(32))
    node_update = tfgnn.keras.layers.NodeSetUpdate({'link': link_conv}, next_state)
    graph = tfgnn.keras.layers.GraphUpdate(node_sets={'node': node_update})(graph)

# Read-out head: per-node hidden state -> one value per node.
# Note that none of these dense layers has an activation.
node_state = graph.node_sets["node"][tfgnn.HIDDEN_STATE]
dense1 = tf.keras.layers.Dense(64)(node_state)
dense2 = tf.keras.layers.Dense(64)(dense1)
dense3 = tf.keras.layers.Dense(1)(dense2)

In [21]:
# Assemble the end-to-end model and configure it for regression with MSE.
node_model = tf.keras.Model(inputs=input_graph, outputs=dense3)
node_model.compile(
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)
node_model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [()]                      0         
                                                                 
 map_features (MapFeatures)  ()                        256       
                                                                 
 graph_update (GraphUpdate)  ()                        7216      
                                                                 
 graph_update_1 (GraphUpdate  ()                       3120      
 )                                                               
                                                                 
 graph_update_2 (GraphUpdate  ()                       3120      
 )                                                               
                                                                 
 graph_update_3 (GraphUpdate  ()                       3120  

### Train model

In [None]:
# Stop once the validation loss has not improved for 10 epochs and roll back
# to the best weights seen.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                      mode='min',
                                      verbose=1,
                                      patience=10,
                                      restore_best_weights=True)

# Validate on the held-out part only. The full dataset contains the training
# graphs, so using it would make 'val_loss' largely a training loss and
# defeat early stopping.
# NOTE(review): this reuses the test split for model selection; a dedicated
# validation split would be cleaner.
node_model.fit(train_node_dataset.repeat(),
               validation_data=test_node_dataset,
               steps_per_epoch=100,
               epochs=100,
               callbacks=[es])

### Evaluate model

In [None]:
# Loss and metrics on the test split, printed as {metric name: value}.
eval_result = node_model.evaluate(test_node_dataset)
print({name: value for name, value in zip(node_model.metrics_names, eval_result)})

In [None]:
# Model output for every node of the full dataset, as a one-column table.
raw_predictions = node_model.predict(full_node_dataset)
predictions = pd.DataFrame(raw_predictions)
print(predictions)

In [None]:
# Correlation between the 'measurement' column and the model output.
predicted_column = predictions[0]
correlation = node_df['measurement'].corr(predicted_column)
print(correlation)

In [None]:
## After scaling

In [None]:
# Loss and metrics on the test split, printed as {metric name: value}.
eval_result = node_model.evaluate(test_node_dataset)
print({name: value for name, value in zip(node_model.metrics_names, eval_result)})

In [None]:
# Predict on the full dataset and correlate the output with the
# 'measurement' column.
raw_predictions = node_model.predict(full_node_dataset)
predictions = pd.DataFrame(raw_predictions)
correlation = node_df['measurement'].corr(predictions[0])
print(correlation)