In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import wntr
import wntr.metrics.economic as economics
import numpy as np
import pandas as pd
import pdb
import os

import sys
sys.path.append('../')
from testWN import testWN as twm
from surrogate_model_training_data import get_data

from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [3]:
%matplotlib notebook

# Get network information

In [4]:
# Load the C-Town network description through the project's testWN wrapper.
inp_file = '../../Code/c-town_true_network_simplified_controls.inp'
ctown = twm(inp_file)
# Attribute tables built from the network's todict() output. Later cells index them
# as column = element name, row = attribute (e.g. 'elevation', 'link_type').
nw_node_df = pd.DataFrame(ctown.wn.nodes.todict())
nw_link_df = pd.DataFrame(ctown.wn.links.todict())

# Name collections from the wrapper. Later cells use node_names[0] for tanks,
# node_names[1] for reservoirs, node_names[2] for junctions and link_names[0] for pumps.
node_names = ctown.getNodeName()
link_names = ctown.getLinkName()


Not all curves were used in "../../Code/c-town_true_network_simplified_controls.inp"; added with type None, units conversion left to user



# Data Pre-Processing

Get clusters:

In [5]:
# Cluster assignment and pressure scaling factors, read from JSON files in the working directory.
cluster_labels = pd.read_json('cluster_labels.json')
pressure_factor = pd.read_json('pressure_factor.json')
# NOTE(review): n_clusters is not referenced in the visible part of this notebook;
# presumably it mirrors the number of clusters in cluster_labels.json — confirm.
n_clusters = 30

Get results:

In [6]:
# Simulation results of a single run, stored as a pickle.
# NOTE(review): absolute, user-specific path — the notebook only runs on a machine with this layout.
# NOTE(review): pickle.load can execute arbitrary code; only open result files from a trusted source.
file = '/home/felix/tubCloud/Shared/WDN_SurrogateModels/_RESULTS/150sim/results_sim_14_try.pkl'
with open(file, 'rb') as f:
    results = pickle.load(f)

## Physical preprocessing:

### Junctions
Scale pressure for each node (similar to clustering)

In [7]:
# Junction pressures (columns node_names[2]) divided by the pressure factors loaded above.
junction_pressure_scaled = results.node['pressure'][node_names[2]]/pressure_factor.to_numpy()

In [8]:
# Per-cluster statistics of the scaled junction pressure; columns are grouped by the
# 'pressure' row of cluster_labels.
# NOTE(review): groupby(..., axis=1) is deprecated in recent pandas releases — confirm the pandas version in use.
jun_cl_press = junction_pressure_scaled.groupby(cluster_labels.loc['pressure'], axis=1)
jun_cl_press_mean = jun_cl_press.mean()
jun_cl_press_std = jun_cl_press.std()

# Junction demand summed within each pressure cluster.
jun_cl_demand = results.node['demand'][node_names[2]].groupby(cluster_labels.loc['pressure'], axis=1)
jun_cl_demand_sum = jun_cl_demand.sum()

# Row-to-row difference of junction quality (diff along the index), grouped by the
# 'quality' row of cluster_labels. The first row of the diff is NaN.
jun_cl_qual = results.node['quality'][node_names[2]].diff(axis=0).groupby(cluster_labels.loc['quality'], axis=1)
qual_cl_qual_mean = jun_cl_qual.mean()
qual_cl_qual_std = jun_cl_qual.std()

In [9]:
# One thin grey line per cluster: mean scaled pressure over the simulation.
fig, ax = plt.subplots(figsize=(9,2.5))
jun_cl_press_mean.plot(ax=ax, legend=False, linewidth=0.5, color='k', alpha=0.4)
ax.set_xlabel('time [s]')
ax.set_ylabel('pressure [m]')
ax.set_title('Cluster pressure over time')
plt.show()

<IPython.core.display.Javascript object>

### Tanks

In [10]:
# Pressure at the tank nodes (columns node_names[0]).
tank_press = results.node['pressure'][node_names[0]]
# Subtract tank elevation from tank head to obtain tank_level
tank_level = results.node['head'][node_names[0]]-nw_node_df[node_names[0]].loc['elevation']

# Quality at the tank nodes.
tank_qual = results.node['quality'][node_names[0]]

In [11]:
# Three stacked panels sharing the time axis: tank pressure, level and quality.
fig, ax = plt.subplots(3,1, sharex=True,)
tank_press.plot(ax=ax[0])
ax[0].set_ylabel('tank pressure [m]')
tank_level.plot(ax=ax[1], legend=False)
ax[1].set_ylabel('tank level [m]')
tank_qual.plot(ax=ax[2], legend=False)
ax[2].set_ylabel('tank quality [s]')
ax[2].set_xlabel('time [s]')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'time [s]')

### Reservoirs

In [12]:
# Same quantities as for the tanks, for the reservoir nodes (columns node_names[1]).
reservoir_press = results.node['pressure'][node_names[1]]
# Head minus node elevation.
reservoir_level = results.node['head'][node_names[1]]-nw_node_df[node_names[1]].loc['elevation']

reservoir_qual = results.node['quality'][node_names[1]]

In [13]:
# Three stacked panels sharing the time axis: reservoir pressure, level and quality.
fig, ax = plt.subplots(3,1, sharex=True,)
reservoir_press.plot(ax=ax[0])
ax[0].set_ylabel('reservoir pressure [m]')
reservoir_level.plot(ax=ax[1], legend=False)
ax[1].set_ylabel('reservoir level [m]')
reservoir_qual.plot(ax=ax[2], legend=False)
ax[2].set_ylabel('reservoir quality [s]')
ax[2].set_xlabel('time [s]')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'time [s]')

### Pumps

In [14]:
# Link 'setting' time series, restricted to links whose 'link_type' attribute is 'Pump'.
head_pump_speed = results.link['setting'][nw_link_df.keys()[nw_link_df.loc['link_type'] == 'Pump']]

In [15]:
# Pump settings over time, one line per pump.
fig, ax = plt.subplots()
head_pump_speed.plot(ax=ax)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f9c2c259c50>

In [16]:
# Pump energy from WNTR's economic metrics, keeping only the pump columns (link_names[0]).
pump_energy = economics.pump_energy(results.link['flowrate'], results.node['head'], ctown.wn)[link_names[0]]
# Divide by 1000; the plot in the next cell labels the result 'power [kW]'
# (presumably a W -> kW conversion — confirm against the WNTR documentation).
pump_energy /= 1000
pump_energy.head(3)

Unnamed: 0,PU2,PU5,PU6,PU8,PU10
0,58.227444,14.631674,0.0,26.532584,23.435502
900,58.196094,14.636934,0.0,26.524227,23.469185
1800,58.165324,14.641828,0.0,26.515586,23.502525


In [17]:
# Pump energy over time, one line per pump.
fig, ax = plt.subplots()
pump_energy.plot(ax=ax)
ax.set_ylabel('power [kW]')
ax.set_xlabel('time [s]')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'time [s]')

### Valves

In [18]:
# Link 'setting' time series split by the 'valve_type' attribute: 'PRV' and 'TCV' valves.
PRValve_dp = results.link['setting'][nw_link_df.keys()[nw_link_df.loc['valve_type'] == 'PRV']]
TCValve_throttle = results.link['setting'][nw_link_df.keys()[nw_link_df.loc['valve_type'] == 'TCV']]

In [19]:
# Side-by-side panels: PRV settings (left) and TCV settings (right).
fig, ax = plt.subplots(1,2)
PRValve_dp.plot(ax=ax[0])
TCValve_throttle.plot(ax=ax[1])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f9c2b8bd0d0>

## Neural Network Pre-Processing
### States and Inputs

In [20]:
# Signals treated as system states. Only the uncommented entries are active; the
# commented entries are candidates that are currently switched off.
# NOTE(review): the commented 'dqual_cl_press_*' names do not match any variable
# defined above (the quality statistics are called qual_cl_qual_mean / qual_cl_qual_std).
state_dict = {'jun_cl_press_mean': jun_cl_press_mean,
              # 'jun_cl_press_std': jun_cl_press_std,
              # 'dqual_cl_press_mean': dqual_cl_press_mean,
              # 'dqual_cl_press_std': dqual_cl_press_std,
              'tank_press': tank_press,
              # 'tank_level': tank_level,
              # 'tank_qual': tank_qual,
              # 'reservoir_press': reservoir_press,
              # 'reservoir_level': reservoir_level,
              # 'reservoir_qual': reservoir_qual,
              }

# Column-wise concatenation; the dict keys become the outer column level.
sys_states = pd.concat(state_dict.values(), axis=1, keys=state_dict.keys())


# Signals treated as inputs: pump settings, valve settings and cluster demand.
input_dict = {'head_pump_speed': head_pump_speed,
              'PRValve_dp': PRValve_dp,
              'TCValve_throttle': TCValve_throttle,
              'jun_cl_demand_sum': jun_cl_demand_sum}

sys_inputs = pd.concat(input_dict.values(), axis=1, keys=input_dict.keys())

# Auxiliary outputs that are predicted but not fed back as states.
aux_output_dict = {'pump_energy': pump_energy,}

aux_outputs = pd.concat(aux_output_dict.values(), axis=1, keys=aux_output_dict.keys())

### Neural Network I/O

Outputs of the neural network.

Note: we are learning the next state of the system and the current aux_outputs (pump energy)

In [21]:
# Shift the states up by one row so each row holds the state of the following time step.
# The last row becomes NaN and is removed by the NaN filter further below.
sys_states_next = sys_states.shift(-1, axis=0)

# Targets: next-step states plus the auxiliary outputs of the current row.
nn_output_dict = {'sys_states': sys_states_next,
                 'aux_outputs': aux_outputs}


nn_output = pd.concat(nn_output_dict.values(), axis=1, keys=nn_output_dict.keys())

In [22]:
# Preview the first rows of the target table.
nn_output.head(3)

Unnamed: 0_level_0,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,aux_outputs,aux_outputs,aux_outputs,aux_outputs,aux_outputs
Unnamed: 0_level_1,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,...,tank_press,tank_press,tank_press,tank_press,tank_press,pump_energy,pump_energy,pump_energy,pump_energy,pump_energy
Unnamed: 0_level_2,0,1,2,3,4,5,6,7,8,9,...,T7,T6,T5,T2,T4,PU2,PU5,PU6,PU8,PU10
0,0.359735,0.942131,0.581662,0.945813,0.902581,0.492358,0.997837,0.254253,0.578518,0.488985,...,3.397771,5.253158,1.385392,1.612929,3.221158,58.227444,14.631674,0.0,26.532584,23.435502
900,0.358263,0.941637,0.582362,0.945172,0.901817,0.493456,0.997174,0.252537,0.58005,0.489438,...,3.53447,5.334502,1.55021,1.715537,2.992232,58.196094,14.636934,0.0,26.524227,23.469185
1800,0.356845,0.941151,0.583059,0.944541,0.901065,0.494547,0.996519,0.250848,0.581577,0.489889,...,3.670313,5.415967,1.714489,1.816875,2.763316,58.165324,14.641828,0.0,26.515586,23.502525


Inputs of the neural network:

In [23]:
# Features: current states and current inputs, concatenated column-wise with the
# dict keys as outer column level.
nn_input_dict = {'sys_states': sys_states,
                 'sys_inputs': sys_inputs}
nn_input = pd.concat(nn_input_dict.values(), axis=1, keys=nn_input_dict.keys())

print('n_samples, n_feat = {}'.format(nn_input.shape))

n_samples, n_feat = (673, 76)


NARX model structure?

In [24]:
# Build the NARX feature table: n_arx copies of nn_input, the i-th copy shifted down
# by i rows, concatenated column-wise with the lag number as outermost column level.
# NOTE(review): this cell overwrites nn_input, so running it twice stacks the lags a
# second time; re-run the cell that creates nn_input first.
if True:
    n_arx = 5
    arx_input = []
    for i in range(n_arx):
        # shift(i) leaves NaN in the first i rows of this copy
        arx_input.append(nn_input.shift(i, axis=0))

    arx_input = pd.concat(arx_input, keys=np.arange(n_arx), names=['NARX', 'type', 'name', 'index'], axis=1)
    nn_input = arx_input
print('n_samples, n_feat = {}'.format(nn_input.shape))

n_samples, n_feat = (673, 380)


In [25]:
# Preview the first rows of the NARX feature table (lagged copies start with NaN).
nn_input.head(3)

NARX,0,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4
type,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,sys_states,...,sys_inputs,sys_inputs,sys_inputs,sys_inputs,sys_inputs,sys_inputs,sys_inputs,sys_inputs,sys_inputs,sys_inputs
name,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,...,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum,jun_cl_demand_sum
index,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.361217,0.942634,0.58096,0.946468,0.90336,0.491254,0.998509,0.25597,0.576981,0.488532,...,,,,,,,,,,
900,0.359735,0.942131,0.581662,0.945813,0.902581,0.492358,0.997837,0.254253,0.578518,0.488985,...,,,,,,,,,,
1800,0.358263,0.941637,0.582362,0.945172,0.901817,0.493456,0.997174,0.252537,0.58005,0.489438,...,,,,,,,,,,


Check if NARX structure works:

In [26]:
# Cross-section over the 'type' and 'index' column levels: one sys_states signal across
# all NARX lags. Each lag column should equal the previous one shifted down by one row.
nn_input.xs(('sys_states',slice(0)),level=('type','index'), axis=1).head(5)

NARX,0,1,2,3,4
name,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean
0,0.361217,,,,
900,0.359735,0.361217,,,
1800,0.358263,0.359735,0.361217,,
2700,0.356845,0.358263,0.359735,0.361217,
3600,0.329629,0.356845,0.358263,0.359735,0.361217


Filter inputs and outputs:

In [27]:
# Filter nan:
output_filter = nn_output.isnull().any(axis=1)
if output_filter.any():
    nn_input = nn_input[~output_filter]
    nn_output = nn_output[~output_filter]

input_filter = nn_input.isnull().any(axis=1)
if input_filter.any():
    nn_input = nn_input[~input_filter]
    nn_output = nn_output[~input_filter]

print('n_samples, n_feat = {}'.format(nn_input.shape))

n_samples, n_feat = (668, 380)


In [28]:
# Same cross-section as before, after filtering: the first remaining rows have all lags populated.
nn_input.xs(('sys_states',slice(0)),level=('type','index'), axis=1).head(3)

NARX,0,1,2,3,4
name,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean,jun_cl_press_mean
3600,0.329629,0.356845,0.358263,0.359735,0.361217
4500,0.32742,0.329629,0.356845,0.358263,0.359735
5400,0.3252,0.32742,0.329629,0.356845,0.358263


In [29]:
# Matching target column: each value should equal the lag-0 input of the following row.
pd.DataFrame(nn_output['sys_states','jun_cl_press_mean',0]).head(3)

Unnamed: 0_level_0,sys_states
Unnamed: 0_level_1,jun_cl_press_mean
Unnamed: 0_level_2,0
3600,0.32742
4500,0.3252
5400,0.322979


### Compare to external output function:

This function is used to create the complete dataset for training.

In [30]:
# Run the project's get_data helper on the same result file so its output can be
# compared with the tables built step by step above.
# NOTE(review): absolute, user-specific path.
file_list = ['/home/felix/tubCloud/Shared/WDN_SurrogateModels/_RESULTS/150sim/results_sim_14_try.pkl',
            #'/home/felix/tubCloud/Shared/WDN_SurrogateModels/_RESULTS/150sim/results_sim_15_try.pkl'
            ]

# narx_horizon=5 matches n_arx = 5 used above.
nn_input_ext, nn_output_ext = get_data(file_list, narx_horizon=5)

In [31]:
# The element-wise difference must be exactly zero in every cell. A NaN in the
# difference compares unequal to 0 and therefore reports "not identical".
similar_inputs=((nn_input_ext-nn_input)==0).all().all()
print('inputs are identical: {}'.format(similar_inputs))

similar_outputs=((nn_output_ext-nn_output)==0).all().all()
print('outputs are identical: {}'.format(similar_outputs))

inputs are identical: True
outputs are identical: True


### Normalize Data:

In [32]:
# Normalise features: subtract the column mean, then divide by the largest absolute
# deviation of each column.
# NOTE(review): the statistics are computed on the full table, before the train/test split below.
input_offset = nn_input.mean()
nn_input_offset = nn_input - input_offset

input_scaling = nn_input_offset.abs().max()
# Floor the scaling at 1e-5 so (nearly) constant columns do not divide by ~0.
input_scaling.loc[input_scaling.abs()<1e-5]=1e-5
nn_input_scaled = nn_input_offset/input_scaling

# Same normalisation for the targets.
output_offset = nn_output.mean()
nn_output_offset = nn_output - output_offset

output_scaling = nn_output_offset.abs().max()
output_scaling.loc[output_scaling.abs()<1e-5]=1e-5
nn_output_scaled = nn_output_offset/output_scaling

### Train / Test Splitting

In [32]:
# Random 80/20 split of the normalised samples. The fixed random_state makes the split
# (and every result derived from it) reproducible after a kernel restart.
X_train, X_test, Y_train, Y_test = train_test_split(nn_input_scaled, nn_output_scaled, test_size=0.2, random_state=42)

# Neural Network

## Create Model

In [88]:
# Hyper-parameters of the fully connected network.
n_layer = 3
n_units = 100
l1_regularizer = 0

# Layer sizes are taken from the data tables; unit counts and activations are lists of length n_layer.
model_param = {}
model_param['n_in'] = nn_input.shape[1]
model_param['n_out'] = nn_output.shape[1]
model_param['n_units'] = (n_layer)*[n_units]
model_param['activation'] = (n_layer) * ['tanh']

inputs = keras.Input(shape=(model_param['n_in'],))

# layer_list[i] is the tensor feeding layer i; it starts with the input tensor.
layer_list = [inputs]


# NOTE(review): the loop runs len(n_units)-1 times, so n_layer = 3 produces two hidden
# layers followed by the linear output layer (see the summary below); the last entries
# of n_units/activation are unused — confirm this is intended.
for i in range(len(model_param['n_units'])-1):
    layer_list.append(
        keras.layers.Dense(model_param['n_units'][i],
                           activation=model_param['activation'][i],
                           kernel_regularizer=keras.regularizers.l1(l=l1_regularizer)
                           )(layer_list[i])
    )

# Linear output layer on top of the last hidden layer.
outputs = keras.layers.Dense(model_param['n_out'],
                             activation='linear')(layer_list[-1])

model = keras.Model(inputs=inputs, outputs=outputs)

model.summary()

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 380)]             0         
_________________________________________________________________
dense_19 (Dense)             (None, 100)               38100     
_________________________________________________________________
dense_20 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_21 (Dense)             (None, 42)                4242      
Total params: 52,442
Trainable params: 52,442
Non-trainable params: 0
_________________________________________________________________


## Train model

In [98]:
# Adam optimizer with explicitly listed hyper-parameters.
optim = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
# Stop training once the monitored quantity 'loss' (training loss) has not improved by
# at least min_delta for 50 consecutive epochs.
# NOTE(review): validation data is passed to fit(), but early stopping watches the
# training loss — confirm that 'val_loss' was not intended.
callback = keras.callbacks.EarlyStopping(monitor='loss', min_delta=1e-8, patience=50, mode='min')
model.compile(optimizer=optim,
              loss='mse')

In [99]:
# Train on the training split; the test split is passed as validation data so its loss
# is reported per epoch. Training ends after 1000 epochs or when the callback stops it.
history = model.fit(X_train.to_numpy(), 
                    Y_train.to_numpy(), 
                    batch_size=50, 
                    epochs=1000,
                    validation_data=(X_test.to_numpy(),Y_test.to_numpy()),
                    callbacks=[callback])

Train on 534 samples, validate on 134 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000


## Evaluate model

In [100]:
class narx_simulator:
    """Free-running (closed-loop) simulation of a one-step-ahead NARX model.

    The simulator keeps a table ``nn_in`` with one row per simulated time step and the
    same four-level column layout as the training features (NARX lag, type, name, index).
    Each call to :meth:`next_step` writes the current inputs into the current row,
    evaluates the model on that row and creates the next row from the prediction and the
    lagged values of the current row.

    Parameters
    ----------
    model : object with a ``predict(array)`` method
        Trained model; called with a 2-D array holding one scaled feature row.
    n_arx : int
        Number of NARX lags contained in the feature columns.
    input_scaling, output_scaling, input_offset, output_offset : pandas.Series
        Normalisation constants; converted to NumPy arrays and applied as
        ``(x - offset) / scaling`` on the input and ``y * scaling + offset`` on the output.
    nn_in_0 : pandas.DataFrame
        Single-row feature table used as initial condition. It is copied, so the
        caller's frame is not modified.
    output_columns : pandas.Index, optional
        Column labels of the model output. When omitted, the labels are taken from the
        notebook-level ``nn_output`` table at evaluation time (original behaviour).
    dt : int, optional
        Increment added to the row label for every step. Defaults to 900.
    """

    def __init__(self, model, n_arx, input_scaling, output_scaling, input_offset, output_offset, nn_in_0,
                 output_columns=None, dt=900):
        self.model = model
        self.n_arx = n_arx

        self.input_scaling = input_scaling.to_numpy()
        self.output_scaling = output_scaling.to_numpy()
        self.input_offset = input_offset.to_numpy()
        self.output_offset = output_offset.to_numpy()

        # Private copy: the caller typically passes a slice such as nn_input.head(1);
        # writing into that slice triggers SettingWithCopyWarning and may leak changes.
        self.nn_in = nn_in_0.copy()
        self.output_columns = output_columns

        self.t0 = nn_in_0.index[0]
        self.dt = dt

    def prepare_next_iter(self):
        """Advance the time label and fill the states and lagged columns of the new row.

        Requires :meth:`eval_nn` to have been called for the current row.
        """
        # Update time
        t_prev = self.t0
        self.t0 += self.dt
        # Create the new (all-NaN) row explicitly. DataFrame.append returns a new frame,
        # so calling it without using the result never added a row.
        if self.t0 not in self.nn_in.index:
            self.nn_in = self.nn_in.reindex(index=self.nn_in.index.append(pd.Index([self.t0])))
        # Lag-0 states of the new row are the states predicted for it.
        self.nn_in.loc[self.t0,(0,'sys_states')] = self.nn_out_df['sys_states'].to_numpy().flatten()

        # Lag k+1 of the new row is lag k of the previous row (states and inputs).
        # Uses the instance's own horizon rather than a notebook-level variable.
        for k in range(self.n_arx-1):
            self.nn_in.loc[self.t0,(k+1,'sys_states')] = self.nn_in.loc[t_prev,(k,'sys_states')].to_numpy()
            self.nn_in.loc[self.t0,(k+1,'sys_inputs')] = self.nn_in.loc[t_prev,(k,'sys_inputs')].to_numpy()

    def eval_nn(self):
        """Evaluate the model on the last row of ``nn_in`` and store the un-scaled output."""
        # Scale input:
        nn_in_scaled = (self.nn_in.tail(1)-self.input_offset)/self.input_scaling
        # Evaluate NN:
        nn_out_scaled = self.model.predict(nn_in_scaled.to_numpy())
        # Scale output:
        nn_out = nn_out_scaled*self.output_scaling+self.output_offset

        # Fall back to the notebook-level target table when no labels were supplied.
        columns = self.output_columns if self.output_columns is not None else nn_output.keys()
        self.nn_out_df = pd.DataFrame(nn_out, columns=columns)
        # The model predicts the next state directly (not a difference to the current state).
        self.next_state = nn_out

    def next_step(self, sys_inputs_now):
        """Simulate one step.

        ``sys_inputs_now`` (pandas.Series) holds the lag-0 input values for the current
        row, in the column order of ``nn_in``.
        """
        self.nn_in.loc[self.t0,(0,'sys_inputs')] = sys_inputs_now.to_numpy()

        self.eval_nn()

        self.prepare_next_iter()
        

In [101]:
# Simulator for the neural network, started from the first valid feature row.
nsim = narx_simulator(model, 5, input_scaling, output_scaling, input_offset, output_offset, nn_input.head(1))

In [102]:
# Roll the model forward 50 steps. The inputs of each step are read from the recorded
# data at the simulator's current time label; the states come from the model itself.
for k in range(50):
    sys_inputs_now = nn_input.loc[nsim.t0,(0,'sys_inputs')]
    nsim.next_step(sys_inputs_now)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [104]:
# Compare recorded data with the closed-loop prediction of the neural network.
fig, ax = plt.subplots(2,1, sharex=True)

# Top: recorded cluster pressures (grey) and simulated lag-0 cluster pressures.
jun_cl_press_mean.plot(ax=ax[0], legend=False, linewidth=0.5, color='k', alpha=0.4)
nsim.nn_in[0,'sys_states', 'jun_cl_press_mean'].plot(ax=ax[0], legend=False)

# Bottom: recorded tank pressures (thick, transparent) and simulated ones. Resetting
# the property cycle makes the simulated lines reuse the colours of the recorded lines.
tank_press.plot(ax=ax[1], legend=False, alpha=0.5, linewidth=4)
ax[1].set_prop_cycle(None)
nsim.nn_in[0,'sys_states', 'tank_press'].plot(ax=ax[1])

# Restrict the view to the simulated horizon.
ax[0].set_xlim(0, nsim.nn_in.index.max())

ax[0].set_ylabel('normalized pressure \n in clusters')
ax[1].set_ylabel('tank level [m]')
ax[1].set_xlabel('time [s]')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'time [s]')

# Linear Model

In [176]:
# Linear baseline fitted on all normalised samples (not only the training split).
linear_reg = LinearRegression().fit(nn_input_scaled.to_numpy(), nn_output_scaled.to_numpy())

In [185]:
# Simulator for the linear baseline, started from the same initial row as the neural network.
nsim_lin = narx_simulator(linear_reg, 5, input_scaling, output_scaling, input_offset, output_offset, nn_input.head(1))

In [186]:
# Roll the linear model forward 50 steps with the recorded inputs.
for k in range(50):
    sys_inputs_now = nn_input.loc[nsim_lin.t0,(0,'sys_inputs')]
    nsim_lin.next_step(sys_inputs_now)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [187]:
# Compare recorded data with the closed-loop prediction of the linear model
# (same layout as the neural-network comparison plot).
fig, ax = plt.subplots(2,1, sharex=True)

# Top: recorded cluster pressures (grey) and simulated lag-0 cluster pressures.
jun_cl_press_mean.plot(ax=ax[0], legend=False, linewidth=0.5, color='k', alpha=0.4)
nsim_lin.nn_in[0,'sys_states', 'jun_cl_press_mean'].plot(ax=ax[0], legend=False)

# Bottom: recorded tank pressures (thick, transparent) and simulated ones in matching colours.
tank_press.plot(ax=ax[1], legend=False, alpha=0.5, linewidth=4)
ax[1].set_prop_cycle(None)
nsim_lin.nn_in[0,'sys_states', 'tank_press'].plot(ax=ax[1])

# Restrict the view to the simulated horizon.
ax[0].set_xlim(0, nsim_lin.nn_in.index.max())

ax[0].set_ylabel('normalized pressure \n in clusters')
ax[1].set_ylabel('tank level [m]')
ax[1].set_xlabel('time [s]')

<IPython.core.display.Javascript object>


indexing past lexsort depth may impact performance.



Text(0.5, 0, 'time [s]')

# Create full training data set:

- create train/test split

In [33]:
# Directory holding one pickled result file per simulation run.
# NOTE(review): absolute, user-specific path.
data_path = '/home/felix/tubCloud/Shared/WDN_SurrogateModels/_RESULTS/150sim/'
# os.listdir returns entries in arbitrary order, so sort to get the same sample order on
# every run and machine. Match on the file extension rather than on a substring so that
# names such as 'x.pkl.bak' are not picked up.
file_list = sorted(
    os.path.join(data_path, file_i)
    for file_i in os.listdir(data_path)
    if file_i.endswith('.pkl')
)

In [41]:
# Build the complete data set from all result files with the project's get_data helper.
# The second positional argument is 4 here, whereas narx_horizon=5 was used in the
# comparison above. This overwrites the single-run nn_input / nn_output tables.
nn_input, nn_output = get_data(file_list, 4)


In [44]:
# Random 80/20 split of the complete data set. The fixed random_state makes the saved
# training/test files reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(nn_input, nn_output, test_size=0.2, random_state=42)

Pack everything needed for training in a .pkl file:

In [45]:
# Everything a separate training script needs: the split data plus the clustering
# information used to build it.
training_dict = {
    'X_train': X_train,
    'X_test': X_test,
    'Y_train': Y_train,
    'Y_test': Y_test,
    'cluster_labels': cluster_labels,
    'pressure_factor': pressure_factor
}

# Create the output directory if it is missing so the write cannot fail on a fresh checkout.
os.makedirs('./training_data', exist_ok=True)
with open('./training_data/training_setup_narx4.pkl', 'wb') as f:
    # pickle.dump returns None; binding it to `results` would overwrite the simulation
    # results loaded at the top of the notebook.
    pickle.dump(training_dict, f)

In [43]:
# (n_samples, n_features) of the training split.
X_train.shape

(80280, 304)