# Leak Detection

## 1. Make Graph

Convert the `EPANET` model to a `networkx` graph

In [1]:
import os
import yaml
import time
import torch
import epynet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from utils.epanet_loader import get_nx_graph
from utils.epanet_simulator import epanetSimulator
from utils.data_loader import battledimLoader, dataCleaner, dataGenerator, embedSignalOnGraph, rescaleSignal
from modules.torch_gnn import ChebNet
from utils.visualisation import visualise

# Runtime configuration: every tunable value used by the cells below
path_to_wdn = './data/L-TOWN.inp'
path_to_data = './data/l-town-data/'
weight_mode = 'pipe_length'
self_loops = True
scaling = 'minmax'
figsize = (50, 16)
print_out_rate = 1

# The model name encodes the weighting mode, the scaling and (optionally)
# the self-loop setting, so different runs map to different files on disk
model_name = 'l-town-chebnet-{}-{}{}'.format(weight_mode,
                                             scaling,
                                             '-self_loop' if self_loops else '')

# Locations of the saved model and its log, both derived from model_name
last_model_path = './studies/models/{}-1.pt'.format(model_name)
last_log_path = './studies/logs/{}-1.csv'.format(model_name)

# Load the water distribution network from the EPANET .inp file located at
# path_to_wdn, using the EPYNET library
wdn = epynet.Network(path_to_wdn)

# Solve the hydraulic model (per the original author's note: for a single timestep)
wdn.solve()

# Convert the solved network into a graph using a custom function, based on:
# https://github.com/BME-SmartLab/GraphConvWat
# weight_mode selects the edge weighting. Three values come back: the graph G,
# plus pos and head -- presumably node positions and nodal heads (requested
# via get_head=True); TODO confirm against utils.epanet_loader
G , pos , head = get_nx_graph(wdn, weight_mode=weight_mode, get_head=True)

Get a list of node IDs with sensors

In [2]:
# Build the path to the dataset configuration file. os.path.join keeps this
# correct whether or not path_to_data ends with a path separator.
config_path = os.path.join(path_to_data, 'dataset_configuration.yml')

# Open the dataset configuration file
with open(config_path) as file:

    # Load the configuration to a dictionary
    # NOTE(review): FullLoader can construct more than plain YAML types; if the
    # file only holds scalars, lists and mappings, yaml.safe_load is the safer choice
    config = yaml.load(file, Loader=yaml.FullLoader)

# Generate a list of integers, indicating the number of the node at which a
# pressure sensor is present: every "n" is removed from each configured ID
# before it is converted to an int
sensors = [int(sensor_id.replace("n", "")) for sensor_id in config['pressure_sensors']]

Add self-loops to the sensor nodes in the graph

In [3]:
# When enabled, connect every sensor node to itself with an edge of
# weight 1 that is labelled 'SELF'
if self_loops:
    G.add_edges_from(((node, node) for node in sensors), weight=1., name='SELF')

## 2. Get Simulation Data

We run an EPANET simulation using the WNTR library and the EPANET
nominal model supplied with the BattLeDIM competition. <br>
With this simulation, we have a complete pressure signal for all
nodes in the network, on which the GNN algorithm is to be trained.

In [4]:
# Instantiate the nominal WDN model from the network file (path_to_wdn) and
# the dataset directory (path_to_data)
nominal_wdn_model = epanetSimulator(path_to_wdn, path_to_data)

# Run a simulation; the results are kept on the simulator object
nominal_wdn_model.simulate()

# Retrieve the simulated nodal pressures; these are passed to dataCleaner as
# `pressure_df` further down, so presumably a DataFrame -- TODO confirm
nominal_pressure = nominal_wdn_model.get_simulated_pressure()

Populate the feature vector `x` and the label vector `y` from the nominal pressures. <br>
Also retrieve the scale and bias of the scaling transformation. <br>
These let us inverse-transform the predicted values when calculating relative reconstruction errors.

In [5]:
# Build features (x) and labels (y) from the simulated nodal pressures,
# marking which nodes carry sensors; the scale and bias of the applied
# scaling are returned as well so that predictions can be mapped back later
x, y, scale, bias = dataCleaner(
    pressure_df=nominal_pressure,
    observed_nodes=sensors,
    rescale=scaling,
)

# Hold out 20% of the samples for validation. Shuffling is disabled, so the
# original sample order is preserved across the split.
x_trn, x_val, y_trn, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
    shuffle=False,
)

## 3. Get Historical Data

In [6]:
# Both yearly SCADA files are loaded with identical settings (format matching
# the GraphConvWat problem), re-using the scale and bias obtained from the
# nominal data. Only the file name differs between the two calls.
# NOTE(review): 782 is the node count handed to the loader -- presumably the
# number of nodes in the L-TOWN network; TODO confirm
scada_loader_kwargs = dict(
    observed_nodes=sensors,
    n_nodes=782,
    path=path_to_data,
    rescale=True,
    scale=scale,
    bias=bias,
)

pressure_2018 = battledimLoader(file='2018_SCADA_Pressures.csv', **scada_loader_kwargs)
pressure_2019 = battledimLoader(file='2019_SCADA_Pressures.csv', **scada_loader_kwargs)

## 4. Load a Trained GNN Model

In [7]:
# Run on an NVIDIA GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate a Chebyshev Network (ChebNet) GNN model. The channel counts are
# taken from the last dimension of the training features and labels, and the
# scaling parameters are handed to the model as data_scale / data_bias.
model = ChebNet(
    name='ChebNet',
    data_generator=None,
    device=device,
    in_channels=np.shape(x_trn)[-1],
    out_channels=np.shape(y_trn)[-1],
    data_scale=scale,
    data_bias=bias,
).to(device)

In [8]:
# Load the results of a previous training run from the model and log paths
# defined in the runtime configuration.
# NOTE(review): this call is unconditional -- nothing here prompts the user or
# checks that the files exist; confirm how load_model reacts to missing files
model.load_model(last_model_path, last_log_path)


                Loaded previous model results...
                --------------------------------------------------
                Model has been trained for:	100 epochs
                Best validation loss:      	1.6016847697140293e-05 
                Occurred in training round:	96 


## 5. Predict a Year's worth of Data

In [9]:
def predict_pressure(graph, pressure_series, print_out_rate=100, save=True, filename='predictions.csv', predictor=None):
    """Run a prediction for every signal in a series and collect the results.

    Parameters
    ----------
    graph : object
        Graph passed unchanged as the first argument of every prediction call.
    pressure_series : iterable
        Signals to predict on, one prediction call per element.
    print_out_rate : int, optional
        A progress line is printed for every signal whose index is a multiple
        of this value. A falsy value (0 or None) disables progress output.
    save : bool, optional
        When True, the collected results are written to `filename` as CSV.
    filename : str, optional
        Destination of the CSV file when `save` is True.
    predictor : callable, optional
        Function called as ``predictor(graph, signal)`` for each signal.
        Defaults to the ``predict`` method of the notebook-level ``model``,
        which must then exist when this function is called.

    Returns
    -------
    list
        One prediction per input signal, in input order.
    """
    if predictor is None:
        # Backward-compatible default: use the model defined at notebook level
        predictor = model.predict

    results = []
    start_time = time.time()

    for i, partial_graph_signal in enumerate(pressure_series):
        # Guard against a zero/None rate, which would otherwise break the modulo
        report = bool(print_out_rate) and i % print_out_rate == 0

        step_start = time.time()
        results.append(predictor(graph, partial_graph_signal))

        if report:
            now = time.time()
            print('Signal:\t{}\t Execution:\t{:.3f} s\t Elapsed:\t{:.3f} s'.format(i,
                                                                                   now - step_start,
                                                                                   now - start_time))

    if save:
        # abspath reports the real destination for relative and absolute filenames alike
        print('-'*63+"\nSaving results to: \n{}\n\n".format(os.path.abspath(filename)))
        pd.DataFrame(results).to_csv(filename)

    return results

In [10]:
# NOTE(review): only the first `n_signals` signals of each year are predicted
# here, although the section heading announces a full year -- confirm whether
# this limit is a leftover from a quick test run
n_signals = 11

# Predict on the 2018 measurements and store the results as CSV
prediction_2018 = predict_pressure(
    G,
    pressure_2018[:n_signals],
    print_out_rate=print_out_rate,
    save=True,
    filename='2018_predictions.csv',
)

# Same procedure for the 2019 measurements
prediction_2019 = predict_pressure(
    G,
    pressure_2019[:n_signals],
    print_out_rate=print_out_rate,
    save=True,
    filename='2019_predictions.csv',
)

Signal:	0	 Execution:	0.266 s	 Elapsed:	0.266 s
Signal:	1	 Execution:	0.262 s	 Elapsed:	0.528 s
Signal:	2	 Execution:	0.256 s	 Elapsed:	0.785 s
Signal:	3	 Execution:	0.251 s	 Elapsed:	1.036 s
Signal:	4	 Execution:	0.252 s	 Elapsed:	1.288 s
Signal:	5	 Execution:	0.257 s	 Elapsed:	1.545 s
Signal:	6	 Execution:	0.253 s	 Elapsed:	1.799 s
Signal:	7	 Execution:	0.258 s	 Elapsed:	2.057 s
Signal:	8	 Execution:	0.287 s	 Elapsed:	2.343 s
Signal:	9	 Execution:	0.289 s	 Elapsed:	2.633 s
Signal:	10	 Execution:	0.275 s	 Elapsed:	2.908 s
---------------------------------------------------------------
Saving results to: 
/Users/gardar/Documents/UCL/ELEC0054 IMLS Research Project/04 Implementation/04 Leakage Detection/2018_predictions.csv


Signal:	0	 Execution:	0.299 s	 Elapsed:	0.299 s
Signal:	1	 Execution:	0.310 s	 Elapsed:	0.609 s
Signal:	2	 Execution:	0.283 s	 Elapsed:	0.892 s
Signal:	3	 Execution:	0.278 s	 Elapsed:	1.170 s
Signal:	4	 Execution:	0.255 s	 Elapsed:	1.425 s
Signal:	5	 Execution:	0.24

## 6. Read in Yearly Prediction and Scale Back to Original Interval

In [12]:
# Read the stored 2018 predictions (the unnamed first CSV column is the row
# index) and relabel each column from its integer label k to 'n<k+1>'
predictions_2018_scaled = (
    pd.read_csv('2018_predictions.csv', index_col='Unnamed: 0')
      .rename(columns=lambda node: 'n{}'.format(int(node) + 1))
)

# Map the values back to the original interval using the scale and bias
# obtained when the nominal data was cleaned
df = predictions_2018_scaled * scale + bias

# Index the rows by timestamp: one row every 5 minutes from the start of 2018
df.index = pd.date_range(start='2018-01-01 00:00:00',
                         periods=len(df),
                         freq='5min')