# Sensor Correlation

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from natsort import natsorted
import networkx as nx
import epynet 
import yaml

In [2]:
# --------------------------
# Importing custom libraries
# Source: https://github.com/GardarGardarsson/GSP_for_Leak_Detection/tree/864a99041af6331375946a8fad3a71c633108eb8
# --------------------------

# Import a custom tool for converting EPANET .inp files to networkx graphs
from ep_utils.epanet_loader import get_nx_graph

# Function for visualisation
from ep_utils.visualisation import visualise

# EPANET simulator, used to generate nodal pressures from the nominal model
from ep_utils.epanet_simulator import epanetSimulator

# SCADA timeseries dataloader
from ep_utils.data_loader import battledimLoader, dataCleaner, dataGenerator

In [3]:
# Relative path to the EPANET .inp file that is loaded with EPYNET in the next cell
path_to_wdn = './data/l-town-data/L-TOWN_Real.inp'

In [4]:
# Import the .inp file using the EPYNET library.
# `wdn` is the network object that is later handed to get_nx_graph.
wdn = epynet.Network(path_to_wdn)

# Solve hydraulic model for a single timestep
wdn.solve()

In [5]:
# Convert the EPYNET network to a networkx graph, requesting 'real_pipe_length' as the weight mode.
# get_nx_graph returns two values; only the graph is kept, the second one is discarded.
# NOTE(review): `_` is also IPython's "last output" variable — a named throwaway would avoid shadowing it.
G, _ = get_nx_graph(wdn, weight_mode='real_pipe_length', get_head=False)

In [6]:
# Map every pipe name to the pair of nodes it connects.
# Edges whose 'name' attribute is 'SELF' are ignored. When the same pipe is
# encountered more than once, the pair seen last is the one that is kept.
neighbours_by_pipe = {}

for node in G:
    for neighbour, edge_attrs in G[node].items():
        pipe_name = edge_attrs['name']
        if pipe_name != 'SELF':
            neighbours_by_pipe[pipe_name] = [node, neighbour]

# Reverse lookup: the string form of the [node, neighbour] list -> pipe name
pipe_by_neighbours = {}
for pipe, endpoints in neighbours_by_pipe.items():
    pipe_by_neighbours[str(endpoints)] = pipe

In [7]:
# Open the dataset configuration file.
# The encoding is stated explicitly so that parsing does not depend on the
# platform's default locale encoding.
with open('config.yaml', encoding='utf-8') as file:
    # Load the configuration to a dictionary
    # NOTE(review): FullLoader can build more than plain YAML data types. If this
    # file only holds plain mappings/lists/strings, yaml.safe_load(file) is the
    # safer choice — confirm and switch.
    config = yaml.load(file, Loader=yaml.FullLoader)

In [8]:
def _node_ids(names):
    """Turn node labels such as 'n54' into integers by removing every 'n' character."""
    return [int(name.replace("n", "")) for name in names]


# Zones whose nodes are pooled into group A; zone6 forms group B and zone1 forms group C.
_ZONES_A = ['zone2', 'zone3', 'zone4', 'zone5', 'zone7', 'zone8', 'zone9', 'zone10']

node_A = _node_ids([name for zone in _ZONES_A for name in config[zone]])
node_B = _node_ids(config['zone6'])
node_C = _node_ids(config['zone1'])
sensors = _node_ids(config['pressure_sensors'])

# Membership is tested against sets so each lookup is O(1); the resulting lists
# keep the order of `sensors`, exactly as a list-based membership test would.
_members_A, _members_B, _members_C = set(node_A), set(node_B), set(node_C)
sensors_A = [sensor for sensor in sensors if sensor in _members_A]
sensors_B = [sensor for sensor in sensors if sensor in _members_B]
sensors_C = [sensor for sensor in sensors if sensor in _members_C]

In [9]:
# Sub-network for group A: copy the full graph, then drop every node that is
# not listed in node_A (edges attached to dropped nodes go with them).
G_A = G.copy()
G_A.remove_nodes_from([n for n in G.nodes if n not in node_A])

In [10]:
# Sub-network for group B: copy the full graph, then drop every node that is
# not listed in node_B (edges attached to dropped nodes go with them).
G_B = G.copy()
G_B.remove_nodes_from([n for n in G.nodes if n not in node_B])

In [11]:
# Sub-network for group C: copy the full graph, then drop every node that is
# not listed in node_C (edges attached to dropped nodes go with them).
G_C = G.copy()
G_C.remove_nodes_from([n for n in G.nodes if n not in node_C])

In [12]:
# Distance table: one row per node of G_A, one column per sensor in sensors_A.
# Each cell is the weighted ('weight' edge attribute) shortest-path length
# from the row's node to the column's sensor inside G_A.
# NOTE(review): one shortest-path query is issued per (node, sensor) pair; if G_A
# is undirected and connected, a single-source query per sensor would be
# cheaper — confirm before changing.
df = pd.DataFrame(index=G_A.nodes())

for sensor_node in sensors_A:
    distances_to_sensor = [
        nx.shortest_path_length(G_A, source=node, target=sensor_node, weight='weight')
        for node in df.index
    ]
    df[sensor_node] = distances_to_sensor

In [13]:
# Show the distance table: rows are the nodes of G_A, columns are the sensors in sensors_A
df

Unnamed: 0,54,105,114,163,188,288,296,332,342,410,...,549,613,636,644,679,722,726,740,752,769
46,341.5241,1095.5615,1437.9971,867.0471,1222.6893,2089.8754,2190.3036,971.1539,1037.2252,367.2180,...,1263.2141,1369.8608,1426.1601,1442.6237,1797.5195,2374.9328,2184.6316,2386.0427,2424.2039,2744.8814
47,332.7845,1086.8219,1429.2575,858.3075,1213.9497,2081.1358,2181.5640,962.4143,1028.4856,375.9576,...,1254.4745,1361.1212,1417.4205,1433.8841,1788.7799,2366.1932,2175.8920,2377.3031,2415.4643,2736.1418
48,402.9254,1156.9628,1499.3984,928.4484,1284.0906,2066.2213,2166.6495,1032.5552,1098.6265,305.8167,...,1324.6154,1431.2621,1402.5060,1418.9696,1773.8654,2351.2787,2160.9775,2362.3886,2400.5498,2721.2273
49,160.5072,1012.3225,1354.7581,821.5817,1177.2239,2180.1164,2280.5446,887.9149,1034.7430,854.9936,...,1179.9751,1367.3786,1516.4011,1532.8647,1887.7605,2465.1738,2274.8726,2476.2837,2514.4449,2835.1224
50,590.4312,1344.4686,1686.9042,1115.9542,1471.5964,1998.8783,2099.3065,1220.0610,1265.3632,118.3109,...,1512.1212,1505.2373,1335.1630,1351.6266,1706.5224,2283.9357,2093.6345,2295.0456,2333.2068,2653.8843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,998.2812,1409.1963,1578.3727,782.9584,963.1590,1067.3870,1267.0254,1083.7061,87.2991,1059.7532,...,1252.3844,415.7966,502.8819,420.1353,826.8274,1451.6546,1213.9395,1462.7645,1453.5118,1821.6032
779,955.5227,1366.4378,1535.6142,740.1999,924.5386,1110.1455,1309.7839,1040.9476,44.5406,1102.5117,...,1209.6259,377.1762,545.6404,462.8938,869.5859,1494.4131,1256.6980,1505.5230,1496.2703,1864.3617
780,1080.6586,1430.2763,1599.4527,804.0384,880.7816,1061.1410,1260.7794,1104.7861,169.6765,1053.5072,...,1210.2056,333.4192,496.6359,413.8893,820.5814,1445.4086,1207.6935,1456.5185,1447.2658,1815.3572
781,1054.4319,1384.5911,1553.7675,758.3532,835.0964,1106.8262,1306.4646,1059.1009,143.4498,1099.1924,...,1164.5204,287.7340,542.3211,459.5745,866.2666,1491.0938,1253.3787,1502.2037,1492.9510,1861.0424


In [14]:
# For every node (row of df), keep the labels of the N_NEAREST sensors with the
# smallest distance, ordered from nearest to farthest.
N_NEAREST = 4
top_columns = {
    node: row.nsmallest(N_NEAREST).index.tolist()
    for node, row in df.iterrows()
}

In [15]:
# Node label -> list of sensor labels, all written in the 'n<id>' form.
# Group-A nodes get their nearest sensors from top_columns; every node of
# group B / C gets the full sensor list of its own group.
result = {
    f'n{node}': [f'n{sensor}' for sensor in nearest]
    for node, nearest in top_columns.items()
}
# A fresh list is built per node on purpose, so entries never alias each other.
result.update({f'n{node}': [f'n{sensor}' for sensor in sensors_B] for node in node_B})
result.update({f'n{node}': [f'n{sensor}' for sensor in sensors_C] for node in node_C})

In [17]:
# Distance thresholds, in the same unit as the values stored in df
# (presumably metres, given weight_mode='real_pipe_length' — TODO confirm).
NEARBY_MAX_DISTANCE = 300    # a sensor at or below this distance counts as nearby
REMOTE_MIN_DISTANCE = 1300   # a sensor strictly above this distance counts as remote

# For each node: (sensor label, distance) pairs of its nearby and remote sensors.
result_dict = {}
remote_dict = {}

for node_name, distances in df.iterrows():
    node_label = f'n{node_name}'
    result_dict[node_label] = [
        (f'n{sensor}', distance)
        for sensor, distance in distances.items()
        if distance <= NEARBY_MAX_DISTANCE
    ]
    remote_dict[node_label] = [
        (f'n{sensor}', distance)
        for sensor, distance in distances.items()
        if distance > REMOTE_MIN_DISTANCE
    ]
    

In [18]:
# Sensor labels assigned to each zone.
# zone1 and zone6 are intentionally left out of this mapping (entries kept
# below as comments for reference).
sensors_list = {
    # 'zone1': ['n1', 'n4', 'n31'],
    'zone2': ['n410', 'n429'],
    'zone3': ['n342', 'n636', 'n644'],
    'zone4': ['n296', 'n679', 'n722', 'n740'],
    'zone5': ['n288', 'n726', 'n752', 'n769'],
    # 'zone6': ['n215', 'n229'],
    'zone7': ['n163', 'n188', 'n613'],
    'zone8': ['n332', 'n495', 'n506', 'n549'],
    'zone9': ['n105', 'n114', 'n469', 'n516'],
    'zone10': ['n54', 'n415', 'n458', 'n519'],
}

In [19]:
# Final node -> sensors mapping.
# For every node of a zone listed in sensors_list: take the zone's own sensors
# plus the node's nearby sensors (result_dict), then drop any sensor that is
# remote from that node (remote_dict).
result_new = {}

for zone_name, zone_sensors in sensors_list.items():
    for node_name in config[zone_name]:
        nearby = [name for name, _dist in result_dict[node_name]]
        remote = {name for name, _dist in remote_dict[node_name]}
        # dict.fromkeys removes duplicates while keeping first-seen order, so the
        # list written to disk is identical on every run. Going through a plain
        # set would leave the order dependent on string hash randomisation.
        candidates = dict.fromkeys(zone_sensors + nearby)
        result_new[node_name] = [name for name in candidates if name not in remote]

# Nodes of group B / C are mapped to every sensor of their own group.
# A fresh list is built per node so yaml.dump does not emit anchors/aliases.
for n in node_B:
    result_new[f'n{n}'] = [f'n{sensor}' for sensor in sensors_B]
for n in node_C:
    result_new[f'n{n}'] = [f'n{sensor}' for sensor in sensors_C]

In [53]:
# Destination of the node -> sensors mapping
file_path = 'correlate_sensors.yaml'

# Serialise result_new as block-style YAML and write it out in one go
with open(file_path, 'w') as file:
    file.write(yaml.dump(result_new, default_flow_style=False))