In [None]:
import pandas as pd
import numpy as np

This notebook creates fault zones using geographical clustering based on hop count.

## Inputting the graph (nodes and links) & extracting all indices

In [None]:
# Load the pipe table (one row per link) and preview its first rows.
pipe_csv = 'data/node_connection_pipe.csv'
data = pd.read_csv(pipe_csv)
data.head()

Unnamed: 0,link_id,start_node,end_node,length(m),diameter(mm)
0,p1,n62,n61,26.8746,198.6771
1,p2,n66,n64,14.0603,145.6646
2,p3,n86,n90,52.2554,194.2088
3,p4,n71,n70,29.4554,96.0532
4,p5,n3,n2,22.9449,99.3313)


In [None]:
# Start the result table as a one-column frame holding the link ids;
# the zone label of each link is attached to it after clustering.
zone_clusters = data['link_id'].to_frame()

In [None]:
# Drop the pipe attributes that are not used as graph topology.
# The leading space in the column labels is part of the labels as loaded.
df = data.drop(columns=[' length(m)', ' diameter(mm)'])

In [None]:
# Collect every node id that appears as a link start or a link end.
list1 = list(data[' start_node'].unique())
list2 = list(data[' end_node'].unique())

# De-duplicate while keeping first-appearance order (start nodes, then any
# end nodes not seen yet). A plain set() orders strings by their hash, which
# changes between interpreter sessions, so positions derived from it (and
# everything computed from those positions) would not be reproducible.
nodes = list(dict.fromkeys(list1 + list2))

print(nodes)

['n205', 'n280', 'n743', 'n429', 'n669', 'n695', 'n98', 'n268', 'n523', 'n299', 'n82', 'n687', 'n654', 'n249', 'n535', 'n658', 'n118', 'n354', 'n440', 'n482', 'n711', 'n775', 'n603', 'n33', 'n80', 'n627', 'n113', 'n545', 'n291', 'n641', 'n739', 'n97', 'n247', 'n136', 'n637', 'n531', 'n351', 'n75', 'n497', 'n652', 'n700', 'n235', 'n279', 'n322', 'n668', 'n529', 'n276', 'n141', 'n124', 'n759', 'R1', 'n197', 'n207', 'n692', 'n638', 'n537', 'n258', 'n350', 'n478', 'n386', 'n631', 'n554', 'n587', 'n727', 'n493', 'n571', 'n469', 'n264', 'n651', 'n142', 'n718', 'n11', 'n253', 'n138', 'n507', 'n3', 'n604', 'n102', 'n443', 'n202', 'n20', 'n371', 'n252', 'n454', 'n242', 'n439', 'n217', 'n672', 'n676', 'n679', 'n31', 'n722', 'n745', 'n689', 'n744', 'n132', 'n778', 'n405', 'n729', 'n54', 'n222', 'n670', 'n215', 'n781', 'n561', 'n92', 'n696', 'n216', 'n665', 'n241', 'n593', 'n246', 'n496', 'n774', 'n127', 'n576', 'n212', 'n333', 'n704', 'n409', 'n674', 'n520', 'n220', 'n318', 'n233', 'n477', 'n115'

In [None]:
# Map each node id to its index in `nodes`.
node_positions = dict(zip(nodes, range(len(nodes))))
print(node_positions)

{'n205': 0, 'n280': 1, 'n743': 2, 'n429': 3, 'n669': 4, 'n695': 5, 'n98': 6, 'n268': 7, 'n523': 8, 'n299': 9, 'n82': 10, 'n687': 11, 'n654': 12, 'n249': 13, 'n535': 14, 'n658': 15, 'n118': 16, 'n354': 17, 'n440': 18, 'n482': 19, 'n711': 20, 'n775': 21, 'n603': 22, 'n33': 23, 'n80': 24, 'n627': 25, 'n113': 26, 'n545': 27, 'n291': 28, 'n641': 29, 'n739': 30, 'n97': 31, 'n247': 32, 'n136': 33, 'n637': 34, 'n531': 35, 'n351': 36, 'n75': 37, 'n497': 38, 'n652': 39, 'n700': 40, 'n235': 41, 'n279': 42, 'n322': 43, 'n668': 44, 'n529': 45, 'n276': 46, 'n141': 47, 'n124': 48, 'n759': 49, 'R1': 50, 'n197': 51, 'n207': 52, 'n692': 53, 'n638': 54, 'n537': 55, 'n258': 56, 'n350': 57, 'n478': 58, 'n386': 59, 'n631': 60, 'n554': 61, 'n587': 62, 'n727': 63, 'n493': 64, 'n571': 65, 'n469': 66, 'n264': 67, 'n651': 68, 'n142': 69, 'n718': 70, 'n11': 71, 'n253': 72, 'n138': 73, 'n507': 74, 'n3': 75, 'n604': 76, 'n102': 77, 'n443': 78, 'n202': 79, 'n20': 80, 'n371': 81, 'n252': 82, 'n454': 83, 'n242': 84, '

In [None]:
# Spot-check: show the position index assigned to one node.
sample_node = 'n62'
print(node_positions[sample_node])

672


In [None]:
# Flatten the topology frame into plain Python rows, one list per link,
# in the frame's column order.
links = df.to_numpy().tolist()
print(links)

[['p1', 'n62', 'n61'], ['p2', 'n66', 'n64'], ['p3', 'n86', 'n90'], ['p4', 'n71', 'n70'], ['p5', 'n3', 'n2'], ['p6', 'n12', 'n14'], ['p7', 'n6', 'n9'], ['p8', 'n22', 'n30'], ['p9', 'n67', 'n66'], ['p10', 'n46', 'n48'], ['p11', 'n7', 'n10'], ['p12', 'n9', 'n4'], ['p13', 'n31', 'n27'], ['p14', 'n14', 'n16'], ['p15', 'n15', 'n16'], ['p16', 'n16', 'n18'], ['p17', 'n18', 'n20'], ['p18', 'n18', 'n24'], ['p19', 'n21', 'n25'], ['p20', 'n26', 'n28'], ['p21', 'n27', 'n34'], ['p22', 'n28', 'n33'], ['p23', 'n29', 'n32'], ['p24', 'n32', 'n36'], ['p25', 'n33', 'n35'], ['p26', 'n34', 'n37'], ['p27', 'n35', 'n38'], ['p28', 'n36', 'n41'], ['p29', 'n37', 'n45'], ['p30', 'n37', 'n39'], ['p31', 'n40', 'n42'], ['p32', 'n42', 'n43'], ['p33', 'n43', 'n45'], ['p34', 'n50', 'n55'], ['p35', 'n51', 'n53'], ['p36', 'n52', 'n56'], ['p37', 'n54', 'n57'], ['p38', 'n60', 'n61'], ['p39', 'n61', 'n65'], ['p40', 'n62', 'n64'], ['p41', 'n63', 'n65'], ['p42', 'n72', 'n73'], ['p43', 'n73', 'n74'], ['p44', 'n74', 'n76'], ['p

## Clustering nodes geographically on the basis of hop count

In [None]:
from sklearn.cluster import KMeans

# Constant second feature: every link is given the same length of 1.
edge_length = 1

# Create the feature matrix: one row per link, holding
#   [squared difference of the two end nodes' position indices, edge_length].
# NOTE(review): the position index is a node's place in the `nodes` list, so
# this is an index distance, not a graph hop count or a physical distance -
# confirm that this proxy is what the analysis intends.
num_links = len(links)
features = np.zeros((num_links, 2))
for i, link in enumerate(links):
    node1_pos = node_positions[link[1]]
    node2_pos = node_positions[link[2]]
    squared_distance = (node1_pos - node2_pos) ** 2
    features[i, :] = [squared_distance, edge_length]

# k-means clustering
# random_state pins the centroid initialisation so the zones are identical on
# every run; n_init is explicit because its default differs between
# scikit-learn versions.
num_zones = 5
kmeans = KMeans(n_clusters=num_zones, n_init=10, random_state=0).fit(features)

# Assign links to zones: zone_assignments[j] is the zone label of links[j].
zone_assignments = kmeans.labels_



## Printing zones and links

In [None]:
# List the links of each zone under a 1-based zone heading.
for i in range(num_zones):
    print(f"Zone {i+1}:")
    # Pair every link with its label and keep the ones belonging to zone i.
    zone_links = [link for link, zone in zip(links, zone_assignments) if zone == i]
    for link in zone_links:
        print(f"\tLink {link[0]}: ({link[1]}, {link[2]})")

Zone 1:
	Link p1: (n62, n61)
	Link p5: (n3, n2)
	Link p8: (n22, n30)
	Link p13: (n31, n27)
	Link p18: (n18, n24)
	Link p20: (n26, n28)
	Link p21: (n27, n34)
	Link p23: (n29, n32)
	Link p28: (n36, n41)
	Link p29: (n37, n45)
	Link p39: (n61, n65)
	Link p52: (n91, n88)
	Link p56: (n310, n312)
	Link p57: (n218, n219)
	Link p58: (n277, n278)
	Link p60: (n309, n308)
	Link p69: (n314, n316)
	Link p74: (n233, n236)
	Link p75: (n231, n233)
	Link p77: (n302, n301)
	Link p81: (n126, n121)
	Link p84: (n162, n159)
	Link p87: (n156, n157)
	Link p91: (n145, n146)
	Link p95: (n94, n97)
	Link p98: (n101, n103)
	Link p104: (n105, n113)
	Link p105: (n110, n115)
	Link p118: (n141, n145)
	Link p124: (n157, n160)
	Link p132: (n171, n173)
	Link p143: (n197, n199)
	Link p148: (n209, n210)
	Link p156: (n228, n232)
	Link p168: (n262, n267)
	Link p171: (n271, n274)
	Link p173: (n281, n282)
	Link p174: (n282, n286)
	Link p175: (n283, n289)
	Link p176: (n284, n293)
	Link p179: (n291, n292)
	Link p183: (n299, n305)

In [None]:
# Attach each link's zone label as a new `zone` column.
zone_clusters = zone_clusters.assign(zone=zone_assignments)

In [None]:
# Preview the first five link-to-zone rows.
zone_clusters.head(5)

Unnamed: 0,link_id,zone
0,p1,0
1,p2,2
2,p3,2
3,p4,2
4,p5,0


In [None]:
# Persist the link-to-zone table; the row index is written as the first column.
zone_clusters.to_csv('wdn_zones.csv', index=True)

In [None]:
# Show the raw per-link zone labels produced by the fitted model.
link_zone_labels = kmeans.labels_
print(link_zone_labels)

[0 2 2 2 0 2 2 0 2 2 2 2 0 2 1 1 2 0 2 0 0 2 0 2 2 2 2 0 0 1 2 2 2 2 2 2 2
 2 0 2 2 2 2 1 2 2 2 2 2 2 2 0 2 2 2 0 0 0 2 0 2 2 1 2 2 2 2 2 0 2 2 2 2 0
 0 1 0 2 2 2 0 2 2 0 2 2 0 2 1 2 0 2 2 2 0 2 2 0 2 2 1 2 2 0 0 2 2 2 2 1 2
 2 2 2 2 2 2 0 1 2 2 2 1 0 2 2 2 2 2 2 2 0 2 2 2 1 2 1 1 2 2 2 0 2 2 2 2 0
 2 2 2 1 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 0 2 2 0 2 0 0 0 0 1 2 0 2 2 2 0 1 2
 2 2 0 2 0 2 0 2 2 2 1 2 1 2 2 2 0 2 2 1 2 0 2 0 0 2 2 1 2 0 2 2 2 2 2 2 2
 0 0 2 2 0 2 2 2 2 0 2 2 2 0 2 0 1 0 2 0 0 2 0 2 0 2 2 0 2 0 2 0 0 2 2 1 2
 2 0 2 2 2 2 2 0 2 1 2 2 0 0 1 2 2 2 2 2 0 1 0 2 2 2 0 2 2 2 2 2 2 2 2 2 0
 2 0 2 2 2 2 2 0 0 2 2 2 0 0 2 2 2 1 2 0 2 2 2 2 0 1 2 2 2 2 0 2 2 2 0 1 0
 2 2 2 2 0 2 2 2 2 2 0 2 2 2 0 2 2 2 1 2 2 2 2 2 2 0 2 2 0 2 0 0 0 1 1 2 2
 2 2 0 2 0 0 2 1 0 2 2 2 2 2 1 2 0 0 2 2 2 2 0 2 0 0 2 2 2 2 2 0 0 2 2 2 2
 1 2 2 0 2 2 2 2 2 1 2 2 2 0 2 2 2 2 0 0 0 2 2 2 2 2 2 2 0 0 0 2 2 0 0 0 2
 0 2 0 2 0 2 0 2 2 2 2 2 0 2 0 0 2 2 2 2 2 0 2 2 2 2 2 0 2 0 2 2 1 0 2 2 1
 2 2 0 0 2 0 2 1 2 2 2 2 

In [None]:
# Build lookup tables for the network from the link table.
node_a = list(data[' start_node'])
node_b = list(data[' end_node'])
length = list(data[' length(m)'])

# Unique node ids in first-appearance order: all start nodes first, then any
# end nodes not already seen. dict.fromkeys de-duplicates in one pass instead
# of testing membership in a growing list for every link.
used_nodes = list(dict.fromkeys(node_a + node_b))

# Per-node record holding the node's name and a counter 'c' initialised to 0.
nodeDictionary = {name: {'name': name, 'c': 0} for name in used_nodes}
print(nodeDictionary)

# Per-link cost keyed by (start, end). A later row with the same node pair
# overwrites an earlier one.
edgeDictionary = {
    (start, end): {'cost': cost}
    for start, end, cost in zip(node_a, node_b, length)
}
print(edgeDictionary)

# Consecutive integer index for every node, in the same first-appearance order.
indexDictionary = {name: position for position, name in enumerate(used_nodes)}
count = len(used_nodes)
print(indexDictionary)

{'n62': {'name': 'n62', 'c': 0}, 'n66': {'name': 'n66', 'c': 0}, 'n86': {'name': 'n86', 'c': 0}, 'n71': {'name': 'n71', 'c': 0}, 'n3': {'name': 'n3', 'c': 0}, 'n12': {'name': 'n12', 'c': 0}, 'n6': {'name': 'n6', 'c': 0}, 'n22': {'name': 'n22', 'c': 0}, 'n67': {'name': 'n67', 'c': 0}, 'n46': {'name': 'n46', 'c': 0}, 'n7': {'name': 'n7', 'c': 0}, 'n9': {'name': 'n9', 'c': 0}, 'n31': {'name': 'n31', 'c': 0}, 'n14': {'name': 'n14', 'c': 0}, 'n15': {'name': 'n15', 'c': 0}, 'n16': {'name': 'n16', 'c': 0}, 'n18': {'name': 'n18', 'c': 0}, 'n21': {'name': 'n21', 'c': 0}, 'n26': {'name': 'n26', 'c': 0}, 'n27': {'name': 'n27', 'c': 0}, 'n28': {'name': 'n28', 'c': 0}, 'n29': {'name': 'n29', 'c': 0}, 'n32': {'name': 'n32', 'c': 0}, 'n33': {'name': 'n33', 'c': 0}, 'n34': {'name': 'n34', 'c': 0}, 'n35': {'name': 'n35', 'c': 0}, 'n36': {'name': 'n36', 'c': 0}, 'n37': {'name': 'n37', 'c': 0}, 'n40': {'name': 'n40', 'c': 0}, 'n42': {'name': 'n42', 'c': 0}, 'n43': {'name': 'n43', 'c': 0}, 'n50': {'name':

In [None]:
import networkx as nx
import matplotlib.pyplot as plt


In [None]:
# Create graph: one edge per link, labelled with the link id.
G = nx.Graph()
for d in links:
    G.add_edge(d[1], d[2], label=d[0])

# Define colors for each cluster
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']

# Draw graph with nodes colored by cluster.
# Zones are assigned to links, so each zone is drawn through the start nodes
# of its links. A node that starts links in several zones is painted once per
# zone and ends up with the color of the last zone drawn.
pos = nx.spring_layout(G, seed=0)  # fixed seed keeps the layout stable across runs
for i, c in enumerate(sorted(set(zone_assignments))):
    # Separate local name so the global `nodes` list is not overwritten.
    zone_nodes = list(dict.fromkeys(
        links[j][1] for j in range(len(links)) if zone_assignments[j] == c
    ))
    nx.draw_networkx_nodes(G, pos, nodelist=zone_nodes, node_color=colors[i % len(colors)])
nx.draw_networkx_edges(G, pos)
nx.draw_networkx_labels(G, pos)
nx.draw_networkx_edge_labels(G, pos, edge_labels={(d[1], d[2]): d[0] for d in links})
plt.show()