# Dataset creation

## Imports

In [None]:
#Basic
import os
import wntr
import pandas as pd
import numpy as np
import pickle
import networkx as nx
import time

#WNTR
import wntr

#torch and PyG
import torch
#from torch_geometric.data import Data
from torch_geometric.utils import convert

#Utilities
from database_fn import *

#Window explorer for the file
from tkinter import Tk     # from tkinter import Tk for Python 3.x
from tkinter.filedialog import askopenfilename

## Loading inp

In [None]:
# Hide the empty Tk root window so that only the file dialog is shown.
Tk().withdraw()

# "Open" dialog restricted to .inp files; it returns the path of the selected file.
inp_file_path = askopenfilename(
    filetypes=[('inp files', '*.inp')],
    title='Select a network',
)
print(inp_file_path)

# Network tag used later to name the output folder and files: the three characters
# located right before the 4-character extension (".inp") of the selected path.
wdn_name = inp_file_path[:-4][-3:]

In [None]:
# Load the selected .inp file through the helper `load_inp` (presumably provided by
# `database_fn` via the star import -- confirm). It returns three values; only the first
# one is kept, as `wn_WDS`, which the cells below use as the WNTR network model. The
# other two are discarded.
wn_WDS, _, _ = load_inp(inp_file_path)

## Information of the network
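The available diameters, the unit conversion factor to meters, and the minimum pressure at each node
are read from a companion CSV file (`<network>_info.csv`, stored next to the selected `.inp` file).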

In [None]:
# Companion CSV with the network information: same folder and base name as the selected
# .inp file, with the 4-character extension replaced by the '_info.csv' suffix.
# (The closing quote of the suffix literal was missing, which made this cell a syntax error.)
csv_file_path = inp_file_path[:-4] + '_info.csv'
print(csv_file_path)

In [None]:
# Read the network information CSV through the helper `diams_min_pres`.
# The three returned values are used by the cells below as follows:
#   avail_diams -- candidate diameters that are randomly sampled for every pipe
#   min_pres    -- minimum-pressure argument passed to the resilience indices
#   UNIT_TO_M   -- factor that multiplies the sampled diameters before they are
#                  assigned to the pipes of the network model
avail_diams, min_pres, UNIT_TO_M = diams_min_pres(csv_file_path)

## Database creation

In [None]:
# Pipes whose diameters are re-sampled in every simulation. The names come from the pipe
# list (not from the generic link list, which also holds pumps and valves) so that they
# always pair up one-to-one with `num_pipes`.
num_pipes = wn_WDS.num_pipes
pipe_names = wn_WDS.pipe_name_list

# Fixed seed so that the sequence of sampled diameter sets is reproducible.
seed = 1234567
np.random.seed(seed)

all_geom_data = [] # geometric data: one object per accepted simulation
fail_DB = []       # one row per simulation that raised an exception
N_iter = 5000      # the sampling loop stops when either list reaches this size
sim = 0            # counter of attempted simulations

# Optional probability distribution. The weight grows linearly with the position in
# `avail_diams`, so it is biased towards the bigger diameters
# (assumes avail_diams is sorted in ascending order -- TODO confirm).
raw_weights = [(i+1)/len(avail_diams) for i in range(len(avail_diams))]
weight_total = sum(raw_weights)  # computed once instead of once per element
p = [w/weight_total for w in raw_weights]

In [None]:
# Sampling loop: draw a random diameter set, simulate it and store the outcome.
# It ends as soon as either the list of samples or the list of failures holds N_iter items.
start = time.time()
while len(all_geom_data) < N_iter and len(fail_DB) < N_iter:

    # Progress echo on screen
    if sim % 500 == 0 or sim == N_iter - 1:
        print(f'iter: {sim}')

    # Draw one diameter per pipe (with replacement, weighted by p) and scale it by UNIT_TO_M
    new_diams = np.random.choice(avail_diams, num_pipes, replace=True, p=p)
    new_diams = new_diams * UNIT_TO_M

    # Write the sampled diameters into the network model, pipe by pipe
    for _pipe_name, _pipe_diam in zip(pipe_names, new_diams):
        wn_WDS.get_link(_pipe_name).diameter = _pipe_diam

    try:
        # Hydraulic simulation of the modified network
        sim_WDS = wntr.sim.EpanetSimulator(wn_WDS)
        results_WDS = sim_WDS.run_sim()
        flowrate = 0 #There are no pumps

        # Node results shared by both resilience indices
        _node_res = results_WDS.node
        _node_head = _node_res['head']
        _node_pressure = _node_res['pressure']
        _node_demand = _node_res['demand']

        RI_PP = PP_index(_node_head, _node_pressure, _node_demand, flowrate, wn_WDS, min_pres)
        RI_Todini = wntr.metrics.hydraulic.todini_index(_node_head, _node_pressure, _node_demand, flowrate, wn_WDS, min_pres)

        # Graph of the network as a PyG Data object
        data = convert.from_networkx(from_wntr_to_nx(wn_WDS))

        # Diameters (for the MLP), resilience indices and simulation results
        data.diameters = torch.tensor(new_diams)
        data.res_Todini = torch.tensor(RI_Todini)
        data.res_PP = torch.tensor(RI_PP)
        data.pressure = torch.tensor(_node_pressure.values)
        data.demand = torch.tensor(_node_demand.values)

        # Samples whose lowest pressure is below -0.1 are discarded (neither stored nor
        # logged as failures); every other sample is kept.
        if not (data.pressure.min() < -1e-1):
            all_geom_data.append(data)

    except Exception as e:
        # Failure log row: attempt number, sampled diameters converted back with UNIT_TO_M
        # and rounded, the network model and the exception.
        # NOTE: every row stores the same `wn_WDS` object (no copy is made).
        fail_row = [sim, np.round(new_diams/UNIT_TO_M, 0), wn_WDS, e]
        print(e)
        fail_DB.append(fail_row)

    # Every attempt counts, whether it was stored, discarded or failed
    sim += 1

total_time = time.time() - start
print("Total simulations: ", sim)

In [None]:
# Display one stored sample (the second element, index 1) to inspect the attributes
# attached to each object; this requires at least two accepted simulations.
all_geom_data[1] #Example of the information of the objects on the list

## Save the pickle file

In [None]:
# Datasets folder: the "Datasets" directory inside the current working directory.
# os.path.join uses the separator of the running platform and avoids the invalid
# escape sequence '\D' that the literal '\Datasets' contained.
dir_path = os.path.join(os.getcwd(), 'Datasets')

print(dir_path)

In [None]:
# Save the list of samples as <dir_path>/<wdn_name>/<wdn_name>_<N_iter>.p
dataset_dir = os.path.join(dir_path, wdn_name)
dataset_file = os.path.join(dataset_dir, f"{wdn_name}_{N_iter}.p")
try:
    # Create the output folder (and missing parents); an existing folder is not an error.
    os.makedirs(dataset_dir, exist_ok=True)
    # The context manager closes the file, so the pickle is fully flushed to disk.
    with open(dataset_file, "wb") as pickle_file:
        pickle.dump(all_geom_data, pickle_file)
except OSError as err:
    # Best effort, as before: report the problem and let the notebook continue.
    print("Creation of the directory %s failed" % dir_path)
    print(err)
else:
    print("Successfully created the directory %s " % dir_path)

In [None]:
#Save computational time
# The elapsed time of the sampling loop (seconds, from time.time()) is written as text to
# <dir_path>/<wdn_name>/time.txt. The folder is created first so that this cell also works
# when the output folder does not exist yet.
time_dir = os.path.join(dir_path, wdn_name)
os.makedirs(time_dir, exist_ok=True)
with open(os.path.join(time_dir, 'time.txt'), 'w') as file:
    file.write(str(total_time))