In [3]:
import pandas as pd
import numpy as np
import math
import shutil
import glob
import os
import gc
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib
import multiprocessing as mp
import pyswmm
from pyswmm import Simulation, Links
import swmmio
import swmmio.utils.modify_model

matplotlib.use(backend='agg')

In [4]:
pd.reset_option('^display.', silent=True)

# inputs

In [5]:
input_idf_table_file_path = './Assignment_3__Input_FE/Data_Centroid/BOM_IDF_Data/depths_-33.8774_151.093_all_design.csv'
input_temp_pattern_file_path = './Assignment_3__Input_FE/Data_Centroid/ECsouth/ECsouth_Increments.csv'
input_storm_stats_file_path = './Assignment_3__Input_FE/Data_Centroid/ECsouth/ECsouth_AllStats.csv'
output_dir = './Assignment_3__Output_FE__1st__centroid/'
original_inp_file_path = './Assignment_3__Input_FE/971007_SW5.INP'
simulation_starting_time = '2000/01/01 00:00:00'
stopping_time_after_precipitation_finish = {'days':0, 'hours':12, 'minutes':0, 'seconds':0}
report_step = {'days':0, 'hours':0, 'minutes':5, 'seconds':0}
wet_step = {'days':0, 'hours':0, 'minutes':5, 'seconds':0}
dry_step = {'days':0, 'hours':0, 'minutes':5, 'seconds':0}
routing_step = {'days':0, 'hours':0, 'minutes':0, 'seconds':30}
link_to_get_results = 116
timeseries_name = 'RainGauge'

simulation_starting_time = pd.to_datetime(simulation_starting_time)
stopping_time_after_precipitation_finish = pd.Timedelta(**stopping_time_after_precipitation_finish)
report_step = pd.Timedelta(**report_step)
wet_step = pd.Timedelta(**wet_step)
dry_step = pd.Timedelta(**dry_step)
routing_step = pd.Timedelta(**routing_step)
link_to_get_results = str(link_to_get_results)

print(
    input_idf_table_file_path,
    input_temp_pattern_file_path,
    input_storm_stats_file_path,
    original_inp_file_path,
    simulation_starting_time,
    stopping_time_after_precipitation_finish,
    report_step,
    wet_step,
    dry_step,
    routing_step,
    link_to_get_results,
    timeseries_name,
    sep='\n'
    )

./Assignment_3__Input_FE/Data_Centroid/BOM_IDF_Data/depths_-33.8774_151.093_all_design.csv
./Assignment_3__Input_FE/Data_Centroid/ECsouth/ECsouth_Increments.csv
./Assignment_3__Input_FE/Data_Centroid/ECsouth/ECsouth_AllStats.csv
./Assignment_3__Input_FE/971007_SW5.INP
2000-01-01 00:00:00
0 days 12:00:00
0 days 00:05:00
0 days 00:05:00
0 days 00:05:00
0 days 00:00:30
116
RainGauge


In [6]:
def create_output_dir(arg_output_dir):
    """create output directory if it does not exist

    arguments:
        [string] --> arg_output_dir = path of the output directory name
    """
    if not os.path.exists(arg_output_dir):
        os.makedirs(arg_output_dir)

In [7]:
create_output_dir(output_dir)
output_dir

'./Assignment_3__Output_FE__1st__centroid/'

In [8]:
inp_file_dir = '{}inp_file_dir/'.format(output_dir)
create_output_dir(inp_file_dir)
inp_file_dir

'./Assignment_3__Output_FE__1st__centroid/inp_file_dir/'

In [9]:
rain_pattern_cum_dir = '{}rain_pattern_cum_dir/'.format(output_dir)
create_output_dir(rain_pattern_cum_dir)
rain_pattern_cum_dir

'./Assignment_3__Output_FE__1st__centroid/rain_pattern_cum_dir/'

In [10]:
precipitation_dir = '{}precipitation_dir/'.format(output_dir)
create_output_dir(precipitation_dir)
precipitation_dir

'./Assignment_3__Output_FE__1st__centroid/precipitation_dir/'

In [11]:
flow_rate_dir = '{}flow_rate_dir/'.format(output_dir)
create_output_dir(flow_rate_dir)
flow_rate_dir

'./Assignment_3__Output_FE__1st__centroid/flow_rate_dir/'

In [12]:
graphs_pattern_dir = '{}graphs_pattern_dir/'.format(output_dir)
create_output_dir(graphs_pattern_dir)
graphs_pattern_dir

'./Assignment_3__Output_FE__1st__centroid/graphs_pattern_dir/'

In [13]:
graphs_frequency_dir = '{}graphs_frequency_dir/'.format(output_dir)
create_output_dir(graphs_frequency_dir)
graphs_frequency_dir

'./Assignment_3__Output_FE__1st__centroid/graphs_frequency_dir/'

In [14]:
results_frequency_dir = '{}results_frequency_dir/'.format(output_dir)
create_output_dir(results_frequency_dir)
results_frequency_dir

'./Assignment_3__Output_FE__1st__centroid/results_frequency_dir/'

In [284]:
results_stats_dir = '{}results_stats_dir/'.format(output_dir)
create_output_dir(results_stats_dir)
results_stats_dir

'./Assignment_3__Output_FE__1st__centroid/results_stats_dir/'

# Threads

In [15]:
n_core = mp.cpu_count() - 1
n_core

13

# setting up the inp file

In [16]:
inp_file_path = '{}inp_template.inp'.format(output_dir)
inp_file_path

'./Assignment_3__Output_FE__1st__centroid/inp_template.inp'

In [17]:
# copy the original inp file
shutil.copyfile(src=original_inp_file_path, dst=inp_file_path)

'./Assignment_3__Output_FE__1st__centroid/inp_template.inp'

In [18]:
options_inp_file = swmmio.utils.dataframes.dataframe_from_inp(inp_path=inp_file_path, section='[OPTIONS]')
options_inp_file.at['START_DATE', 'Value'] = simulation_starting_time.strftime(format='%m/%d/%Y')
options_inp_file.at['START_TIME', 'Value'] = simulation_starting_time.strftime(format='%H:%M:%S')
options_inp_file.at['REPORT_START_DATE', 'Value'] = simulation_starting_time.strftime(format='%m/%d/%Y')
options_inp_file.at['REPORT_START_TIME', 'Value'] = simulation_starting_time.strftime(format='%H:%M:%S')
options_inp_file.at['END_DATE', 'Value'] = (simulation_starting_time + stopping_time_after_precipitation_finish).strftime(format='%m/%d/%Y')
options_inp_file.at['END_TIME', 'Value'] = (simulation_starting_time + stopping_time_after_precipitation_finish).strftime(format='%H:%M:%S')
options_inp_file.at['REPORT_STEP', 'Value'] = str(report_step).split(sep=' ')[-1]
options_inp_file.at['WET_STEP', 'Value'] = str(wet_step).split(sep=' ')[-1]
options_inp_file.at['DRY_STEP', 'Value'] = str(dry_step).split(sep=' ')[-1]
options_inp_file.at['ROUTING_STEP', 'Value'] = str(routing_step).split(sep=' ')[-1]
options_inp_file.at['THREADS', 'Value'] = n_core

options_inp_file

Unnamed: 0_level_0,Value
Key,Unnamed: 1_level_1
FLOW_UNITS,CMS
INFILTRATION,HORTON
FLOW_ROUTING,KINWAVE
LINK_OFFSETS,DEPTH
MIN_SLOPE,0
ALLOW_PONDING,NO
SKIP_STEADY_STATE,NO
START_DATE,01/01/2000
START_TIME,00:00:00
REPORT_START_DATE,01/01/2000


In [19]:
raingauges_inp_file = swmmio.utils.dataframes.dataframe_from_inp(inp_path=inp_file_path, section='[RAINGAGES]')
raingauges_inp_file = raingauges_inp_file.iloc[:1]
raingauges_inp_file.index = pd.Index(data=[timeseries_name], name='Name')
raingauges_inp_file.at[timeseries_name, 'TimeIntrvl'] = str(pd.Timedelta(minutes=5)).split(sep=' ')[-1].rsplit(sep=':', maxsplit=1)[0]
raingauges_inp_file.at[timeseries_name, 'DataSourceName'] = timeseries_name

raingauges_inp_file

Unnamed: 0_level_0,RainType,TimeIntrvl,SnowCatch,DataSource,DataSourceName
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RainGauge,VOLUME,00:05,1.0,TIMESERIES,RainGauge


In [20]:
subcatchments_inp_file = swmmio.utils.dataframes.dataframe_from_inp(inp_path=inp_file_path, section='[SUBCATCHMENTS]')
subcatchments_inp_file.Raingage = timeseries_name

subcatchments_inp_file

Unnamed: 0_level_0,Raingage,Outlet,Area,PercImperv,Width,PercSlope,CurbLength
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2101,RainGauge,101,8.46,33.5,300.0,3.50,0
2401,RainGauge,401,5.87,33.5,230.0,4.50,0
2102,RainGauge,102,5.87,33.5,100.0,4.80,0
2103,RainGauge,103,4.25,33.5,240.0,4.50,0
2104,RainGauge,104,12.22,33.5,500.0,4.00,0
...,...,...,...,...,...,...,...
2921,RainGauge,921,0.01,33.5,0.0,0.00,0
2922,RainGauge,922,0.01,33.5,0.0,0.00,0
2923,RainGauge,923,37.15,33.5,850.0,4.55,0
2924,RainGauge,924,19.86,33.5,585.0,4.55,0


In [21]:
timeseries_inp_file = swmmio.utils.dataframes.dataframe_from_inp(inp_path=inp_file_path, section='[TIMESERIES]')
timeseries_inp_file.reset_index(inplace=True)
timeseries_inp_file.drop(index=timeseries_inp_file.index[1:], inplace=True)
timeseries_inp_file.at[0, 'Name'] = timeseries_name
timeseries_inp_file.set_index(keys='Name', inplace=True)
timeseries_inp_file.at[timeseries_name, 'Date'] = simulation_starting_time.strftime(format='%m/%d/%Y')
timeseries_inp_file.at[timeseries_name, 'Time'] = simulation_starting_time.strftime(format='%H:%M')
timeseries_inp_file.at[timeseries_name, 'Value'] = 0.0

timeseries_inp_file

Unnamed: 0_level_0,Date,Time,Value
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RainGauge,01/01/2000,00:00,0.0


In [22]:
[swmmio.utils.modify_model.replace_inp_section(inp_path=inp_file_path, modified_section_header=ind1, new_data=ind2)
for ind1, ind2 in zip(
    ['[OPTIONS]', '[RAINGAGES]', '[SUBCATCHMENTS]', '[TIMESERIES]'], 
    [options_inp_file, raingauges_inp_file, subcatchments_inp_file, timeseries_inp_file]
    )]

[<swmmio.core.Model at 0x7a0544f690f0>,
 <swmmio.core.Model at 0x7a0544f6a8f0>,
 <swmmio.core.Model at 0x7a04eb05e350>,
 <swmmio.core.Model at 0x7a04eb05f040>]

# building idf table

In [23]:
idf_table = pd.read_csv(filepath_or_buffer=input_idf_table_file_path, skiprows=9)
frequency_label = idf_table.columns[2:].to_numpy()
idf_table.columns = idf_table.columns.to_series().apply(
    func=lambda arg: 
        '_'.join(arg.split(sep=' ')) 
        if arg[:8] == 'Duration' 
        else 'freq_' + '_'.join('_perc'.join('_'.join(arg.split(sep=' ')).split(sep='%')).split(sep='.'))
    )
frequency_tag = idf_table.columns[2:].to_numpy()
idf_table[['Duration', 'units']] = idf_table.apply(func=lambda arg: arg.Duration.split(sep=' '), axis=1, result_type='expand')
idf_table = idf_table[idf_table.columns[[0,-1] + list(range(1,len(idf_table.columns) - 1))]]
idf_table.Duration = idf_table.Duration.astype(float)
idf_table.to_csv(path_or_buf='{}table_idf_depths.csv'.format(output_dir), index=False)
idf_table.to_parquet(path='{}table_idf_depths.parquet'.format(output_dir))

idf_table

Unnamed: 0,Duration,units,Duration_in_min,freq_12EY,freq_6EY,freq_4EY,freq_3EY,freq_2EY,freq_63_2_perc,freq_50_perc,...,freq_20_perc,freq_0_2EY,freq_10_perc,freq_5_perc,freq_2_perc,freq_1_perc,freq_1_in_200,freq_1_in_500,freq_1_in_1000,freq_1_in_2000
0,1.0,min,1.0,0.999,1.13,1.37,1.55,1.8,2.28,2.51,...,3.25,3.31,3.74,4.22,4.84,5.32,5.82,6.58,7.17,7.76
1,2.0,min,2.0,1.77,2.0,2.39,2.67,3.07,3.77,4.1,...,5.14,5.25,5.86,6.57,7.52,8.25,9.08,10.3,11.3,12.2
2,3.0,min,3.0,2.41,2.73,3.29,3.68,4.25,5.23,5.71,...,7.21,7.35,8.23,9.24,10.6,11.6,12.8,14.5,15.8,17.1
3,4.0,min,4.0,2.96,3.36,4.07,4.58,5.3,6.56,7.19,...,9.15,9.33,10.5,11.8,13.5,14.9,16.3,18.4,20.1,21.8
4,5.0,min,5.0,3.43,3.91,4.76,5.37,6.23,7.75,8.52,...,10.9,11.1,12.6,14.1,16.2,17.8,19.5,22.1,24.0,26.1
5,10.0,min,10.0,5.15,5.91,7.28,8.27,9.7,12.2,13.6,...,17.6,18.0,20.4,23.0,26.4,29.0,31.6,35.8,39.0,42.2
6,15.0,min,15.0,6.29,7.25,8.96,10.2,12.0,15.3,16.9,...,22.0,22.5,25.5,28.7,33.0,36.2,39.6,44.7,48.7,52.8
7,20.0,min,20.0,7.16,8.26,10.2,11.7,13.7,17.5,19.4,...,25.2,25.7,29.1,32.8,37.7,41.4,45.2,51.2,55.7,60.4
8,25.0,min,25.0,7.86,9.07,11.2,12.8,15.1,19.3,21.4,...,27.7,28.3,31.9,36.0,41.3,45.3,49.6,56.1,61.2,66.3
9,30.0,min,30.0,8.46,9.76,12.1,13.8,16.3,20.8,23.0,...,29.7,30.3,34.2,38.6,44.3,48.6,53.2,60.2,65.6,71.1


# building temporal pattern table

In [24]:
temp_pattern_table = pd.read_csv(filepath_or_buffer=input_temp_pattern_file_path)
temp_pattern_table.rename(columns=lambda arg: arg.strip(), inplace=True)
# temp_pattern_table.rename(columns={'Duration':'Duration_in_min'}, inplace=True)
temp_pattern_table.columns = pd.Index(data=temp_pattern_table.columns[:5].to_list() + ['Increment_{:02}'.format(ind) for ind in range(len(temp_pattern_table.columns[5:]))])
temp_pattern_table.to_csv(path_or_buf='{}table_temp_patterns.csv'.format(output_dir), index=False)
temp_pattern_table.to_parquet(path='{}table_temp_patterns.parquet'.format(output_dir))

temp_pattern_table

Unnamed: 0,EventID,Duration,TimeStep,Region,AEP,Increment_00,Increment_01,Increment_02,Increment_03,Increment_04,...,Increment_46,Increment_47,Increment_48,Increment_49,Increment_50,Increment_51,Increment_52,Increment_53,Increment_54,Increment_55
0,4380,10,5,East Coast (South),frequent,58.06,41.94,,,,...,,,,,,,,,,
1,4382,10,5,East Coast (South),frequent,52.13,47.87,,,,...,,,,,,,,,,
2,4384,10,5,East Coast (South),frequent,60.71,39.29,,,,...,,,,,,,,,,
3,4385,10,5,East Coast (South),frequent,51.51,48.49,,,,...,,,,,,,,,,
4,4386,10,5,East Coast (South),frequent,54.55,45.45,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2856,10080,180,East Coast (South),rare,1.14,1.80,1.36,1.29,3.20,...,0.01,0.01,0.02,0.03,0.32,0.17,0.52,0.00,0.02,0.00
716,5064,10080,180,East Coast (South),rare,0.61,0.93,0.26,0.04,2.91,...,5.27,0.31,0.02,1.21,1.57,0.08,0.05,0.15,0.40,0.11
717,5069,10080,180,East Coast (South),rare,0.00,0.00,0.00,1.06,0.04,...,0.22,0.20,0.57,0.06,2.06,0.00,0.03,0.18,0.12,3.09
718,5071,10080,180,East Coast (South),rare,1.21,0.00,0.00,0.00,0.00,...,0.05,0.56,0.70,3.31,0.10,0.11,0.00,0.00,0.00,0.15


In [26]:
storm_stats_table = pd.read_csv(filepath_or_buffer=input_storm_stats_file_path)
storm_stats_table.rename(columns=lambda arg: arg.strip(), inplace=True)
for ind in [(' ','_'), ('(',''), (')',''), ('%','in_perc'), ('_No.','_Number'), ('_min','_in_min'), ('_mm','_in_mm')]:
    storm_stats_table.rename(columns=lambda arg: arg.replace(*ind), inplace=True)
# storm_stats_table.rename(columns={'Burst_Duration_in_min':'Duration_in_min'}, inplace=True)
storm_stats_table.dropna(axis=0, inplace=True)
storm_stats_table.Burst_Start_Date = pd.to_datetime(arg=storm_stats_table.Burst_Start_Date, format='%m/%d/%Y %H:%M')
storm_stats_table.Burst_End_Date = pd.to_datetime(arg=storm_stats_table.Burst_End_Date, format='%m/%d/%Y %H:%M')
storm_stats_table.Event_ID = storm_stats_table.Event_ID.astype(int)
storm_stats_table.Burst_Duration_in_min = storm_stats_table.Burst_Duration_in_min.astype(int)
storm_stats_table.Burst_Loading = storm_stats_table.Burst_Loading.astype(int)
storm_stats_table.DB_Event_Reference_Number = storm_stats_table.DB_Event_Reference_Number.astype(int)
storm_stats_table.DB_Pluviograph_Reference_Number = storm_stats_table.DB_Pluviograph_Reference_Number.astype(int)
storm_stats_table.to_csv(path_or_buf='{}table_storm_stats.csv'.format(output_dir), index=False)
storm_stats_table.to_parquet(path='{}table_storm_stats.parquet'.format(output_dir))

storm_stats_table

Unnamed: 0,Event_ID,Region,Region_source,Burst_Duration_in_min,Burst_Loading,Original_Burst_Depth_in_mm,AEP_Window,AEP_source_in_perc,Burst_Start_Date,Burst_End_Date,DB_Event_Reference_Number,DB_Pluviograph_Reference_Number,Offical_Gauge,Lat,Long
0,4380,East Coast (South),East Coast (South),10,1,15.50,frequent,14.4679,2009-03-25 17:45:00,2009-03-25 17:50:00,113650,1828,qcd_563064_233,-32.2375,150.6306
1,4382,East Coast (South),East Coast (South),10,1,18.80,frequent,14.4924,1999-12-28 17:05:00,1999-12-28 17:10:00,39777,517,qcd_061250,-32.6296,151.5919
2,4384,East Coast (South),East Coast (South),10,1,16.80,frequent,14.5144,2005-02-04 09:30:00,2005-02-04 09:35:00,42925,556,qcd_068102,-34.4869,150.4019
3,4385,East Coast (South),East Coast (South),10,2,18.50,frequent,15.6473,1999-03-27 22:30:00,1999-03-27 22:35:00,114063,1835,qcd_563079_231,-33.7944,150.5083
4,4386,East Coast (South),East Coast (South),10,2,11.00,frequent,47.6798,1993-04-28 14:05:00,1993-04-28 14:10:00,113366,1824,qcd_563056_231,-34.0333,150.2153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2856,East Coast (South),East Coast (North),10080,1,595.23,rare,1.0865,1996-05-01 00:05:00,1996-05-08 00:00:00,26738,351,qcd_040223,-27.4178,153.1142
716,5064,East Coast (South),East Coast (North),10080,2,589.88,rare,1.2899,1968-01-06 17:10:00,1968-01-13 17:05:00,24711,329,qcd_040093,-26.1831,152.6414
717,5069,East Coast (South),East Coast (North),10080,2,491.26,rare,2.7020,1968-01-06 04:10:00,1968-01-13 04:05:00,23354,313,qcd_039128,-24.9069,152.3230
718,5071,East Coast (South),East Coast (North),10080,2,400.84,rare,2.9538,1968-01-07 11:45:00,1968-01-14 11:40:00,27412,358,qcd_040318,-27.0258,152.5642


# building frequency classification table

In [27]:
ey_constants = np.array(object=[12,6,4,3,2,1,0.5,0.2])
ey_constants

array([12. ,  6. ,  4. ,  3. ,  2. ,  1. ,  0.5,  0.2])

In [28]:
aep_variables = np.multiply(np.add(1, np.negative(np.exp(np.negative(ey_constants)))),100)
aep_variables

array([99.99938558, 99.75212478, 98.16843611, 95.02129316, 86.46647168,
       63.21205588, 39.34693403, 18.12692469])

In [29]:
aep_constants = np.array(object=[50, 20, 10, 5, 2, 1, 0.5, 0.2, 0.1, 0.05, 0.02])
aep_constants

array([5.e+01, 2.e+01, 1.e+01, 5.e+00, 2.e+00, 1.e+00, 5.e-01, 2.e-01,
       1.e-01, 5.e-02, 2.e-02])

In [30]:
ey_variables = np.negative(np.log(np.add(1, np.negative(np.divide(aep_constants, 100)))))
ey_variables

array([6.93147181e-01, 2.23143551e-01, 1.05360516e-01, 5.12932944e-02,
       2.02027073e-02, 1.00503359e-02, 5.01254182e-03, 2.00200267e-03,
       1.00050033e-03, 5.00125042e-04, 2.00020003e-04])

In [31]:
ey = np.flip(m=np.sort(a=np.concatenate((ey_constants, ey_variables))))
ey

array([1.20000000e+01, 6.00000000e+00, 4.00000000e+00, 3.00000000e+00,
       2.00000000e+00, 1.00000000e+00, 6.93147181e-01, 5.00000000e-01,
       2.23143551e-01, 2.00000000e-01, 1.05360516e-01, 5.12932944e-02,
       2.02027073e-02, 1.00503359e-02, 5.01254182e-03, 2.00200267e-03,
       1.00050033e-03, 5.00125042e-04, 2.00020003e-04])

In [32]:
aep_percentage = np.flip(m=np.sort(a=np.concatenate((aep_constants, aep_variables))))
aep_percentage

array([9.99993856e+01, 9.97521248e+01, 9.81684361e+01, 9.50212932e+01,
       8.64664717e+01, 6.32120559e+01, 5.00000000e+01, 3.93469340e+01,
       2.00000000e+01, 1.81269247e+01, 1.00000000e+01, 5.00000000e+00,
       2.00000000e+00, 1.00000000e+00, 5.00000000e-01, 2.00000000e-01,
       1.00000000e-01, 5.00000000e-02, 2.00000000e-02])

In [33]:
aep_1_in_x = np.divide(100, aep_percentage)
aep_1_in_x

array([1.00000614e+00, 1.00248491e+00, 1.01865736e+00, 1.05239570e+00,
       1.15651764e+00, 1.58197671e+00, 2.00000000e+00, 2.54149408e+00,
       5.00000000e+00, 5.51665557e+00, 1.00000000e+01, 2.00000000e+01,
       5.00000000e+01, 1.00000000e+02, 2.00000000e+02, 5.00000000e+02,
       1.00000000e+03, 2.00000000e+03, 5.00000000e+03])

In [34]:
ari = np.divide(1,ey)
ari

array([8.33333333e-02, 1.66666667e-01, 2.50000000e-01, 3.33333333e-01,
       5.00000000e-01, 1.00000000e+00, 1.44269504e+00, 2.00000000e+00,
       4.48142012e+00, 5.00000000e+00, 9.49122158e+00, 1.94957257e+01,
       4.94983165e+01, 9.94991625e+01, 1.99499582e+02, 4.99499833e+02,
       9.99499917e+02, 1.99949996e+03, 4.99949998e+03])

In [36]:
frequency_window = np.array(object=['frequent', 'intermediate', 'rare'])[
    np.add(np.digitize(x=aep_percentage, bins=np.array(object=[100, 14.4, 3.2, 0]), right=False), np.negative(1))
    ]
frequency_window

array(['frequent', 'frequent', 'frequent', 'frequent', 'frequent',
       'frequent', 'frequent', 'frequent', 'frequent', 'frequent',
       'intermediate', 'intermediate', 'rare', 'rare', 'rare', 'rare',
       'rare', 'rare', 'rare'], dtype='<U12')

In [37]:
frequency_tag = np.concatenate((frequency_tag, np.array(object=['freq_1_in_5000'])))
frequency_tag

array(['freq_12EY', 'freq_6EY', 'freq_4EY', 'freq_3EY', 'freq_2EY',
       'freq_63_2_perc', 'freq_50_perc', 'freq_0_5EY', 'freq_20_perc',
       'freq_0_2EY', 'freq_10_perc', 'freq_5_perc', 'freq_2_perc',
       'freq_1_perc', 'freq_1_in_200', 'freq_1_in_500', 'freq_1_in_1000',
       'freq_1_in_2000', 'freq_1_in_5000'], dtype=object)

In [38]:
frequency_label = np.concatenate((frequency_label, np.array(object=['1 in 5000'])))
frequency_label

array(['12EY', '6EY', '4EY', '3EY', '2EY', '63.2%', '50%', '0.5EY', '20%',
       '0.2EY', '10%', '5%', '2%', '1%', '1 in 200', '1 in 500',
       '1 in 1000', '1 in 2000', '1 in 5000'], dtype=object)

In [39]:
frequency_table = pd.DataFrame(
    data=np.column_stack(tup=(ey, aep_percentage, aep_1_in_x, ari, frequency_window, frequency_label, frequency_tag)),
    columns=['EY', 'AEP_percentage', 'AEP_1_in_x', 'ARI', 'freq_window', 'freq_label', 'freq_tag']
    )
frequency_table.to_csv(path_or_buf='{}table_frequency.csv'.format(output_dir), index=False)
frequency_table.to_parquet(path='{}table_frequency.parquet'.format(output_dir))
frequency_table

Unnamed: 0,EY,AEP_percentage,AEP_1_in_x,ARI,freq_window,freq_label,freq_tag
0,12.0,99.999386,1.000006,0.083333,frequent,12EY,freq_12EY
1,6.0,99.752125,1.002485,0.166667,frequent,6EY,freq_6EY
2,4.0,98.168436,1.018657,0.25,frequent,4EY,freq_4EY
3,3.0,95.021293,1.052396,0.333333,frequent,3EY,freq_3EY
4,2.0,86.466472,1.156518,0.5,frequent,2EY,freq_2EY
5,1.0,63.212056,1.581977,1.0,frequent,63.2%,freq_63_2_perc
6,0.693147,50.0,2.0,1.442695,frequent,50%,freq_50_perc
7,0.5,39.346934,2.541494,2.0,frequent,0.5EY,freq_0_5EY
8,0.223144,20.0,5.0,4.48142,frequent,20%,freq_20_perc
9,0.2,18.126925,5.516656,5.0,frequent,0.2EY,freq_0_2EY


In [40]:
r_ey = ey.copy()
r_ey[:14] = r_ey[:14].round(decimals=2)
r_ey[14:17] = r_ey[14:17].round(decimals=3)
r_ey[17:] = r_ey[17:].round(decimals=4)
r_ey

array([1.2e+01, 6.0e+00, 4.0e+00, 3.0e+00, 2.0e+00, 1.0e+00, 6.9e-01,
       5.0e-01, 2.2e-01, 2.0e-01, 1.1e-01, 5.0e-02, 2.0e-02, 1.0e-02,
       5.0e-03, 2.0e-03, 1.0e-03, 5.0e-04, 2.0e-04])

In [41]:
r_aep_percentage = aep_percentage.copy()
r_aep_percentage = r_aep_percentage.round(decimals=2)
r_aep_percentage

array([1.000e+02, 9.975e+01, 9.817e+01, 9.502e+01, 8.647e+01, 6.321e+01,
       5.000e+01, 3.935e+01, 2.000e+01, 1.813e+01, 1.000e+01, 5.000e+00,
       2.000e+00, 1.000e+00, 5.000e-01, 2.000e-01, 1.000e-01, 5.000e-02,
       2.000e-02])

In [42]:
r_aep_1_in_x = aep_1_in_x.copy()
r_aep_1_in_x[:2] = r_aep_1_in_x[:2].round(decimals=3)
r_aep_1_in_x[2:] = r_aep_1_in_x[2:].round(decimals=2)
r_aep_1_in_x

array([1.000e+00, 1.002e+00, 1.020e+00, 1.050e+00, 1.160e+00, 1.580e+00,
       2.000e+00, 2.540e+00, 5.000e+00, 5.520e+00, 1.000e+01, 2.000e+01,
       5.000e+01, 1.000e+02, 2.000e+02, 5.000e+02, 1.000e+03, 2.000e+03,
       5.000e+03])

In [43]:
r_ari = ari.copy()
r_ari = r_ari.round(decimals=2)
r_ari

array([8.0000e-02, 1.7000e-01, 2.5000e-01, 3.3000e-01, 5.0000e-01,
       1.0000e+00, 1.4400e+00, 2.0000e+00, 4.4800e+00, 5.0000e+00,
       9.4900e+00, 1.9500e+01, 4.9500e+01, 9.9500e+01, 1.9950e+02,
       4.9950e+02, 9.9950e+02, 1.9995e+03, 4.9995e+03])

In [44]:
r_frequency_table = pd.DataFrame(
    data=np.column_stack(tup=(r_ey, r_aep_percentage, r_aep_1_in_x, r_ari, frequency_window, frequency_label, frequency_tag)),
    columns=['EY', 'AEP_percentage', 'AEP_1_in_x', 'ARI', 'freq_window', 'freq_label', 'freq_tag']
    )
r_frequency_table.to_csv(path_or_buf='{}table_rounded_frequency.csv'.format(output_dir), index=False)
r_frequency_table.to_parquet(path='{}table_rounded_frequency.parquet'.format(output_dir))
r_frequency_table

Unnamed: 0,EY,AEP_percentage,AEP_1_in_x,ARI,freq_window,freq_label,freq_tag
0,12.0,100.0,1.0,0.08,frequent,12EY,freq_12EY
1,6.0,99.75,1.002,0.17,frequent,6EY,freq_6EY
2,4.0,98.17,1.02,0.25,frequent,4EY,freq_4EY
3,3.0,95.02,1.05,0.33,frequent,3EY,freq_3EY
4,2.0,86.47,1.16,0.5,frequent,2EY,freq_2EY
5,1.0,63.21,1.58,1.0,frequent,63.2%,freq_63_2_perc
6,0.69,50.0,2.0,1.44,frequent,50%,freq_50_perc
7,0.5,39.35,2.54,2.0,frequent,0.5EY,freq_0_5EY
8,0.22,20.0,5.0,4.48,frequent,20%,freq_20_perc
9,0.2,18.13,5.52,5.0,frequent,0.2EY,freq_0_2EY


# merge stats and pattern tables into timeseries

In [46]:
df_storm_stats = storm_stats_table.copy()
df_storm_stats.drop(columns=storm_stats_table.columns[[1,2,4,5,7,8,9,10,11,12,13,14]], inplace=True)
df_storm_stats.rename(columns={'Event_ID':'event_id', 'Burst_Duration_in_min':'duration_in_min', 'AEP_Window':'freq_window'}, inplace=True)
df_storm_stats['id_duration'] = pd.Series(data=np.unique(ar=df_storm_stats.duration_in_min.to_numpy(), return_inverse=True)[1])
df_storm_stats['id_window'] = pd.Series(data=np.unique(ar=df_storm_stats.freq_window.to_numpy(), return_inverse=True)[1])
df_storm_stats['id_pattern'] = pd.Series(data=list(np.arange(stop=10))*int(len(df_storm_stats)/10))

df_storm_stats

Unnamed: 0,event_id,duration_in_min,freq_window,id_duration,id_window,id_pattern
0,4380,10,frequent,0,0,0
1,4382,10,frequent,0,0,1
2,4384,10,frequent,0,0,2
3,4385,10,frequent,0,0,3
4,4386,10,frequent,0,0,4
...,...,...,...,...,...,...
715,2856,10080,rare,23,2,5
716,5064,10080,rare,23,2,6
717,5069,10080,rare,23,2,7
718,5071,10080,rare,23,2,8


In [47]:
df_temp_pattern = temp_pattern_table.copy()
df_temp_pattern['time_series'] = df_temp_pattern.apply(func=lambda arg: [ind for ind in arg.to_list()[5:] if str(ind) != 'nan'], axis=1)
df_temp_pattern.drop(columns=df_temp_pattern.columns[5:-1], inplace=True)
df_temp_pattern.drop(columns='Region', inplace=True)
df_temp_pattern.rename(columns={'EventID':'event_id', 'Duration':'duration_in_min', 'TimeStep':'time_step', 'AEP':'freq_window'}, inplace=True)
df_temp_pattern['id_duration'] = pd.Series(data=np.unique(ar=df_temp_pattern.duration_in_min.to_numpy(), return_inverse=True)[1])
df_temp_pattern['id_timestep'] = pd.Series(data=np.unique(ar=df_temp_pattern.time_step.to_numpy(), return_inverse=True)[1])
df_temp_pattern['id_window'] = pd.Series(data=np.unique(ar=df_temp_pattern.freq_window.to_numpy(), return_inverse=True)[1])
df_temp_pattern['id_pattern'] = pd.Series(data=list(np.arange(stop=10))*int(len(df_temp_pattern)/10))

df_temp_pattern

Unnamed: 0,event_id,duration_in_min,time_step,freq_window,time_series,id_duration,id_timestep,id_window,id_pattern
0,4380,10,5,frequent,"[58.06, 41.94]",0,0,0,0
1,4382,10,5,frequent,"[52.13, 47.87]",0,0,0,1
2,4384,10,5,frequent,"[60.71, 39.29]",0,0,0,2
3,4385,10,5,frequent,"[51.51, 48.49]",0,0,0,3
4,4386,10,5,frequent,"[54.55, 45.45]",0,0,0,4
...,...,...,...,...,...,...,...,...,...
715,2856,10080,180,rare,"[1.14, 1.8, 1.36, 1.29, 3.2, 4.78, 3.28, 3.13,...",23,5,2,5
716,5064,10080,180,rare,"[0.61, 0.93, 0.26, 0.04, 2.91, 1.17, 4.05, 0.1...",23,5,2,6
717,5069,10080,180,rare,"[0.0, 0.0, 0.0, 1.06, 0.04, 0.41, 0.71, 0.01, ...",23,5,2,7
718,5071,10080,180,rare,"[1.21, 0.0, 0.0, 0.0, 0.0, 0.04, 0.52, 2.96, 1...",23,5,2,8


In [49]:
df_timeseries = pd.merge(left=df_storm_stats, right=df_temp_pattern, how='inner', on='event_id', suffixes=(None, '__del'))# left_on='Event_ID', right_on='EventID')
df_timeseries.drop(columns=[ind for ind in df_timeseries.columns.to_list() if ind.rsplit(sep='__', maxsplit=1)[-1] == 'del'], inplace=True)

df_timeseries

Unnamed: 0,event_id,duration_in_min,freq_window,id_duration,id_window,id_pattern,time_step,time_series,id_timestep
0,4380,10,frequent,0,0,0,5,"[58.06, 41.94]",0
1,4382,10,frequent,0,0,1,5,"[52.13, 47.87]",0
2,4384,10,frequent,0,0,2,5,"[60.71, 39.29]",0
3,4385,10,frequent,0,0,3,5,"[51.51, 48.49]",0
4,4386,10,frequent,0,0,4,5,"[54.55, 45.45]",0
...,...,...,...,...,...,...,...,...,...
715,2856,10080,rare,23,2,5,180,"[1.14, 1.8, 1.36, 1.29, 3.2, 4.78, 3.28, 3.13,...",5
716,5064,10080,rare,23,2,6,180,"[0.61, 0.93, 0.26, 0.04, 2.91, 1.17, 4.05, 0.1...",5
717,5069,10080,rare,23,2,7,180,"[0.0, 0.0, 0.0, 1.06, 0.04, 0.41, 0.71, 0.01, ...",5
718,5071,10080,rare,23,2,8,180,"[1.21, 0.0, 0.0, 0.0, 0.0, 0.04, 0.52, 2.96, 1...",5


# idf depths

In [52]:
df_map = df_timeseries.copy()
df_map = df_map[['duration_in_min', 'time_step']]
df_map.reset_index(drop=True, inplace=True)
df_map = {key:val for key,val in zip(df_map.duration_in_min, df_map.time_step)}

df_map

{10: 5,
 15: 5,
 20: 5,
 25: 5,
 30: 5,
 45: 5,
 60: 5,
 90: 5,
 120: 5,
 180: 15,
 270: 15,
 360: 15,
 540: 30,
 720: 30,
 1080: 60,
 1440: 60,
 1800: 120,
 2160: 120,
 2880: 120,
 4320: 180,
 5760: 180,
 7200: 180,
 8640: 180,
 10080: 180}

In [53]:
df_id = frequency_table.copy()
df_id.drop(index=len(df_id)-1, inplace=True)
df_id = df_id[['freq_window', 'freq_label', 'freq_tag']]#.iloc[:-1,4:]
df_id = pd.concat(objs=[
    pd.Series(data=np.full(shape=len(df_id), fill_value='depth')),
    df_id,
    pd.Series(data=np.unique(ar=df_id.freq_window.to_numpy(), return_inverse=True)[1], name='id_window'),
    df_id.index.to_series(name='id_tag')
    ], axis=1)

df_id

Unnamed: 0,0,freq_window,freq_label,freq_tag,id_window,id_tag
0,depth,frequent,12EY,freq_12EY,0,0
1,depth,frequent,6EY,freq_6EY,0,1
2,depth,frequent,4EY,freq_4EY,0,2
3,depth,frequent,3EY,freq_3EY,0,3
4,depth,frequent,2EY,freq_2EY,0,4
5,depth,frequent,63.2%,freq_63_2_perc,0,5
6,depth,frequent,50%,freq_50_perc,0,6
7,depth,frequent,0.5EY,freq_0_5EY,0,7
8,depth,frequent,20%,freq_20_perc,0,8
9,depth,frequent,0.2EY,freq_0_2EY,0,9


In [54]:
df_idf = idf_table.copy()
df_idf = df_idf[df_idf.Duration_in_min >= 10]
df_idf.reset_index(drop=True, inplace=True)
df_idf.rename(columns={'Duration':'duration', 'Duration_in_min':'duration_in_min'}, inplace=True)
df_idf.duration_in_min = df_idf.duration_in_min.astype(int)
df_idf['id_duration'] = pd.Series(data=np.unique(ar=df_idf.duration_in_min.to_numpy(), return_inverse=True)[1])
df_idf['time_step'] = df_idf.duration_in_min.map(arg=df_map)
df_idf['id_timestep'] = pd.Series(data=np.unique(ar=df_idf.time_step.to_numpy(), return_inverse=True)[1])
df_idf.set_index(keys=[ind for ind in df_idf.columns.to_list() if ind.split(sep='_', maxsplit=1)[0] == 'id'] + ['duration', 'units', 'duration_in_min', 'time_step'] , inplace=True)
df_idf.columns = pd.MultiIndex.from_frame(df=df_id)
df_idf = df_idf.stack(level=list(df_idf.columns.names)[1:], future_stack=True)
df_idf.reset_index(inplace=True)

df_idf

Unnamed: 0,id_duration,id_timestep,duration,units,duration_in_min,time_step,freq_window,freq_label,freq_tag,id_window,id_tag,depth
0,0,0,10.0,min,10,5,frequent,12EY,freq_12EY,0,0,5.15
1,0,0,10.0,min,10,5,frequent,6EY,freq_6EY,0,1,5.91
2,0,0,10.0,min,10,5,frequent,4EY,freq_4EY,0,2,7.28
3,0,0,10.0,min,10,5,frequent,3EY,freq_3EY,0,3,8.27
4,0,0,10.0,min,10,5,frequent,2EY,freq_2EY,0,4,9.70
...,...,...,...,...,...,...,...,...,...,...,...,...
427,23,5,168.0,hour,10080,180,rare,1%,freq_1_perc,2,13,462.00
428,23,5,168.0,hour,10080,180,rare,1 in 200,freq_1_in_200,2,14,514.00
429,23,5,168.0,hour,10080,180,rare,1 in 500,freq_1_in_500,2,15,578.00
430,23,5,168.0,hour,10080,180,rare,1 in 1000,freq_1_in_1000,2,16,628.00


# rainfall_patterns

In [55]:
rain_data = pd.merge(left=df_timeseries, right=df_idf, how='cross', suffixes=[None, '__del'])
rain_data = rain_data[
    (rain_data.id_window == rain_data.id_window__del) & 
    (rain_data.id_duration == rain_data.id_duration__del) & 
    (rain_data.id_timestep == rain_data.id_timestep__del)
    ]
rain_data.drop(columns=[ind for ind in rain_data.columns.to_list() if ind.rsplit(sep='__', maxsplit=1)[-1] == 'del'], inplace=True)
rain_data.drop(columns='event_id', inplace=True)
rain_data.drop_duplicates(subset=list(rain_data.columns.to_numpy()[~np.isin(element=rain_data.columns.to_numpy(), test_elements='time_series')]), inplace=True)
rain_data.sort_values(by=['id_tag', 'id_duration', 'id_timestep', 'id_pattern'], inplace=True)
rain_data.reset_index(drop=True, inplace=True)
rain_data['id_group'] = pd.Series(data=np.concatenate([np.full(shape=10, fill_value=ind) for ind in np.arange(stop=np.divide(rain_data.shape[0], 10), dtype=np.int64)]))
rain_data['rain_label'] = (
    (rain_data.id_group.astype(str).str.len().max() - rain_data.id_group.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_group)) + rain_data.id_group.astype(str) + '__' +
    (rain_data.index.astype(str).str.len().max() - rain_data.index.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.index)) + rain_data.index.astype(str) + '__' +
    (rain_data.id_tag.astype(str).str.len().max() - rain_data.id_tag.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_tag)) + rain_data.id_tag.astype(str) + '_' +
    (rain_data.id_duration.astype(str).str.len().max() - rain_data.id_duration.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_duration)) + rain_data.id_duration.astype(str) + '_' +
    (rain_data.id_timestep.astype(str).str.len().max() - rain_data.id_timestep.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_timestep)) + rain_data.id_timestep.astype(str) + '_' +
    (rain_data.id_pattern.astype(str).str.len().max() - rain_data.id_pattern.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_pattern)) + rain_data.id_pattern.astype(str) + '__' +
    (rain_data.id_window.astype(str).str.len().max() - rain_data.id_window.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_window)) + rain_data.id_window.astype(str) + '__' +
    (rain_data.freq_window.str.len().max() - rain_data.freq_window.str.len())*pd.Series(data=['_']*len(rain_data.freq_window)) + rain_data.freq_window + '__' +
    (rain_data.freq_tag.str.len().max() - rain_data.freq_tag.str.len())*pd.Series(data=['_']*len(rain_data.freq_tag)) + rain_data.freq_tag + '__' +
    (rain_data.duration_in_min.astype(str).str.len().max() - rain_data.duration_in_min.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.duration_in_min)) + rain_data.duration_in_min.astype(str) + '__' +
    (rain_data.time_step.astype(str).str.len().max() - rain_data.time_step.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.time_step)) + rain_data.time_step.astype(str) + '__' +
    (rain_data.id_pattern.astype(str).str.len().max() - rain_data.id_pattern.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_pattern)) + rain_data.id_pattern.astype(str)
    )
rain_data.time_series = rain_data.time_series.apply(func=lambda arg: [0] + arg + [0])

rain_data

Unnamed: 0,duration_in_min,freq_window,id_duration,id_window,id_pattern,time_step,time_series,id_timestep,duration,units,freq_label,freq_tag,id_tag,depth,id_group,rain_label
0,10,frequent,0,0,0,5,"[0, 58.06, 41.94, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0000__00_00_0_0__0______frequent_______fr...
1,10,frequent,0,0,1,5,"[0, 52.13, 47.87, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0001__00_00_0_1__0______frequent_______fr...
2,10,frequent,0,0,2,5,"[0, 60.71, 39.29, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0002__00_00_0_2__0______frequent_______fr...
3,10,frequent,0,0,3,5,"[0, 51.51, 48.49, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0003__00_00_0_3__0______frequent_______fr...
4,10,frequent,0,0,4,5,"[0, 54.55, 45.45, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0004__00_00_0_4__0______frequent_______fr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4315,10080,rare,23,2,5,180,"[0, 1.14, 1.8, 1.36, 1.29, 3.2, 4.78, 3.28, 3....",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4315__17_23_5_5__2__________rare__freq_1_...
4316,10080,rare,23,2,6,180,"[0, 0.61, 0.93, 0.26, 0.04, 2.91, 1.17, 4.05, ...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4316__17_23_5_6__2__________rare__freq_1_...
4317,10080,rare,23,2,7,180,"[0, 0.0, 0.0, 0.0, 1.06, 0.04, 0.41, 0.71, 0.0...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4317__17_23_5_7__2__________rare__freq_1_...
4318,10080,rare,23,2,8,180,"[0, 1.21, 0.0, 0.0, 0.0, 0.0, 0.04, 0.52, 2.96...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4318__17_23_5_8__2__________rare__freq_1_...


# inp files

In [56]:
def create_inp_files(
    arg_rain_label, arg_time_series, arg_depth, arg_time_step,
    arg_sim_starting_time, arg_timeseries_name,
    arg_inp_file_dir, arg_inp_file_path, arg_additional_rain_stopping_time
    ):

    # create inp file
    inp_file = '{}inp_file__{}.inp'.format(arg_inp_file_dir, arg_rain_label)
    shutil.copyfile(src=arg_inp_file_path, dst=inp_file)

    # [TIMESERIES] section
    rain_value = pd.Series(
        data=np.divide(np.multiply(arg_depth, np.array(object=arg_time_series)), 100), name='Value')

    rain_date_time = pd.Series(data=pd.date_range(
        start=arg_sim_starting_time,
        periods=rain_value.size,
        freq=pd.Timedelta(minutes=arg_time_step)
        ), name='date_time')
    rain_date = rain_date_time.dt.strftime(date_format='%m/%d/%Y')
    rain_date.name = 'Date'
    rain_time = rain_date_time.dt.strftime(date_format='%H:%M')
    rain_time.name = 'Time'

    rain_timeseries_name = pd.Series(data=[arg_timeseries_name]*rain_value.size, name='Name')

    timeseries_section = pd.concat(objs=[rain_timeseries_name, rain_date, rain_time, rain_value], axis=1)
    timeseries_section.set_index(keys='Name', inplace=True)

    # [OPTIONS] section
    end_date_time = rain_date_time.iloc[-1] + arg_additional_rain_stopping_time
    end_date = end_date_time.strftime(format='%m/%d/%Y')
    end_time = end_date_time.strftime(format='%H:%M:%S')

    options_section = swmmio.utils.dataframes.dataframe_from_inp(inp_path=inp_file, section='[OPTIONS]')
    options_section.at['END_DATE', 'Value'] = end_date
    options_section.at['END_TIME', 'Value'] = end_time

    # [RAINGAGES] section
    time_interval = str(pd.Timedelta(minutes=arg_time_step)).split(sep=' ')[-1].rsplit(sep=':', maxsplit=1)[0]

    raingauges_section = swmmio.utils.dataframes.dataframe_from_inp(inp_path=inp_file, section='[RAINGAGES]')
    raingauges_section.at[arg_timeseries_name, 'TimeIntrvl'] = time_interval

    # replace section into SWMM files
    [swmmio.utils.modify_model.replace_inp_section(inp_path=inp_file, modified_section_header=ind1, new_data=ind2)
    for ind1, ind2 in zip(
        ['[OPTIONS]', '[RAINGAGES]', '[TIMESERIES]'], 
        [options_section, raingauges_section, timeseries_section]
        )]

In [57]:
rain_data.apply(func=lambda arg: create_inp_files(
    arg.rain_label, arg.time_series, arg.depth, arg.time_step, 
    simulation_starting_time, timeseries_name,
    inp_file_dir, inp_file_path, stopping_time_after_precipitation_finish
    ), axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
4315    None
4316    None
4317    None
4318    None
4319    None
Length: 4320, dtype: object

# run SWMM

In [58]:
def run_swmm(arg_inp_dir, arg_link, arg_label):

    model = '{}inp_file__{}.inp'.format(arg_inp_dir, arg_label)

    with Simulation(inputfile=model) as sim:
        link_sim = Links(model=sim)[arg_link]
        time_stamp = []
        flow_rate = []
        for ind2, step in enumerate(sim):
            if (sim.current_time.minute%5 == 0)&(sim.current_time.second == 0):
                time_stamp.append(sim.current_time)
                flow_rate.append(link_sim.flow)
        
        df_flow_data = pd.concat(
            objs=[pd.Series(data=time_stamp, name='timestamp'), pd.Series(data=flow_rate, name='flow_rate')], 
            axis=1)
        df_flow_data.set_index(keys='timestamp', inplace=True)

        return [max(flow_rate), df_flow_data]

In [59]:
def run_multiprocessing(arg_tuples_from_label_series, arg_n_core=n_core, arg_max_tasks_per_child=100):

    n_task = len(arg_tuples_from_label_series)
    n_task_per_chunk = math.ceil(n_task/arg_n_core)

    with mp.Pool(processes=arg_n_core, maxtasksperchild=arg_max_tasks_per_child) as pool:
        outflow = [ind for ind in pool.starmap(func=run_swmm, iterable=arg_tuples_from_label_series, chunksize=n_task_per_chunk)]

    return outflow

In [60]:
rain_data[['max_flow_rate', 'flow_data']] = pd.DataFrame(data=run_multiprocessing(
    [(inp_file_dir, link_to_get_results, ind) for ind in rain_data.rain_label.to_list()], n_core))
rain_data

Unnamed: 0,duration_in_min,freq_window,id_duration,id_window,id_pattern,time_step,time_series,id_timestep,duration,units,freq_label,freq_tag,id_tag,depth,id_group,rain_label,max_flow_rate,flow_data
0,10,frequent,0,0,0,5,"[0, 58.06, 41.94, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0000__00_00_0_0__0______frequent_______fr...,3.092359,flow_rate timestamp ...
1,10,frequent,0,0,1,5,"[0, 52.13, 47.87, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0001__00_00_0_1__0______frequent_______fr...,3.095752,flow_rate timestamp ...
2,10,frequent,0,0,2,5,"[0, 60.71, 39.29, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0002__00_00_0_2__0______frequent_______fr...,3.097545,flow_rate timestamp ...
3,10,frequent,0,0,3,5,"[0, 51.51, 48.49, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0003__00_00_0_3__0______frequent_______fr...,3.090740,flow_rate timestamp ...
4,10,frequent,0,0,4,5,"[0, 54.55, 45.45, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0004__00_00_0_4__0______frequent_______fr...,3.092550,flow_rate timestamp ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4315,10080,rare,23,2,5,180,"[0, 1.14, 1.8, 1.36, 1.29, 3.2, 4.78, 3.28, 3....",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4315__17_23_5_5__2__________rare__freq_1_...,5.326424,flow_rate timestamp ...
4316,10080,rare,23,2,6,180,"[0, 0.61, 0.93, 0.26, 0.04, 2.91, 1.17, 4.05, ...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4316__17_23_5_6__2__________rare__freq_1_...,5.930555,flow_rate timestamp ...
4317,10080,rare,23,2,7,180,"[0, 0.0, 0.0, 0.0, 1.06, 0.04, 0.41, 0.71, 0.0...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4317__17_23_5_7__2__________rare__freq_1_...,16.754755,flow_rate timestamp ...
4318,10080,rare,23,2,8,180,"[0, 1.21, 0.0, 0.0, 0.0, 0.0, 0.04, 0.52, 2.96...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4318__17_23_5_8__2__________rare__freq_1_...,12.556005,flow_rate timestamp ...


# merging and rearrenging dataframes

In [62]:
def temp_pattern(arg_time_series, arg_time_step, arg_sim_start_time):

    cum_pattern = pd.Series(
        data=np.array(object=arg_time_series)[:-1].cumsum(), 
        name='cum_pattern')
    pat_timestamp = pd.Series(
        data=pd.date_range(
            start=arg_sim_start_time, 
            periods=cum_pattern.size, 
            freq=pd.Timedelta(minutes=arg_time_step)
            ), 
        name='timestamp')
    
    df_cum_pattern = pd.concat(objs=[pat_timestamp, cum_pattern], axis=1)
    df_cum_pattern.set_index(keys='timestamp', inplace=True)

    return df_cum_pattern

In [63]:
rain_data['cum_temp_pattern_data'] = rain_data.apply(func=lambda arg: temp_pattern(arg.time_series, arg.time_step, simulation_starting_time), axis=1)
rain_data

Unnamed: 0,duration_in_min,freq_window,id_duration,id_window,id_pattern,time_step,time_series,id_timestep,duration,units,freq_label,freq_tag,id_tag,depth,id_group,rain_label,max_flow_rate,flow_data,cum_temp_pattern_data
0,10,frequent,0,0,0,5,"[0, 58.06, 41.94, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0000__00_00_0_0__0______frequent_______fr...,3.092359,flow_rate timestamp ...,cum_pattern timestamp ...
1,10,frequent,0,0,1,5,"[0, 52.13, 47.87, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0001__00_00_0_1__0______frequent_______fr...,3.095752,flow_rate timestamp ...,cum_pattern timestamp ...
2,10,frequent,0,0,2,5,"[0, 60.71, 39.29, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0002__00_00_0_2__0______frequent_______fr...,3.097545,flow_rate timestamp ...,cum_pattern timestamp ...
3,10,frequent,0,0,3,5,"[0, 51.51, 48.49, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0003__00_00_0_3__0______frequent_______fr...,3.090740,flow_rate timestamp ...,cum_pattern timestamp ...
4,10,frequent,0,0,4,5,"[0, 54.55, 45.45, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0004__00_00_0_4__0______frequent_______fr...,3.092550,flow_rate timestamp ...,cum_pattern timestamp ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4315,10080,rare,23,2,5,180,"[0, 1.14, 1.8, 1.36, 1.29, 3.2, 4.78, 3.28, 3....",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4315__17_23_5_5__2__________rare__freq_1_...,5.326424,flow_rate timestamp ...,cum_pattern timestamp ...
4316,10080,rare,23,2,6,180,"[0, 0.61, 0.93, 0.26, 0.04, 2.91, 1.17, 4.05, ...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4316__17_23_5_6__2__________rare__freq_1_...,5.930555,flow_rate timestamp ...,cum_pattern timestamp ...
4317,10080,rare,23,2,7,180,"[0, 0.0, 0.0, 0.0, 1.06, 0.04, 0.41, 0.71, 0.0...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4317__17_23_5_7__2__________rare__freq_1_...,16.754755,flow_rate timestamp ...,cum_pattern timestamp ...
4318,10080,rare,23,2,8,180,"[0, 1.21, 0.0, 0.0, 0.0, 0.0, 0.04, 0.52, 2.96...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4318__17_23_5_8__2__________rare__freq_1_...,12.556005,flow_rate timestamp ...,cum_pattern timestamp ...


In [65]:
def rainfall_depth(arg_time_series, arg_depth, arg_time_step, arg_sim_start_time):

    prec_depth = pd.Series(
        data=np.divide(np.multiply(arg_depth, np.array(object=arg_time_series)), 100),
        name='prec_depth')
    prec_timestamp = pd.Series(
        data=pd.date_range(
            start=arg_sim_start_time, 
            periods=prec_depth.size, 
            freq=pd.Timedelta(minutes=arg_time_step)
            ), 
        name='timestamp')

    df_precipitation = pd.concat(objs=[prec_timestamp, prec_depth], axis=1)
    df_precipitation.set_index(keys='timestamp', inplace=True)

    return df_precipitation

In [66]:
rain_data['rainfall_data'] = rain_data.apply(func=lambda arg: rainfall_depth(arg.time_series, arg.depth, arg.time_step, simulation_starting_time), axis=1)
rain_data

Unnamed: 0,duration_in_min,freq_window,id_duration,id_window,id_pattern,time_step,time_series,id_timestep,duration,units,freq_label,freq_tag,id_tag,depth,id_group,rain_label,max_flow_rate,flow_data,cum_temp_pattern_data,rainfall_data
0,10,frequent,0,0,0,5,"[0, 58.06, 41.94, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0000__00_00_0_0__0______frequent_______fr...,3.092359,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
1,10,frequent,0,0,1,5,"[0, 52.13, 47.87, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0001__00_00_0_1__0______frequent_______fr...,3.095752,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
2,10,frequent,0,0,2,5,"[0, 60.71, 39.29, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0002__00_00_0_2__0______frequent_______fr...,3.097545,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
3,10,frequent,0,0,3,5,"[0, 51.51, 48.49, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0003__00_00_0_3__0______frequent_______fr...,3.090740,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4,10,frequent,0,0,4,5,"[0, 54.55, 45.45, 0]",0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0004__00_00_0_4__0______frequent_______fr...,3.092550,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4315,10080,rare,23,2,5,180,"[0, 1.14, 1.8, 1.36, 1.29, 3.2, 4.78, 3.28, 3....",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4315__17_23_5_5__2__________rare__freq_1_...,5.326424,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4316,10080,rare,23,2,6,180,"[0, 0.61, 0.93, 0.26, 0.04, 2.91, 1.17, 4.05, ...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4316__17_23_5_6__2__________rare__freq_1_...,5.930555,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4317,10080,rare,23,2,7,180,"[0, 0.0, 0.0, 0.0, 1.06, 0.04, 0.41, 0.71, 0.0...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4317__17_23_5_7__2__________rare__freq_1_...,16.754755,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4318,10080,rare,23,2,8,180,"[0, 1.21, 0.0, 0.0, 0.0, 0.0, 0.04, 0.52, 2.96...",5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4318__17_23_5_8__2__________rare__freq_1_...,12.556005,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...


In [68]:
rain_data.drop(columns='time_series', inplace=True)
rain_data

Unnamed: 0,duration_in_min,freq_window,id_duration,id_window,id_pattern,time_step,id_timestep,duration,units,freq_label,freq_tag,id_tag,depth,id_group,rain_label,max_flow_rate,flow_data,cum_temp_pattern_data,rainfall_data
0,10,frequent,0,0,0,5,0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0000__00_00_0_0__0______frequent_______fr...,3.092359,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
1,10,frequent,0,0,1,5,0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0001__00_00_0_1__0______frequent_______fr...,3.095752,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
2,10,frequent,0,0,2,5,0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0002__00_00_0_2__0______frequent_______fr...,3.097545,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
3,10,frequent,0,0,3,5,0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0003__00_00_0_3__0______frequent_______fr...,3.090740,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4,10,frequent,0,0,4,5,0,10.0,min,12EY,freq_12EY,0,5.15,0,000__0004__00_00_0_4__0______frequent_______fr...,3.092550,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4315,10080,rare,23,2,5,180,5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4315__17_23_5_5__2__________rare__freq_1_...,5.326424,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4316,10080,rare,23,2,6,180,5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4316__17_23_5_6__2__________rare__freq_1_...,5.930555,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4317,10080,rare,23,2,7,180,5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4317__17_23_5_7__2__________rare__freq_1_...,16.754755,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...
4318,10080,rare,23,2,8,180,5,168.0,hour,1 in 2000,freq_1_in_2000,17,677.00,431,431__4318__17_23_5_8__2__________rare__freq_1_...,12.556005,flow_rate timestamp ...,cum_pattern timestamp ...,prec_depth timestamp ...


In [69]:
rain_data = pd.DataFrame(data=rain_data.groupby(by=['id_tag', 'id_duration']), columns=['id_tag_dur', 'rainfall_data'])
rain_data[['id_tag', 'id_duration']] = rain_data.apply(func=lambda arg: (arg.id_tag_dur[0], arg.id_tag_dur[1]), axis=1, result_type='expand')
rain_data.drop(columns='id_tag_dur', inplace=True)

rain_data

Unnamed: 0,rainfall_data,id_tag,id_duration
0,duration_in_min freq_window id_duration i...,0,0
1,duration_in_min freq_window id_duration ...,0,1
2,duration_in_min freq_window id_duration ...,0,2
3,duration_in_min freq_window id_duration ...,0,3
4,duration_in_min freq_window id_duration ...,0,4
...,...,...,...
427,duration_in_min freq_window id_duration...,17,19
428,duration_in_min freq_window id_duration...,17,20
429,duration_in_min freq_window id_duration...,17,21
430,duration_in_min freq_window id_duration...,17,22


In [70]:
def rearrange_timeseries(arg_df_rainfall_data):

    arg_df_rainfall_data.drop(columns=['id_tag', 'id_duration'], inplace=True)

    ind_sep = np.isin(
        element=arg_df_rainfall_data.columns.to_numpy(), 
        test_elements=np.array(object=[
            'id_pattern', 'rain_label', 'max_flow_rate', 
            'flow_data', 'cum_temp_pattern_data', 'rainfall_data']))
    arg_df_rainfall_data, time_series_data = (
        arg_df_rainfall_data.copy()[list(arg_df_rainfall_data.columns.to_numpy()[~ind_sep])], 
        arg_df_rainfall_data.copy()[list(arg_df_rainfall_data.columns.to_numpy()[ind_sep])])
    arg_df_rainfall_data.drop_duplicates(inplace=True)
    arg_df_rainfall_data.reset_index(drop=True, inplace=True)
    time_series_data.reset_index(drop=True, inplace=True)

    max_flow_rate_data = time_series_data.max_flow_rate.to_numpy()

    flow_rate_data = pd.concat(objs=time_series_data.flow_data.to_list(), axis=1)
    flow_rate_data.columns = ['flow rate {:02}'.format(ind) for ind in np.arange(stop=flow_rate_data.columns.size)]
    flow_rate_data = flow_rate_data.iloc[:np.where(flow_rate_data.index.to_numpy() == flow_rate_data[flow_rate_data.sum(axis=1) != 0].iloc[-1].name)[0][0]+2, :]

    pattern_data = pd.concat(objs=time_series_data.cum_temp_pattern_data.to_list(), axis=1)
    pattern_data.columns = ['pattern {:02}'.format(ind) for ind in np.arange(stop=pattern_data.columns.size)]

    prec_data = pd.concat(objs=time_series_data.rainfall_data.to_list(), axis=1)
    prec_data.columns = ['rain depth {:02}'.format(ind) for ind in np.arange(stop=prec_data.columns.size)]

    arg_df_rainfall_data['max_flow_rate'] = pd.Series()
    arg_df_rainfall_data['flow_data'] = pd.Series()
    arg_df_rainfall_data['cum_pattern_data'] = pd.Series()
    arg_df_rainfall_data['prec_data'] = pd.Series()

    arg_df_rainfall_data.at[0, 'max_flow_rate'] = max_flow_rate_data
    arg_df_rainfall_data.at[0, 'flow_data'] = flow_rate_data
    arg_df_rainfall_data.at[0, 'cum_pattern_data'] = pattern_data
    arg_df_rainfall_data.at[0, 'prec_data'] = prec_data

    arg_df_rainfall_data = tuple(arg_df_rainfall_data.itertuples(index=False, name=None))[0]

    return arg_df_rainfall_data

In [71]:
rain_data[['duration_in_min', 'freq_window', 'id_window', 'time_step',
    'id_timestep', 'duration', 'units', 'freq_label', 'freq_tag', 
    'depth', 'id_group', 'max_flow_rate', 'flow_data', 
    'cum_pattern_data', 'prec_data']] = rain_data.apply(func=lambda arg: rearrange_timeseries(arg.rainfall_data), axis=1, result_type='expand')
rain_data

Unnamed: 0,rainfall_data,id_tag,id_duration,duration_in_min,freq_window,id_window,time_step,id_timestep,duration,units,freq_label,freq_tag,depth,id_group,max_flow_rate,flow_data,cum_pattern_data,prec_data
0,duration_in_min freq_window id_window id_...,0,0,10,frequent,0,5,0,10.0,min,12EY,freq_12EY,5.15,0,"[3.092359319515403, 3.0957524698775174, 3.0975...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
1,duration_in_min freq_window id_window id...,0,1,15,frequent,0,5,0,15.0,min,12EY,freq_12EY,6.29,1,"[3.2815647857861006, 3.329428660559919, 3.2097...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
2,duration_in_min freq_window id_window id...,0,2,20,frequent,0,5,0,20.0,min,12EY,freq_12EY,7.16,2,"[3.475541961735609, 3.043977490542852, 3.10098...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
3,duration_in_min freq_window id_window id...,0,3,25,frequent,0,5,0,25.0,min,12EY,freq_12EY,7.86,3,"[2.8560383296836087, 2.983632935728466, 3.2046...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
4,duration_in_min freq_window id_window id...,0,4,30,frequent,0,5,0,30.0,min,12EY,freq_12EY,8.46,4,"[3.1831950859288947, 3.285209879961788, 2.9636...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,duration_in_min freq_window id_window ...,17,19,4320,rare,2,180,5,72.0,hour,1 in 2000,freq_1_in_2000,642.00,427,"[11.371888551159584, 7.247327487231274, 17.314...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
428,duration_in_min freq_window id_window ...,17,20,5760,rare,2,180,5,96.0,hour,1 in 2000,freq_1_in_2000,656.00,428,"[10.248959306689986, 7.724093598878112, 13.244...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
429,duration_in_min freq_window id_window ...,17,21,7200,rare,2,180,5,120.0,hour,1 in 2000,freq_1_in_2000,662.00,429,"[8.55124487004027, 7.695830049066009, 5.932231...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
430,duration_in_min freq_window id_window ...,17,22,8640,rare,2,180,5,144.0,hour,1 in 2000,freq_1_in_2000,668.00,430,"[7.895309234218781, 5.408443503679562, 9.91701...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...


In [72]:
rain_data.drop(columns='rainfall_data', inplace=True)
rain_data

Unnamed: 0,id_tag,id_duration,duration_in_min,freq_window,id_window,time_step,id_timestep,duration,units,freq_label,freq_tag,depth,id_group,max_flow_rate,flow_data,cum_pattern_data,prec_data
0,0,0,10,frequent,0,5,0,10.0,min,12EY,freq_12EY,5.15,0,"[3.092359319515403, 3.0957524698775174, 3.0975...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
1,0,1,15,frequent,0,5,0,15.0,min,12EY,freq_12EY,6.29,1,"[3.2815647857861006, 3.329428660559919, 3.2097...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
2,0,2,20,frequent,0,5,0,20.0,min,12EY,freq_12EY,7.16,2,"[3.475541961735609, 3.043977490542852, 3.10098...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
3,0,3,25,frequent,0,5,0,25.0,min,12EY,freq_12EY,7.86,3,"[2.8560383296836087, 2.983632935728466, 3.2046...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
4,0,4,30,frequent,0,5,0,30.0,min,12EY,freq_12EY,8.46,4,"[3.1831950859288947, 3.285209879961788, 2.9636...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,17,19,4320,rare,2,180,5,72.0,hour,1 in 2000,freq_1_in_2000,642.00,427,"[11.371888551159584, 7.247327487231274, 17.314...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
428,17,20,5760,rare,2,180,5,96.0,hour,1 in 2000,freq_1_in_2000,656.00,428,"[10.248959306689986, 7.724093598878112, 13.244...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
429,17,21,7200,rare,2,180,5,120.0,hour,1 in 2000,freq_1_in_2000,662.00,429,"[8.55124487004027, 7.695830049066009, 5.932231...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...
430,17,22,8640,rare,2,180,5,144.0,hour,1 in 2000,freq_1_in_2000,668.00,430,"[7.895309234218781, 5.408443503679562, 9.91701...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...


In [73]:
rain_data['label'] = (
    (rain_data.id_group.astype(str).str.len().max() - rain_data.id_group.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_group)) + rain_data.id_group.astype(str) + '__' +
    (rain_data.id_tag.astype(str).str.len().max() - rain_data.id_tag.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_tag)) + rain_data.id_tag.astype(str) + '_' +
    (rain_data.id_duration.astype(str).str.len().max() - rain_data.id_duration.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_duration)) + rain_data.id_duration.astype(str) + '_' +
    (rain_data.id_timestep.astype(str).str.len().max() - rain_data.id_timestep.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_timestep)) + rain_data.id_timestep.astype(str) + '__' +
    (rain_data.id_window.astype(str).str.len().max() - rain_data.id_window.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_window)) + rain_data.id_window.astype(str) + '__' +
    (rain_data.freq_window.str.len().max() - rain_data.freq_window.str.len())*pd.Series(data=['_']*len(rain_data.freq_window)) + rain_data.freq_window + '__' +
    (rain_data.freq_tag.str.len().max() - rain_data.freq_tag.str.len())*pd.Series(data=['_']*len(rain_data.freq_tag)) + rain_data.freq_tag + '__' +
    (rain_data.duration_in_min.astype(str).str.len().max() - rain_data.duration_in_min.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.duration_in_min)) + rain_data.duration_in_min.astype(str) + '__' +
    (rain_data.time_step.astype(str).str.len().max() - rain_data.time_step.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.time_step)) + rain_data.time_step.astype(str)
    )
rain_data

Unnamed: 0,id_tag,id_duration,duration_in_min,freq_window,id_window,time_step,id_timestep,duration,units,freq_label,freq_tag,depth,id_group,max_flow_rate,flow_data,cum_pattern_data,prec_data,label
0,0,0,10,frequent,0,5,0,10.0,min,12EY,freq_12EY,5.15,0,"[3.092359319515403, 3.0957524698775174, 3.0975...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,000__00_00_0__0______frequent_______freq_12EY_...
1,0,1,15,frequent,0,5,0,15.0,min,12EY,freq_12EY,6.29,1,"[3.2815647857861006, 3.329428660559919, 3.2097...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,001__00_01_0__0______frequent_______freq_12EY_...
2,0,2,20,frequent,0,5,0,20.0,min,12EY,freq_12EY,7.16,2,"[3.475541961735609, 3.043977490542852, 3.10098...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,002__00_02_0__0______frequent_______freq_12EY_...
3,0,3,25,frequent,0,5,0,25.0,min,12EY,freq_12EY,7.86,3,"[2.8560383296836087, 2.983632935728466, 3.2046...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,003__00_03_0__0______frequent_______freq_12EY_...
4,0,4,30,frequent,0,5,0,30.0,min,12EY,freq_12EY,8.46,4,"[3.1831950859288947, 3.285209879961788, 2.9636...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,004__00_04_0__0______frequent_______freq_12EY_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,17,19,4320,rare,2,180,5,72.0,hour,1 in 2000,freq_1_in_2000,642.00,427,"[11.371888551159584, 7.247327487231274, 17.314...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,427__17_19_5__2__________rare__freq_1_in_2000_...
428,17,20,5760,rare,2,180,5,96.0,hour,1 in 2000,freq_1_in_2000,656.00,428,"[10.248959306689986, 7.724093598878112, 13.244...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,428__17_20_5__2__________rare__freq_1_in_2000_...
429,17,21,7200,rare,2,180,5,120.0,hour,1 in 2000,freq_1_in_2000,662.00,429,"[8.55124487004027, 7.695830049066009, 5.932231...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,429__17_21_5__2__________rare__freq_1_in_2000_...
430,17,22,8640,rare,2,180,5,144.0,hour,1 in 2000,freq_1_in_2000,668.00,430,"[7.895309234218781, 5.408443503679562, 9.91701...",flow rate 00 flow rate 0...,pattern 00 pattern 01 p...,rain depth 00 rain depth...,430__17_22_5__2__________rare__freq_1_in_2000_...


# run multiprocessing for export data and plots

In [74]:
def export_data_and_plot(
    arg_freq_window, arg_freq_label, arg_duration_in_min, arg_time_step, 
    arg_id_group, arg_duration, arg_units, arg_depth, arg_label, 
    arg_cum_pattern_data, arg_prec_data, arg_flow_data,
    arg_rain_pattern_cum_dir, arg_precipitation_dir, arg_flow_rate_dir, arg_graphs_pattern_dir
    ):

    matplotlib.use(backend='agg')

    # export data
    arg_cum_pattern_data.to_parquet(path='{}cum_pattern__{}.parquet'.format(arg_rain_pattern_cum_dir, arg_label))
    arg_prec_data.to_parquet(path='{}prec_data__{}.parquet'.format(arg_precipitation_dir, arg_label))
    arg_flow_data.to_parquet(path='{}flow_data__{}.parquet'.format(arg_flow_rate_dir, arg_label))

    # plot
    fig, ax = plt.subplots(nrows=11, ncols=3, figsize=(10,40), num=1, clear=True)

    colors = list(mcolors.TABLEAU_COLORS.keys())

    # temporal patterns
    for col, color in zip(arg_cum_pattern_data.columns, colors):
        ax[0,0].plot(
            arg_cum_pattern_data.index.to_numpy(),
            arg_cum_pattern_data[col].to_numpy(),
            linewidth=2,
            color=color,
            label=col
            )

    ax[0,0].legend(fontsize=8, framealpha=0.5)
    ax[0,0].grid(visible=True, which='both')
    ax[0,0].set_title(label='Temporal Patterns', fontdict={'fontsize':10})
    ax[0,0].set_xlabel(xlabel='Timestamp', fontdict={'fontsize':9})
    ax[0,0].set_ylabel(ylabel='Percentage Precipitation', fontdict={'fontsize':9})
    ax[0,0].set_yticklabels(labels=ax[0,0].get_yticklabels(), fontdict={'fontsize':8})
    ax[0,0].set_xticklabels(labels=ax[0,0].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})

    for ind, col, color in zip(range(1,11), arg_cum_pattern_data.columns, colors):
        ax[ind,0].plot(
            arg_cum_pattern_data.index.to_numpy(),
            arg_cum_pattern_data[col].to_numpy(),
            linewidth=2,
            color=color,
            label=col
            )

        ax[ind,0].legend(fontsize=8, framealpha=0.5)
        ax[ind,0].grid(visible=True, which='both')
        ax[ind,0].set_title(label='Temporal Pattern: {}'.format(col.title()), fontdict={'fontsize':10})
        ax[ind,0].set_xlabel(xlabel='Timestamp', fontdict={'fontsize':9})
        ax[ind,0].set_ylabel(ylabel='Percentage Precipitation', fontdict={'fontsize':9})
        ax[ind,0].set_yticklabels(labels=ax[ind,0].get_yticklabels(), fontdict={'fontsize':8})
        ax[ind,0].set_xticklabels(labels=ax[ind,0].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})

    # precipitation data
    for col, color in zip(arg_prec_data.columns, colors):
        ax[0,1].fill_between(
            x=arg_prec_data.index.to_numpy(),
            y1=arg_prec_data[col].to_numpy(),
            step='post',
            linewidth=0.5,
            color=color,
            label=col
            )

    ax[0,1].legend(fontsize=8, framealpha=0.5)
    ax[0,1].grid(visible=True, which='both')
    ax[0,1].set_title(label='Precipitation Series', fontdict={'fontsize':10})
    ax[0,1].set_xlabel(xlabel='Timestamp', fontdict={'fontsize':9})
    ax[0,1].set_ylabel(ylabel='Precipitation Depth ($mm$)', fontdict={'fontsize':9})
    ax[0,1].set_yticklabels(labels=ax[0,1].get_yticklabels(), fontdict={'fontsize':8})
    ax[0,1].set_xticklabels(labels=ax[0,1].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})

    for ind, col, color in zip(range(1,11), arg_prec_data.columns, colors):
        ax[ind,1].fill_between(
            x=arg_prec_data.index.to_numpy(),
            y1=arg_prec_data[col].to_numpy(),
            step='post',
            linewidth=0.5,
            color=color,
            label=col
            )

        ax[ind,1].legend(fontsize=8, framealpha=0.5)
        ax[ind,1].grid(visible=True, which='both')
        ax[ind,1].set_title(label='Precipitation Serie: {}'.format(col.title()), fontdict={'fontsize':10})
        ax[ind,1].set_xlabel(xlabel='Timestamp', fontdict={'fontsize':9})
        ax[ind,1].set_ylabel(ylabel='Precipitation Depth ($mm$)', fontdict={'fontsize':9})
        ax[ind,1].set_yticklabels(labels=ax[ind,1].get_yticklabels(), fontdict={'fontsize':8})
        ax[ind,1].set_xticklabels(labels=ax[ind,1].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})

    # flow data
    for col, color in zip(arg_flow_data.columns, colors):
        ax[0,2].fill_between(
            x=arg_flow_data.index.to_numpy(),
            y1=arg_flow_data[col].to_numpy(),
            linewidth=0.5,
            color=color,
            label=col
            )

    ax[0,2].legend(fontsize=8, framealpha=0.5)
    ax[0,2].grid(visible=True, which='both')
    ax[0,2].set_title(label='Hydrographs', fontdict={'fontsize':10})
    ax[0,2].set_xlabel(xlabel='Timestamp', fontdict={'fontsize':9})
    ax[0,2].set_ylabel(ylabel='Flow Rate ($m^3/s$)', fontdict={'fontsize':9})
    ax[0,2].set_yticklabels(labels=ax[0,2].get_yticklabels(), fontdict={'fontsize':8})
    ax[0,2].set_xticklabels(labels=ax[0,2].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})

    for ind, col, color in zip(range(1,11), arg_flow_data.columns, colors):
        ax[ind,2].fill_between(
            x=arg_flow_data.index.to_numpy(),
            y1=arg_flow_data[col].to_numpy(),
            linewidth=0.5,
            color=color,
            label=col
            )

        ax[ind,2].legend(fontsize=8, framealpha=0.5)
        ax[ind,2].grid(visible=True, which='both')
        ax[ind,2].set_title(label='Hydrograph: {}'.format(col.title()), fontdict={'fontsize':10})
        ax[ind,2].set_xlabel(xlabel='Timestamp', fontdict={'fontsize':9})
        ax[ind,2].set_ylabel(ylabel='Flow Rate ($m^3/s$)', fontdict={'fontsize':9})
        ax[ind,2].set_yticklabels(labels=ax[0,2].get_yticklabels(), fontdict={'fontsize':8})
        ax[ind,2].set_xticklabels(labels=ax[0,2].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})

    fig.suptitle(
        t='ID Group: {}; Window Frequency: {}; Timestep: {}min\nFrequency: {}; Duration {}min ({} {}); Total Precipitation Depth:{}mm'.format(
            arg_id_group, arg_freq_window.title(), arg_time_step, arg_freq_label, arg_duration_in_min, arg_duration, arg_units, arg_depth), 
        x=0.5, y=1, fontsize=11)
    fig.tight_layout()
    fig.savefig(fname='{}freq_pattern__{}.png'.format(arg_graphs_pattern_dir, arg_label), bbox_inches='tight')

    plt.cla()
    plt.clf()
    plt.close(fig=fig) # suposedly, this line should be deleted

In [81]:
# USE THIS: when there is not enough memory instead of the next two blocks

# rain_data.apply(func=lambda arg: export_data_and_plot(
#     arg.freq_window, arg.freq_label, arg.duration_in_min, arg.time_step, 
#     arg.id_group, arg.duration, arg.units, arg.depth, arg.label, 
#     arg.cum_pattern_data, arg.prec_data, arg.flow_data,
#     rain_pattern_cum_dir, precipitation_dir, flow_rate_dir, graphs_pattern_dir
#     ), axis=1)

In [76]:
def run_multiprocessing_data_and_plots(arg_tuples_from_df, arg_n_core=n_core, arg_max_tasks_per_child=100):

    n_task = len(arg_tuples_from_df)
    n_task_per_chunk = math.ceil(n_task/arg_n_core)

    with mp.Pool(processes=arg_n_core, maxtasksperchild=arg_max_tasks_per_child) as pool:
        pool.starmap(func=export_data_and_plot, iterable=arg_tuples_from_df, chunksize=n_task_per_chunk)

In [77]:
run_multiprocessing_data_and_plots(
    [ind + [rain_pattern_cum_dir, precipitation_dir, flow_rate_dir, graphs_pattern_dir] for ind in rain_data[[
    'freq_window', 'freq_label', 'duration_in_min', 'time_step', 
    'id_group', 'duration', 'units', 'depth', 'label', 
    'cum_pattern_data', 'prec_data', 'flow_data']].values.tolist()],
    n_core
    )

  ax[0,0].set_yticklabels(labels=ax[0,0].get_yticklabels(), fontdict={'fontsize':8})
  ax[0,0].set_xticklabels(labels=ax[0,0].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})
  ax[0,0].set_yticklabels(labels=ax[0,0].get_yticklabels(), fontdict={'fontsize':8})
  ax[0,0].set_xticklabels(labels=ax[0,0].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})
  ax[ind,0].set_yticklabels(labels=ax[ind,0].get_yticklabels(), fontdict={'fontsize':8})
  ax[ind,0].set_xticklabels(labels=ax[ind,0].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})
  ax[ind,0].set_yticklabels(labels=ax[ind,0].get_yticklabels(), fontdict={'fontsize':8})
  ax[ind,0].set_yticklabels(labels=ax[ind,0].get_yticklabels(), fontdict={'fontsize':8})
  ax[0,0].set_yticklabels(labels=ax[0,0].get_yticklabels(), fontdict={'fontsize':8})
  ax[ind,0].set_xticklabels(labels=ax[ind,0].get_xticklabels(), fontdict={'fontsize':8, 'rotation':'vertical'})
  ax[ind,0].set_xticklabels(labels=ax[

In [80]:
rain_data.drop(columns=['flow_data', 'cum_pattern_data', 'prec_data'], inplace=True)
rain_data

Unnamed: 0,id_tag,id_duration,duration_in_min,freq_window,id_window,time_step,id_timestep,duration,units,freq_label,freq_tag,depth,id_group,max_flow_rate,label
0,0,0,10,frequent,0,5,0,10.0,min,12EY,freq_12EY,5.15,0,"[3.092359319515403, 3.0957524698775174, 3.0975...",000__00_00_0__0______frequent_______freq_12EY_...
1,0,1,15,frequent,0,5,0,15.0,min,12EY,freq_12EY,6.29,1,"[3.2815647857861006, 3.329428660559919, 3.2097...",001__00_01_0__0______frequent_______freq_12EY_...
2,0,2,20,frequent,0,5,0,20.0,min,12EY,freq_12EY,7.16,2,"[3.475541961735609, 3.043977490542852, 3.10098...",002__00_02_0__0______frequent_______freq_12EY_...
3,0,3,25,frequent,0,5,0,25.0,min,12EY,freq_12EY,7.86,3,"[2.8560383296836087, 2.983632935728466, 3.2046...",003__00_03_0__0______frequent_______freq_12EY_...
4,0,4,30,frequent,0,5,0,30.0,min,12EY,freq_12EY,8.46,4,"[3.1831950859288947, 3.285209879961788, 2.9636...",004__00_04_0__0______frequent_______freq_12EY_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,17,19,4320,rare,2,180,5,72.0,hour,1 in 2000,freq_1_in_2000,642.00,427,"[11.371888551159584, 7.247327487231274, 17.314...",427__17_19_5__2__________rare__freq_1_in_2000_...
428,17,20,5760,rare,2,180,5,96.0,hour,1 in 2000,freq_1_in_2000,656.00,428,"[10.248959306689986, 7.724093598878112, 13.244...",428__17_20_5__2__________rare__freq_1_in_2000_...
429,17,21,7200,rare,2,180,5,120.0,hour,1 in 2000,freq_1_in_2000,662.00,429,"[8.55124487004027, 7.695830049066009, 5.932231...",429__17_21_5__2__________rare__freq_1_in_2000_...
430,17,22,8640,rare,2,180,5,144.0,hour,1 in 2000,freq_1_in_2000,668.00,430,"[7.895309234218781, 5.408443503679562, 9.91701...",430__17_22_5__2__________rare__freq_1_in_2000_...


In [285]:
rain_data = pd.DataFrame(data=rain_data.groupby(by='id_tag'), columns=['id_tag', 'rain_data'])
rain_data['id_window'] = rain_data.rain_data.apply(lambda arg: np.unique(ar=arg.id_window.to_numpy())[0])
rain_data['freq_tag'] = rain_data.rain_data.apply(lambda arg: np.unique(ar=arg.freq_tag.to_numpy())[0])
rain_data['freq_label'] = rain_data.rain_data.apply(lambda arg: np.unique(ar=arg.freq_label.to_numpy())[0])
rain_data['freq_window'] = rain_data.rain_data.apply(lambda arg: np.unique(ar=arg.freq_window.to_numpy())[0])

rain_data['id_duration'] = rain_data.rain_data.apply(lambda arg: arg.id_duration.to_numpy())
rain_data['id_timestep'] = rain_data.rain_data.apply(lambda arg: arg.id_timestep.to_numpy())
rain_data['id_group'] = rain_data.rain_data.apply(lambda arg: arg.id_group.to_numpy())

rain_data['duration_in_min'] = rain_data.rain_data.apply(lambda arg: arg.duration_in_min.to_numpy())
rain_data['duration'] = rain_data.rain_data.apply(lambda arg: arg.duration.to_numpy())
rain_data['units'] = rain_data.rain_data.apply(lambda arg: arg.units.to_numpy())
rain_data['time_step'] = rain_data.rain_data.apply(lambda arg: arg.time_step.to_numpy())

rain_data['depth'] = rain_data.rain_data.apply(lambda arg: arg.depth.to_numpy())
rain_data['max_flow_rate'] = rain_data.rain_data.apply(lambda arg: np.column_stack(tup=arg.max_flow_rate.to_list()))

rain_data.drop(columns='rain_data', inplace=True)

rain_data # 'freq_window', 'id_window', 'freq_label', 'freq_tag'

Unnamed: 0,id_tag,id_window,freq_tag,freq_label,freq_window,id_duration,id_timestep,id_group,duration_in_min,duration,units,time_step,depth,max_flow_rate
0,0,0,freq_12EY,12EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[5.15, 6.29, 7.16, 7.86, 8.46, 9.86, 10.9, 12....","[[3.092359319515403, 3.2815647857861006, 3.475..."
1,1,0,freq_6EY,6EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 3...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[5.91, 7.25, 8.26, 9.07, 9.76, 11.4, 12.6, 14....","[[3.852450578162349, 3.910638042572801, 4.1327..."
2,2,0,freq_4EY,4EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 5...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[7.28, 8.96, 10.2, 11.2, 12.1, 14.1, 15.6, 18....","[[5.191176368113599, 5.136026766214772, 5.5324..."
3,3,0,freq_3EY,3EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 8...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[8.27, 10.2, 11.7, 12.8, 13.8, 16.1, 17.8, 20....","[[6.156700612844418, 6.136439034281131, 6.7272..."
4,4,0,freq_2EY,2EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[96, 97, 98, 99, 100, 101, 102, 103, 104, 105,...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[9.7, 12.0, 13.7, 15.1, 16.3, 19.0, 21.0, 24.2...","[[7.53031098290647, 7.590240425940807, 8.32422..."
5,5,0,freq_63_2_perc,63.2%,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[120, 121, 122, 123, 124, 125, 126, 127, 128, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[12.2, 15.3, 17.5, 19.3, 20.8, 24.3, 27.0, 31....","[[9.885519456843303, 10.238499571582, 11.29843..."
6,6,0,freq_50_perc,50%,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[144, 145, 146, 147, 148, 149, 150, 151, 152, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[13.6, 16.9, 19.4, 21.4, 23.0, 26.7, 29.5, 33....","[[11.0386091900869, 11.421403252525627, 12.617..."
7,7,0,freq_0_5EY,0.5EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[168, 169, 170, 171, 172, 173, 174, 175, 176, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[15.1, 18.8, 21.5, 23.7, 25.5, 29.7, 32.8, 37....","[[12.068699266952553, 12.785069066573325, 13.9..."
8,8,0,freq_20_perc,20%,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[192, 193, 194, 195, 196, 197, 198, 199, 200, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[17.6, 22.0, 25.2, 27.7, 29.7, 34.3, 37.7, 43....","[[13.920976477466098, 14.628503490466109, 16.1..."
9,9,0,freq_0_2EY,0.2EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[216, 217, 218, 219, 220, 221, 222, 223, 224, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[18.0, 22.5, 25.7, 28.3, 30.3, 35.0, 38.5, 43....","[[14.284205783397342, 14.923984104780079, 16.1..."


In [286]:
def col_index(arg_df):
    df_data_cols = pd.concat(
        objs=[
            pd.Series(data=arg_df[ind], name=ind) 
            for ind in [
                'id_duration', 'id_timestep', 'id_group', 'duration_in_min', 
                'duration', 'units', 'time_step', 'depth']], axis=1)
    df_data_cols = pd.MultiIndex.from_frame(df=df_data_cols)
    df_data_cols = pd.DataFrame(data=arg_df.max_flow_rate, columns=df_data_cols)

    return df_data_cols

In [287]:
rain_data.max_flow_rate = rain_data.apply(func=lambda arg: col_index(arg), axis=1)
rain_data

Unnamed: 0,id_tag,id_window,freq_tag,freq_label,freq_window,id_duration,id_timestep,id_group,duration_in_min,duration,units,time_step,depth,max_flow_rate
0,0,0,freq_12EY,12EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[5.15, 6.29, 7.16, 7.86, 8.46, 9.86, 10.9, 12....",id_duration 0 1 2 ...
1,1,0,freq_6EY,6EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 3...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[5.91, 7.25, 8.26, 9.07, 9.76, 11.4, 12.6, 14....",id_duration 0 1 2 ...
2,2,0,freq_4EY,4EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 5...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[7.28, 8.96, 10.2, 11.2, 12.1, 14.1, 15.6, 18....",id_duration 0 1 2 ...
3,3,0,freq_3EY,3EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 8...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[8.27, 10.2, 11.7, 12.8, 13.8, 16.1, 17.8, 20....",id_duration 0 1 2 ...
4,4,0,freq_2EY,2EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[96, 97, 98, 99, 100, 101, 102, 103, 104, 105,...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[9.7, 12.0, 13.7, 15.1, 16.3, 19.0, 21.0, 24.2...",id_duration 0 1 2 ...
5,5,0,freq_63_2_perc,63.2%,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[120, 121, 122, 123, 124, 125, 126, 127, 128, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[12.2, 15.3, 17.5, 19.3, 20.8, 24.3, 27.0, 31....",id_duration 0 1 ...
6,6,0,freq_50_perc,50%,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[144, 145, 146, 147, 148, 149, 150, 151, 152, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[13.6, 16.9, 19.4, 21.4, 23.0, 26.7, 29.5, 33....",id_duration 0 1 ...
7,7,0,freq_0_5EY,0.5EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[168, 169, 170, 171, 172, 173, 174, 175, 176, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[15.1, 18.8, 21.5, 23.7, 25.5, 29.7, 32.8, 37....",id_duration 0 1 ...
8,8,0,freq_20_perc,20%,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[192, 193, 194, 195, 196, 197, 198, 199, 200, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[17.6, 22.0, 25.2, 27.7, 29.7, 34.3, 37.7, 43....",id_duration 0 1 ...
9,9,0,freq_0_2EY,0.2EY,frequent,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, ...","[216, 217, 218, 219, 220, 221, 222, 223, 224, ...","[10, 15, 20, 25, 30, 45, 60, 90, 120, 180, 270...","[10.0, 15.0, 20.0, 25.0, 30.0, 45.0, 1.0, 1.5,...","[min, min, min, min, min, min, hour, hour, hou...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 30, 30...","[18.0, 22.5, 25.7, 28.3, 30.3, 35.0, 38.5, 43....",id_duration 0 1 ...


In [288]:
rain_data.drop(
    columns=[
        'id_duration', 'id_timestep', 'id_group', 'duration_in_min', 
        'duration', 'units', 'time_step', 'depth'
        ], inplace=True)
rain_data

Unnamed: 0,id_tag,id_window,freq_tag,freq_label,freq_window,max_flow_rate
0,0,0,freq_12EY,12EY,frequent,id_duration 0 1 2 ...
1,1,0,freq_6EY,6EY,frequent,id_duration 0 1 2 ...
2,2,0,freq_4EY,4EY,frequent,id_duration 0 1 2 ...
3,3,0,freq_3EY,3EY,frequent,id_duration 0 1 2 ...
4,4,0,freq_2EY,2EY,frequent,id_duration 0 1 2 ...
5,5,0,freq_63_2_perc,63.2%,frequent,id_duration 0 1 ...
6,6,0,freq_50_perc,50%,frequent,id_duration 0 1 ...
7,7,0,freq_0_5EY,0.5EY,frequent,id_duration 0 1 ...
8,8,0,freq_20_perc,20%,frequent,id_duration 0 1 ...
9,9,0,freq_0_2EY,0.2EY,frequent,id_duration 0 1 ...


In [289]:
rain_data['label'] = (
    (rain_data.id_tag.astype(str).str.len().max() - rain_data.id_tag.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_tag)) + rain_data.id_tag.astype(str) + '_' +
    (rain_data.id_window.astype(str).str.len().max() - rain_data.id_window.astype(str).str.len())*pd.Series(data=['0']*len(rain_data.id_window)) + rain_data.id_window.astype(str) + '__' +
    (rain_data.freq_window.str.len().max() - rain_data.freq_window.str.len())*pd.Series(data=['_']*len(rain_data.freq_window)) + rain_data.freq_window + '__' +
    (rain_data.freq_tag.str.len().max() - rain_data.freq_tag.str.len())*pd.Series(data=['_']*len(rain_data.freq_tag)) + rain_data.freq_tag
    )

rain_data

Unnamed: 0,id_tag,id_window,freq_tag,freq_label,freq_window,max_flow_rate,label
0,0,0,freq_12EY,12EY,frequent,id_duration 0 1 2 ...,00_0______frequent_______freq_12EY
1,1,0,freq_6EY,6EY,frequent,id_duration 0 1 2 ...,01_0______frequent________freq_6EY
2,2,0,freq_4EY,4EY,frequent,id_duration 0 1 2 ...,02_0______frequent________freq_4EY
3,3,0,freq_3EY,3EY,frequent,id_duration 0 1 2 ...,03_0______frequent________freq_3EY
4,4,0,freq_2EY,2EY,frequent,id_duration 0 1 2 ...,04_0______frequent________freq_2EY
5,5,0,freq_63_2_perc,63.2%,frequent,id_duration 0 1 ...,05_0______frequent__freq_63_2_perc
6,6,0,freq_50_perc,50%,frequent,id_duration 0 1 ...,06_0______frequent____freq_50_perc
7,7,0,freq_0_5EY,0.5EY,frequent,id_duration 0 1 ...,07_0______frequent______freq_0_5EY
8,8,0,freq_20_perc,20%,frequent,id_duration 0 1 ...,08_0______frequent____freq_20_perc
9,9,0,freq_0_2EY,0.2EY,frequent,id_duration 0 1 ...,09_0______frequent______freq_0_2EY


In [297]:
def plot_and_export_freq_graphs(
    arg_max_flow_rate, arg_label, arg_id_tag, arg_freq_window, arg_freq_label,
    arg_results_frequency_dir, arg_results_stats_dir, arg_graphs_frequency_dir
    ):

    matplotlib.use(backend='agg')

    # export max flow data
    arg_max_flow_rate.to_parquet(path='{}max_flow_rate__{}.parquet'.format(arg_results_frequency_dir, arg_label))

    # decompress index to get data
    data_index = arg_max_flow_rate.columns.to_frame()
    data_index.reset_index(drop=True, inplace=True)

    # stats and export
    stats = pd.concat(objs=[arg_max_flow_rate.describe(), arg_max_flow_rate.quantile(q=[0.05,0.95])])
    stats.rename(index={0.05:'5%', 0.95:'95%'}, inplace=True)
    stats = stats.reindex(index=stats.index[[0,1,2,3,8,4,5,6,9,7]])
    stats.to_parquet(path='{}max_flow_rate_stats__{}.parquet'.format(arg_results_stats_dir, arg_label))

    # confidence limits
    cl_median_based = stats.iloc[:,np.where(stats.to_numpy() == stats.loc['50%'].max())[1][0]].to_dict()
    cl_mean_based = stats.iloc[:,np.where(stats.to_numpy() == stats.loc['mean'].max())[1][0]].to_dict()

    # table
    ax_table = np.vstack(
        tup=[
            data_index.duration_in_min.to_numpy(),
            data_index.time_step.to_numpy(),
            data_index.depth.to_numpy(),
            stats.to_numpy()
            ])

    # plot and export
    fig, ax = plt.subplots(figsize=(15,6), num=1, clear=True)

    ax.boxplot(
        x=arg_max_flow_rate.to_numpy(),
        positions=data_index.id_duration.to_numpy(),
        showmeans=True,
        flierprops={'marker':'x', 'markeredgecolor':'steelblue'},
        medianprops={'linestyle':'-', 'color':'steelblue'},
        meanprops={'marker':'x', 'markeredgecolor':'red'}
        );

    ax.grid(visible=True, which='both')
    ax.set_xticks(ticks=data_index.id_duration.to_numpy())
    ax.set_xticklabels(
        labels=[
            '{} {}'.format(ind1, ind2) for ind1, ind2 in 
            zip(data_index.duration.to_numpy(), data_index.units.to_numpy())], 
        rotation='vertical', fontdict={'fontsize':8, 'rotation':'vertical'});
    ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});

    table = ax.table(
        cellText=ax_table.round(decimals=3),
        rowLabels=np.hstack((np.array(object=['duration', 'timestep', 'prec depth']), stats.index.to_numpy())),
        bbox=[0,-0.72,1,0.5])
    table.set_fontsize(8)

    ax.set_title(
        label='ID: {}; Window Frequency: {}; Frequency: {}'.format(
            arg_id_tag, arg_freq_window.title(), arg_freq_label
            ), 
        fontdict={'fontsize':10})
    ax.set_xlabel(xlabel='Duration ($min$)', fontdict={'fontsize':9})
    ax.set_ylabel(ylabel='Flow Rate ($m^3/s$)', fontdict={'fontsize':9})

    fig.savefig(
        fname='{}freq_graph__{}.png'.format(arg_graphs_frequency_dir, arg_label),
        bbox_inches='tight')
    
    plt.cla()
    plt.clf()
    plt.close(fig=fig)

    return cl_mean_based, cl_median_based

In [300]:
rain_data[['cl_mean_based', 'cl_median_based']] = rain_data.apply(
    func=lambda arg: plot_and_export_freq_graphs(
    arg.max_flow_rate, arg.label, arg.id_tag, arg.freq_window, arg.freq_label,
    results_frequency_dir, results_stats_dir, graphs_frequency_dir
    ), axis=1, result_type='expand')

rain_data

  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yticklabels(labels=ax.get_yticklabels(), fontdict={'fontsize':8});
  ax.set_yti

Unnamed: 0,id_tag,id_window,freq_tag,freq_label,freq_window,max_flow_rate,label,cl_mean_based,cl_median_based
0,0,0,freq_12EY,12EY,frequent,id_duration 0 1 2 ...,00_0______frequent_______freq_12EY,"{'count': 10.0, 'mean': 3.741519270781435, 'st...","{'count': 10.0, 'mean': 3.741519270781435, 'st..."
1,1,0,freq_6EY,6EY,frequent,id_duration 0 1 2 ...,01_0______frequent________freq_6EY,"{'count': 10.0, 'mean': 4.479421551962975, 'st...","{'count': 10.0, 'mean': 4.479421551962975, 'st..."
2,2,0,freq_4EY,4EY,frequent,id_duration 0 1 2 ...,02_0______frequent________freq_4EY,"{'count': 10.0, 'mean': 5.8241559666412455, 's...","{'count': 10.0, 'mean': 5.797802868903508, 'st..."
3,3,0,freq_3EY,3EY,frequent,id_duration 0 1 2 ...,03_0______frequent________freq_3EY,"{'count': 10.0, 'mean': 6.946012946713457, 'st...","{'count': 10.0, 'mean': 6.639290742797741, 'st..."
4,4,0,freq_2EY,2EY,frequent,id_duration 0 1 2 ...,04_0______frequent________freq_2EY,"{'count': 10.0, 'mean': 8.4725041761047, 'std'...","{'count': 10.0, 'mean': 8.110322286645346, 'st..."
5,5,0,freq_63_2_perc,63.2%,frequent,id_duration 0 1 ...,05_0______frequent__freq_63_2_perc,"{'count': 10.0, 'mean': 11.23962229643798, 'st...","{'count': 10.0, 'mean': 11.23962229643798, 'st..."
6,6,0,freq_50_perc,50%,frequent,id_duration 0 1 ...,06_0______frequent____freq_50_perc,"{'count': 10.0, 'mean': 12.460500420279269, 's...","{'count': 10.0, 'mean': 12.460500420279269, 's..."
7,7,0,freq_0_5EY,0.5EY,frequent,id_duration 0 1 ...,07_0______frequent______freq_0_5EY,"{'count': 10.0, 'mean': 13.73802796690092, 'st...","{'count': 10.0, 'mean': 13.73802796690092, 'st..."
8,8,0,freq_20_perc,20%,frequent,id_duration 0 1 ...,08_0______frequent____freq_20_perc,"{'count': 10.0, 'mean': 15.71152017908209, 'st...","{'count': 10.0, 'mean': 15.71152017908209, 'st..."
9,9,0,freq_0_2EY,0.2EY,frequent,id_duration 0 1 ...,09_0______frequent______freq_0_2EY,"{'count': 10.0, 'mean': 15.923457012488834, 's...","{'count': 10.0, 'mean': 15.923457012488834, 's..."


In [342]:
aep_per = frequency_table.AEP_1_in_x[:-1]
aep_per.name = 'AEP (1 in x)'
aep_per

0     1.000006
1     1.002485
2     1.018657
3     1.052396
4     1.156518
5     1.581977
6          2.0
7     2.541494
8          5.0
9     5.516656
10        10.0
11        20.0
12        50.0
13       100.0
14       200.0
15       500.0
16      1000.0
17      2000.0
Name: AEP (1 in x), dtype: object

In [363]:
cl_mean = pd.DataFrame(rain_data.cl_mean_based.tolist())
cl_mean = pd.concat(objs=[aep_per, cl_mean], axis=1).T
cl_mean.columns = [ind for ind in rain_data.freq_label]
cl_mean = pd.DataFrame(data=np.array(object=cl_mean.to_numpy(), dtype=np.float64), index=cl_mean.index, columns=cl_mean.columns)
cl_mean.to_parquet(path='{}max_outflows_mean_based.parquet'.format(output_dir))

cl_mean

Unnamed: 0,12EY,6EY,4EY,3EY,2EY,63.2%,50%,0.5EY,20%,0.2EY,10%,5%,2%,1%,1 in 200,1 in 500,1 in 1000,1 in 2000
AEP (1 in x),1.000006,1.002485,1.018657,1.052396,1.156518,1.581977,2.0,2.541494,5.0,5.516656,10.0,20.0,50.0,100.0,200.0,500.0,1000.0,2000.0
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,3.741519,4.479422,5.824156,6.946013,8.472504,11.239622,12.4605,13.738028,15.71152,15.923457,17.033705,17.901557,18.888348,19.654337,20.517256,21.86478,23.191748,24.806151
std,0.484155,0.563119,0.667178,0.784247,0.921891,1.099483,1.15201,1.211728,0.840837,0.768911,0.615254,0.603161,0.325408,0.374966,0.391798,0.455307,0.526174,0.5625
min,2.975393,3.614366,4.973328,5.951904,7.241483,9.669572,10.774063,11.989304,14.493267,14.79185,15.986408,17.11298,18.355096,18.986674,19.732785,20.907575,22.13256,23.768632
5%,2.99534,3.639322,5.060993,6.026955,7.424824,9.796477,10.8951,12.096184,14.617619,14.9309,16.187283,17.164124,18.426233,19.098959,19.941503,21.189786,22.375713,23.97434
25%,3.531658,4.199637,5.257337,6.302111,7.704494,10.510276,11.76168,12.999854,14.969718,15.287774,16.682392,17.460064,18.691022,19.472105,20.358379,21.654221,22.987075,24.427677
50%,3.779937,4.511133,5.594974,6.737265,8.264648,11.133214,12.419143,13.690513,15.944704,16.108978,16.967047,17.877843,18.847696,19.599778,20.480942,21.91126,23.242253,24.93225
75%,4.111231,4.863922,6.491484,7.78756,9.357574,12.053758,13.213745,14.450463,16.115155,16.294848,17.485477,18.310682,19.14552,19.971671,20.833498,22.177034,23.615519,25.269624
95%,4.315085,5.17714,6.664697,7.895679,9.654754,12.722184,14.028105,15.421218,16.800377,16.946148,17.862146,18.790041,19.303182,20.109722,20.983369,22.394058,23.769188,25.401088


In [367]:
cl_median = pd.DataFrame(rain_data.cl_median_based.tolist())
cl_median = pd.concat(objs=[aep_per, cl_median], axis=1).T
cl_median.columns = [ind for ind in rain_data.freq_label]
cl_median = pd.DataFrame(data=np.array(object=cl_median.to_numpy(), dtype=np.float64), index=cl_median.index, columns=cl_median.columns)
cl_median.to_parquet(path='{}max_outflows_median_based.parquet'.format(output_dir))

cl_median

Unnamed: 0,12EY,6EY,4EY,3EY,2EY,63.2%,50%,0.5EY,20%,0.2EY,10%,5%,2%,1%,1 in 200,1 in 500,1 in 1000,1 in 2000
AEP (1 in x),1.000006,1.002485,1.018657,1.052396,1.156518,1.581977,2.0,2.541494,5.0,5.516656,10.0,20.0,50.0,100.0,200.0,500.0,1000.0,2000.0
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,3.741519,4.479422,5.797803,6.639291,8.110322,11.239622,12.4605,13.738028,15.71152,15.923457,17.033705,17.901557,18.888348,19.654337,20.517256,21.86478,23.191748,24.806151
std,0.484155,0.563119,0.700725,1.031463,1.24104,1.099483,1.15201,1.211728,0.840837,0.768911,0.615254,0.603161,0.325408,0.374966,0.391798,0.455307,0.526174,0.5625
min,2.975393,3.614366,4.65235,5.179759,6.309187,9.669572,10.774063,11.989304,14.493267,14.79185,15.986408,17.11298,18.355096,18.986674,19.732785,20.907575,22.13256,23.768632
5%,2.99534,3.639322,4.824485,5.399799,6.557174,9.796477,10.8951,12.096184,14.617619,14.9309,16.187283,17.164124,18.426233,19.098959,19.941503,21.189786,22.375713,23.97434
25%,3.531658,4.199637,5.331232,5.727361,7.033303,10.510276,11.76168,12.999854,14.969718,15.287774,16.682392,17.460064,18.691022,19.472105,20.358379,21.654221,22.987075,24.427677
50%,3.779937,4.511133,5.804272,6.857311,8.368953,11.133214,12.419143,13.690513,15.944704,16.108978,16.967047,17.877843,18.847696,19.599778,20.480942,21.91126,23.242253,24.93225
75%,4.111231,4.863922,6.31001,7.253458,8.828487,12.053758,13.213745,14.450463,16.115155,16.294848,17.485477,18.310682,19.14552,19.971671,20.833498,22.177034,23.615519,25.269624
95%,4.315085,5.17714,6.685258,8.00797,9.794453,12.722184,14.028105,15.421218,16.800377,16.946148,17.862146,18.790041,19.303182,20.109722,20.983369,22.394058,23.769188,25.401088


In [373]:
fig, ax = plt.subplots(figsize=(15,10), num=1, clear=True)

matplotlib.use(backend='agg')

ax.plot(
    cl_mean.loc['AEP (1 in x)'].to_numpy(),
    cl_mean.loc['mean'].to_numpy(),
    '-o',
    markersize=3,
    color='tab:blue',
    label='mean'
    )

ax.plot(
    cl_mean.loc['AEP (1 in x)'].to_numpy(),
    cl_mean.loc['50%'].to_numpy(),
    '-.',
    linewidth=0.9,
    color='tab:red',
    label='median (50%)'
    )

ax.fill_between(
    x=cl_mean.loc['AEP (1 in x)'].to_numpy(),
    y1=cl_mean.loc['5%'].to_numpy(),
    y2=cl_mean.loc['95%'].to_numpy(),
    alpha=0.3,
    color='tab:blue',
    label='conf. limits (5-95%)'
    )

for ind1, ind2, ind3 in zip(
    cl_mean.columns.to_numpy(),
    cl_mean.loc['AEP (1 in x)'].to_numpy(), 
    cl_mean.loc['mean'].to_numpy()):
    ax.annotate(
        text=ind1,
        xy=(ind2, ind3),
        xytext=(5, -7.5),
        textcoords='offset points',
        fontsize=8
        )

table = ax.table(
    cellText=cl_mean.to_numpy().round(decimals=3),
    rowLabels=cl_mean.index.to_numpy(),
    colLabels=cl_mean.columns.to_numpy(),
    bbox=[0,-0.62,1,0.5])
table.set_fontsize(8)

ax.legend()
ax.grid(visible=True, which='both')
ax.set_xscale(value='log')
ax.set_title(label='Flow Quantiles - Mean-based', fontdict={'fontsize':10})
ax.set_xlabel(xlabel='AEP (1 in x)', fontdict={'fontsize':9})
ax.set_ylabel(ylabel='Flow Rate ($m^3/s$)', fontdict={'fontsize':9})

fig.tight_layout()
fig.savefig(fname='{}graph_max_outflows_mean_based.png'.format(output_dir), bbox_inches='tight')
plt.cla()
plt.clf()
plt.close(fig=fig)

In [374]:
fig, ax = plt.subplots(figsize=(15,10), num=1, clear=True)

matplotlib.use(backend='agg')

ax.plot(
    cl_median.loc['AEP (1 in x)'].to_numpy(),
    cl_median.loc['mean'].to_numpy(),
    '-o',
    markersize=3,
    color='tab:blue',
    label='mean'
    )

ax.plot(
    cl_median.loc['AEP (1 in x)'].to_numpy(),
    cl_median.loc['50%'].to_numpy(),
    '-.',
    linewidth=0.9,
    color='tab:red',
    label='median (50%)'
    )

ax.fill_between(
    x=cl_median.loc['AEP (1 in x)'].to_numpy(),
    y1=cl_median.loc['5%'].to_numpy(),
    y2=cl_median.loc['95%'].to_numpy(),
    alpha=0.3,
    color='tab:blue',
    label='conf. limits (5-95%)'
    )

for ind1, ind2, ind3 in zip(
    cl_median.columns.to_numpy(),
    cl_median.loc['AEP (1 in x)'].to_numpy(), 
    cl_median.loc['mean'].to_numpy()):
    ax.annotate(
        text=ind1,
        xy=(ind2, ind3),
        xytext=(5, -7.5),
        textcoords='offset points',
        fontsize=8
        )

table = ax.table(
    cellText=cl_median.to_numpy().round(decimals=3),
    rowLabels=cl_median.index.to_numpy(),
    colLabels=cl_median.columns.to_numpy(),
    bbox=[0,-0.62,1,0.5])
table.set_fontsize(8)

ax.legend()
ax.grid(visible=True, which='both')
ax.set_xscale(value='log')
ax.set_title(label='Flow Quantiles - Median-based', fontdict={'fontsize':10})
ax.set_xlabel(xlabel='AEP (1 in x)', fontdict={'fontsize':9})
ax.set_ylabel(ylabel='Flow Rate ($m^3/s$)', fontdict={'fontsize':9})

fig.tight_layout()
fig.savefig(fname='{}graph_max_outflows_median_based.png'.format(output_dir), bbox_inches='tight')
plt.cla()
plt.clf()
plt.close(fig=fig)