## Import necessary libraries

In [665]:
import sqlite3
import pandas as pd
from pathlib import Path
import numpy as np
from scipy.stats import norm

## Defining the path to the database and connecting to the database

In [666]:
# Location of the SQLite result database and of the folder holding the exported TakenMeasures CSV files.
db_path = Path(r'n:\Projects\11209000\11209353\B. Measurements and calculations\008 - Resultaten Proefvlucht\ZZL\7-2\databases\database_bekleding_bovengrens\traject_7_2.db')
csv_path = Path(r'n:\Projects\11209000\11209353\B. Measurements and calculations\008 - Resultaten Proefvlucht\ZZL\7-2\databases\database_bekleding_bovengrens\Basisberekening')

# sqlite3.connect() silently creates a new, empty database when the file does not exist, which would
# only surface later as confusing "no such table" errors. Fail early with a clear message instead.
if not db_path.is_file():
    raise FileNotFoundError("Database not found: {}".format(db_path))
if not csv_path.is_dir():
    raise FileNotFoundError("CSV folder not found: {}".format(csv_path))

conn = sqlite3.connect(db_path)

### open the CSVs for Taken Measures:

In [667]:
def _load_taken_measures(csv_file):
    """Read one TakenMeasures CSV export and tidy it up.

    The whole clean-up is a single method chain that returns a new frame, so
    no column is ever assigned on an ``iloc`` slice (which would trigger
    pandas' SettingWithCopyWarning).

    Parameters
    ----------
    csv_file : path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The CSV content without its first row, with the 'Unnamed: 0' column
        renamed to 'step_number' and 'Section' cast to int. The original row
        labels are kept, so the index starts at 1.
    """
    return (
        pd.read_csv(csv_file)
        .iloc[1:]                                         # drop the first row (it is empty)
        .rename(columns={'Unnamed: 0': 'step_number'})    # the first column has no header in the CSV
        .astype({'Section': int})                         # Section is read as float but holds integer ids
    )


taken_measure_vrm = _load_taken_measures(csv_path.joinpath('TakenMeasures_Veiligheidsrendement.csv'))
taken_measures_doorsnede = _load_taken_measures(csv_path.joinpath('TakenMeasures_Doorsnede-eisen.csv'))

In [668]:
# Preview the first ten rows of the Veiligheidsrendement taken-measures table.
taken_measure_vrm.head(10)

Unnamed: 0,step_number,Section,option_index,LCC,BC,ID,name,year,yes/no,dcrest,dberm,beta_target,transition_level
1,1,1,398.0,82487.617481,83210.600629,8,[],0,-999.0,-999.0,-999.0,5.248081,0.5
2,2,11,323.0,427142.793164,83210.600629,8,[],0,-999.0,-999.0,-999.0,3.748238,3.65
3,3,13,303.0,925669.952363,83210.600629,8,[],0,-999.0,-999.0,-999.0,0.746078,3.49
4,4,1,354.0,41243.80874,75748.342507,8,[],0,-999.0,-999.0,-999.0,2.998904,0.75
5,5,11,318.0,213571.396582,75748.342507,8,[],0,-999.0,-999.0,-999.0,2.998381,3.9
6,6,13,313.0,308556.650788,75748.342507,8,[],0,-999.0,-999.0,-999.0,1.496325,3.74
7,7,14,292.0,182801.607523,75748.342507,8,[],0,-999.0,-999.0,-999.0,0.857865,3.0
8,8,15,292.0,127935.932774,75748.342507,8,[],0,-999.0,-999.0,-999.0,0.670567,2.95
9,9,14,317.0,182801.607523,48989.355851,8,[],0,-999.0,-999.0,-999.0,2.785343,3.25
10,10,15,325.0,127935.932774,48989.355851,8,[],0,-999.0,-999.0,-999.0,3.334187,3.2


## Obtain a list of all tables in the database

In [669]:
# sqlite_master is queried for every entry of type 'table'; the result has a single "name" column.
tables = pd.read_sql_query(
    "SELECT name FROM sqlite_master WHERE type='table';",
    conn,
)

# Report how many tables were found, followed by their names.
table_count = len(tables)
print("The database consists of the following", table_count, "tables:")
print(tables)

The database consists of the following 35 tables:
                               name
0                   DikeTrajectInfo
1                       SectionData
2                         Mechanism
3               MechanismPerSection
4         AssessmentMechanismResult
5           AssessmentSectionResult
6                   ComputationType
7               ComputationScenario
8                         SlopePart
9            BlockRevetmentRelation
10                        Buildings
11          CharacteristicPointType
12                   CombinableType
13     ComputationScenarioParameter
14                      MeasureType
15                          Measure
16                    CustomMeasure
17           CustomMeasureParameter
18           GrassRevetmentRelation
19                MeasurePerSection
20                    MeasureResult
21           MeasureResultMechanism
22           MeasureResultParameter
23             MeasureResultSection
24                   MechanismTable
25            

## Retrieving the types of optimization

In [670]:
# Read the OptimizationRun table by name. Picking it by its position in the sqlite_master
# listing is fragile: the index shifts as soon as a table is added, removed or reordered.
table_name = 'OptimizationRun'
if table_name not in tables["name"].values:
    raise KeyError("Table {} not found in the database".format(table_name))

sql_query = 'SELECT * FROM {}'.format(table_name)
optimization_type = pd.read_sql_query(sql_query, conn)

# Map the name of every optimization run to the id of its optimization type.
optimization_type_dict = dict(zip(optimization_type["name"], optimization_type["optimization_type_id"]))
print(optimization_type_dict)

{'Basisberekening Veiligheidsrendement': 1, 'Basisberekening Doorsnede-eisen': 2}


## Retrieve from the database tab "OptimizationSelectedMeasure" the id and investment_year 
####  optimization_run_id = 1 for VRM, 
#### optimization_run_id = 2 for DSM

In [671]:
# Id of the optimization run to analyse.
optimization_run_id = 1

# Select the run by its own id rather than by optimization_type_id: a run id and a type id are
# different keys and only select the same row while their values happen to coincide.
# NOTE(review): assumes the primary-key column of OptimizationRun is called "id", like the other
# tables queried in this notebook - confirm against the schema.
sql_query = 'SELECT * FROM OptimizationRun WHERE id = ?'
optimization_run = pd.read_sql_query(sql_query, conn, params=(optimization_run_id,))
if optimization_run.empty:
    # Without this check the .values[0] below would fail with an uninformative IndexError.
    raise ValueError("No OptimizationRun found with id {}".format(optimization_run_id))
discount_rate = optimization_run["discount_rate"].values[0]

# All measures that were selected in this optimization run. The id is passed as a bound
# parameter instead of being formatted into the SQL text.
sql_query = 'SELECT * FROM OptimizationSelectedMeasure WHERE optimization_run_id = ?'
selected_optimization_measure = pd.read_sql_query(sql_query, conn, params=(optimization_run_id,))

## create a list of ids in the selected_optimization_measure

In [672]:
# Plain Python list with the id of every selected measure of this run.
selected_measure_ids = selected_optimization_measure["id"].to_list()

## Retrieve from the database tab "OptimizationStep" all rows where optimization_selected_measure_id is in selected_measure_ids

In [673]:
# Turn the selected-measure ids into a comma-separated list for the SQL IN clause.
selected_ids_sql = ', '.join(map(str, selected_measure_ids))
sql_query = 'SELECT * FROM OptimizationStep WHERE optimization_selected_measure_id IN ({})'.format(selected_ids_sql)

# Load the matching optimization steps and order them by their id.
optimization_step = pd.read_sql_query(sql_query, conn).sort_values(by="id")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk
7,1,399,1,82487.62,507855600000.0
67,2,3148,2,509630.4,479548100000.0
79,3,3467,3,1435300.0,441389500000.0
5,4,355,4,1476544.0,413851500000.0
66,5,3143,5,1690116.0,394444100000.0


## Retrieve from the database tab "OptimizationSelectedMeasure" all rows whose id appears in optimization_step["optimization_selected_measure_id"]

In [674]:
# Ids of the selected measures that are referenced by the optimization steps.
step_measure_ids = optimization_step["optimization_selected_measure_id"].tolist()
step_measure_ids_sql = ', '.join(map(str, step_measure_ids))

# Fetch exactly those rows from OptimizationSelectedMeasure.
sql_query = 'SELECT * FROM OptimizationSelectedMeasure WHERE id IN ({})'.format(step_measure_ids_sql)
optimization_selected_measure = pd.read_sql_query(sql_query, conn)
optimization_selected_measure.head()

Unnamed: 0,id,optimization_run_id,measure_result_id,investment_year
0,3,1,2,0
1,312,1,168,0
2,314,1,170,0
3,315,1,171,0
4,317,1,173,0


## add the measure_result_id + investment_year to the optimization_step dataframe. These can be found in the 
## optimization_selected_measure dataframe

In [675]:
# Attach measure_result_id and investment_year to every optimization step. The lookup frame's "id"
# is renamed to the join key first, so the merge produces no id_x / id_y columns to clean up.
selected_measure_lookup = optimization_selected_measure[["id", "measure_result_id", "investment_year"]].rename(
    columns={"id": "optimization_selected_measure_id"})
optimization_step = optimization_step.merge(
    selected_measure_lookup, on="optimization_selected_measure_id", how="left")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year
0,1,399,1,82487.62,507855600000.0,255,0
1,2,3148,2,509630.4,479548100000.0,1852,0
2,3,3467,3,1435300.0,441389500000.0,2027,0
3,4,355,4,1476544.0,413851500000.0,211,0
4,5,3143,5,1690116.0,394444100000.0,1847,0


In [676]:
# Count the optimization steps whose investment takes place in year 0 and in year 20.
invested_at_t0 = optimization_step.investment_year == 0
invested_at_t20 = optimization_step.investment_year == 20

print("investments done in year t=0:")
print(invested_at_t0.sum())
print("investments done in year t=20:")
print(invested_at_t20.sum())

investments done in year t=0:
448
investments done in year t=20:
23


## Look up the MeasureResult rows whose id matches optimization_step["measure_result_id"]
## Then, add the "measure_per_section_id" column from the measure_result dataframe to the optimization_step dataframe

In [677]:
# Fetch the MeasureResult rows referenced by the optimization steps.
measure_result_ids_sql = ', '.join(map(str, optimization_step["measure_result_id"].tolist()))
sql_query = 'SELECT * FROM MeasureResult WHERE id IN ({})'.format(measure_result_ids_sql)
measure_result = pd.read_sql_query(sql_query, conn)

# Attach measure_per_section_id to every step. Renaming the lookup's "id" to the join key
# beforehand keeps the merge from creating id_x / id_y columns.
measure_result_lookup = measure_result[["id", "measure_per_section_id"]].rename(
    columns={"id": "measure_result_id"})
optimization_step = optimization_step.merge(measure_result_lookup, on="measure_result_id", how="left")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id
0,1,399,1,82487.62,507855600000.0,255,0,6
1,2,3148,2,509630.4,479548100000.0,1852,0,54
2,3,3467,3,1435300.0,441389500000.0,2027,0,60
3,4,355,4,1476544.0,413851500000.0,211,0,6
4,5,3143,5,1690116.0,394444100000.0,1847,0,54


In [678]:
# Fetch the MeasurePerSection rows referenced by optimization_step["measure_per_section_id"].
measure_per_section_ids_sql = ', '.join(map(str, optimization_step["measure_per_section_id"].tolist()))
sql_query = 'SELECT * FROM MeasurePerSection WHERE id IN ({})'.format(measure_per_section_ids_sql)
measure_per_section = pd.read_sql_query(sql_query, conn)

# Attach section_id and measure_id to every step. The lookup's "id" is renamed to the join key
# first, so no id_x / id_y columns appear in the result.
measure_per_section_lookup = measure_per_section[["id", "section_id", "measure_id"]].rename(
    columns={"id": "measure_per_section_id"})
optimization_step = optimization_step.merge(
    measure_per_section_lookup, on="measure_per_section_id", how="left")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8


In [679]:
# Fetch the SectionData rows whose id occurs in optimization_step["section_id"].
section_ids_sql = ', '.join(map(str, optimization_step["section_id"].tolist()))
sql_query = 'SELECT * FROM SectionData WHERE id IN ({})'.format(section_ids_sql)
section_data = pd.read_sql_query(sql_query, conn)

# Attach section_name to every step. The lookup's "id" is renamed to the join key first,
# so the merge yields no id_x / id_y columns.
section_lookup = section_data[["id", "section_name"]].rename(columns={"id": "section_id"})
optimization_step = optimization_step.merge(section_lookup, on="section_id", how="left")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id,section_name
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8,1
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8,11
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8,13
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8,1
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8,11


In [680]:
# Fetch the Measure rows whose id occurs in optimization_step["measure_id"].
measure_ids_sql = ', '.join(map(str, optimization_step["measure_id"].tolist()))
sql_query = 'SELECT * FROM Measure WHERE id IN ({})'.format(measure_ids_sql)
measure = pd.read_sql_query(sql_query, conn)

# Attach every Measure column except its id to the steps. The id is renamed to the join key
# first, so it merges into the existing measure_id column instead of producing id_x / id_y.
measure_lookup = measure.rename(columns={"id": "measure_id"})
optimization_step = optimization_step.merge(measure_lookup, on="measure_id", how="left")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id,section_name,measure_type_id,combinable_type_id,name,year
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8,1,6,4,Aanpassing bekleding,0
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8,11,6,4,Aanpassing bekleding,0
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8,13,6,4,Aanpassing bekleding,0
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8,1,6,4,Aanpassing bekleding,0
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8,11,6,4,Aanpassing bekleding,0


## Now we want to find the parameters corresponding to the measure_result_id and add them to the dataframe.
### First, we add all parameters to the dataframe as a column
### After that, add "empty" columns to the optimization_step dataframe that contain -999.0 values. 
### These values will be later replaced with the parameters

In [681]:
# Every distinct parameter name that occurs in MeasureResultParameter.
sql_query = 'SELECT DISTINCT name FROM MeasureResultParameter'
measure_result_parameter = pd.read_sql_query(sql_query, conn)
parameter_names = measure_result_parameter["name"].tolist()
print(parameter_names)

# One placeholder column per parameter, filled with the sentinel -999.0 until real values are written.
for parameter_name in parameter_names:
    optimization_step[parameter_name] = -999.0

optimization_step.head()

['DCREST', 'DBERM', 'BETA_TARGET', 'TRANSITION_LEVEL']


Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id,section_name,measure_type_id,combinable_type_id,name,year,DCREST,DBERM,BETA_TARGET,TRANSITION_LEVEL
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8,1,6,4,Aanpassing bekleding,0,-999.0,-999.0,-999.0,-999.0
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8,11,6,4,Aanpassing bekleding,0,-999.0,-999.0,-999.0,-999.0
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8,13,6,4,Aanpassing bekleding,0,-999.0,-999.0,-999.0,-999.0
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8,1,6,4,Aanpassing bekleding,0,-999.0,-999.0,-999.0,-999.0
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8,11,6,4,Aanpassing bekleding,0,-999.0,-999.0,-999.0,-999.0


In [682]:
# Load the parameter rows that belong to the measure results used in optimization_step.
result_ids_sql = ', '.join(map(str, optimization_step["measure_result_id"].tolist()))
sql_query = 'SELECT * FROM MeasureResultParameter WHERE measure_result_id IN ({})'.format(result_ids_sql)
measure_result_parameter = pd.read_sql_query(sql_query, conn)

# Walk over the (measure_result_id, name, value) triples. Whenever the parameter name is a column
# of optimization_step, write the value into that column for all steps with that measure_result_id.
parameter_triples = zip(
    measure_result_parameter["measure_result_id"],
    measure_result_parameter["name"],
    measure_result_parameter["value"],
)
for result_id, parameter_name, parameter_value in parameter_triples:
    if parameter_name not in optimization_step.columns:
        continue
    matching_steps = optimization_step["measure_result_id"] == result_id
    optimization_step.loc[matching_steps, parameter_name] = parameter_value

optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id,section_name,measure_type_id,combinable_type_id,name,year,DCREST,DBERM,BETA_TARGET,TRANSITION_LEVEL
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8,1,6,4,Aanpassing bekleding,0,-999.0,-999.0,5.248081,0.5
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8,11,6,4,Aanpassing bekleding,0,-999.0,-999.0,3.748238,3.65
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8,13,6,4,Aanpassing bekleding,0,-999.0,-999.0,0.746078,3.49
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8,1,6,4,Aanpassing bekleding,0,-999.0,-999.0,2.998904,0.75
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8,11,6,4,Aanpassing bekleding,0,-999.0,-999.0,2.998381,3.9


In [683]:
# Retrieve the initial risk from the database, approximated here as
# initial flood damage x initial failure probability.
# The flood damage is read from DikeTrajectInfo (column flood_damage).
sql_query = 'SELECT * FROM DikeTrajectInfo'
dike_traject_info = pd.read_sql_query(sql_query, conn)
flood_damage = dike_traject_info["flood_damage"].values[0]
print(flood_damage)

# The initial failure probability is taken as the largest section failure probability found in
# AssessmentSectionResult at time = 0.
# NOTE(review): the maximum over the sections is used, not the sum - confirm that this is the
# intended trajectory failure probability.
sql_query = 'SELECT * FROM AssessmentSectionResult WHERE time=0'
assessment_section_result = pd.read_sql_query(sql_query, conn)

# The table holds betas; convert them to failure probabilities: pf = norm.cdf(-beta)
assessment_section_result["pf"] = norm.cdf(-assessment_section_result["beta"])
print(assessment_section_result["pf"])

# Series.max() skips NaN, whereas the builtin max() on a Series gives order-dependent results as
# soon as a NaN is present. Compute the value once and keep it (and the risk) for later cells.
max_pf = assessment_section_result["pf"].max()
initial_risk = max_pf * flood_damage
print("max failure =", max_pf)
print("Initial risk =", initial_risk)

# TODO: simply multiplying is not sufficient: the risk is discounted over the years (using the
# discount rate) and the failure probability also increases in time

17000000000.0
0     0.500003
1     0.000015
2     0.000007
3     0.001438
4     0.000421
5     0.015516
6     0.000426
7     0.099121
8     0.500435
9     0.501679
10    0.195506
11    0.251271
12    0.035382
13    0.018796
14    0.000423
15    0.011414
16    0.023337
17    0.026489
18    0.024987
19    0.028844
20    0.055780
21    0.001852
22    0.000419
23    0.000450
24    0.000552
25    0.000500
26    0.000602
27    0.001004
28    0.000478
29    0.000139
30    0.000091
31    0.000063
32    0.000225
33    0.000194
34    0.000079
Name: pf, dtype: float64
max failure = 0.5016787433335955
Initial risk = 8528538636.6711235


In [684]:
# Load the MeasureResultSection rows of the measure results used in optimization_step.
result_ids_sql = ', '.join(map(str, optimization_step["measure_result_id"].tolist()))
sql_query = 'SELECT * FROM MeasureResultSection WHERE measure_result_id IN ({})'.format(result_ids_sql)
measure_result_section = pd.read_sql_query(sql_query, conn)

# Only the rows for time = 0 are of interest.
measure_result_section = measure_result_section.loc[measure_result_section["time"].eq(0)]

# Attach that cost to every step under the name standalone_cost.
cost_at_t0 = measure_result_section[["measure_result_id", "cost"]].rename(columns={"cost": "standalone_cost"})
optimization_step = optimization_step.merge(cost_at_t0, on="measure_result_id", how="left")
optimization_step.head()

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id,section_name,measure_type_id,combinable_type_id,name,year,DCREST,DBERM,BETA_TARGET,TRANSITION_LEVEL,standalone_cost
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8,1,6,4,Aanpassing bekleding,0,-999.0,-999.0,5.248081,0.5,82487.617481
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8,11,6,4,Aanpassing bekleding,0,-999.0,-999.0,3.748238,3.65,427142.793164
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8,13,6,4,Aanpassing bekleding,0,-999.0,-999.0,0.746078,3.49,925669.952363
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8,1,6,4,Aanpassing bekleding,0,-999.0,-999.0,2.998904,0.75,123731.426221
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8,11,6,4,Aanpassing bekleding,0,-999.0,-999.0,2.998381,3.9,640714.189747


In [685]:
# Marginal cost of reinforcement per optimization step: the increase of total_lcc with respect to
# the previous row. The first row has no predecessor, so it gets its own total_lcc.
# The result is assigned back to the column instead of calling fillna(..., inplace=True) on
# optimization_step[col]: that in-place call acts on an intermediate object, which pandas warns
# about and which leaves the frame untouched once copy-on-write is active.
optimization_step["marginal_cost"] = (
    optimization_step["total_lcc"].diff().fillna(optimization_step["total_lcc"])
)

# Risk reduction per optimization step: the decrease of total_risk with respect to the previous
# row. The first row is filled with its own total_risk (see the TODO below).
optimization_step["risk_reduction"] = (
    (-optimization_step["total_risk"].diff()).fillna(optimization_step["total_risk"])
)

# Determine the BC-ratio. In some cases a NaN is found, if there are multiple rows with the same
# step_number. In that case, carry forward the last value that is not NaN.
# .ffill() replaces the deprecated fillna(method="ffill").
optimization_step["bc"] = (
    optimization_step["risk_reduction"] / optimization_step["marginal_cost"]
).ffill()
# TODO: the first BC should be corrected. It depends on the initial risk of the dike traject
# (beta_initial --> Pf_initial), which is not taken into account here.

optimization_step.head(10)

Unnamed: 0,id,optimization_selected_measure_id,step_number,total_lcc,total_risk,measure_result_id,investment_year,measure_per_section_id,section_id,measure_id,...,name,year,DCREST,DBERM,BETA_TARGET,TRANSITION_LEVEL,standalone_cost,marginal_cost,risk_reduction,bc
0,1,399,1,82487.62,507855600000.0,255,0,6,1,8,...,Aanpassing bekleding,0,-999.0,-999.0,5.248081,0.5,82487.62,82487.617481,507855600000.0,6156750.0
1,2,3148,2,509630.4,479548100000.0,1852,0,54,9,8,...,Aanpassing bekleding,0,-999.0,-999.0,3.748238,3.65,427142.8,427142.793164,28307530000.0,66271.81
2,3,3467,3,1435300.0,441389500000.0,2027,0,60,10,8,...,Aanpassing bekleding,0,-999.0,-999.0,0.746078,3.49,925670.0,925669.952363,38158560000.0,41222.64
3,4,355,4,1476544.0,413851500000.0,211,0,6,1,8,...,Aanpassing bekleding,0,-999.0,-999.0,2.998904,0.75,123731.4,41243.80874,27537990000.0,667687.8
4,5,3143,5,1690116.0,394444100000.0,1847,0,54,9,8,...,Aanpassing bekleding,0,-999.0,-999.0,2.998381,3.9,640714.2,213571.396582,19407390000.0,90870.75
5,6,3477,6,1998672.0,364515700000.0,2037,0,60,10,8,...,Aanpassing bekleding,0,-999.0,-999.0,1.496325,3.74,1234227.0,308556.650788,29928470000.0,96995.07
6,7,3819,7,2181474.0,350897800000.0,2235,0,66,11,8,...,Aanpassing bekleding,0,-999.0,-999.0,0.857865,3.0,182801.6,182801.607523,13617900000.0,74495.49
7,8,4174,8,2309410.0,341101100000.0,2446,0,72,12,8,...,Aanpassing bekleding,0,-999.0,-999.0,0.670567,2.95,127935.9,127935.932774,9796711000.0,76575.13
8,9,3844,9,2492211.0,335364700000.0,2260,0,66,11,8,...,Aanpassing bekleding,0,-999.0,-999.0,2.785343,3.25,365603.2,182801.607523,5736346000.0,31380.17
9,10,4207,10,2620147.0,328173000000.0,2479,0,72,12,8,...,Aanpassing bekleding,0,-999.0,-999.0,3.334187,3.2,255871.9,127935.932774,7191696000.0,56213.26


In [686]:
# Collapse optimization_step to one row per step_number. Rows that share a step_number are
# combined column by column:
#   - maximum:  section_id, total_lcc, total_risk, marginal_cost, risk_reduction, bc,
#               DCREST, DBERM, BETA_TARGET, TRANSITION_LEVEL
#   - sum:      standalone_cost
#   - joined as text with "+": name, measure_id, measure_type_id, measure_per_section_id,
#               measure_result_id, optimization_selected_measure_id
def _join_as_text(values):
    """Return the values of one group converted to strings and joined with '+'."""
    return "+".join(values.astype(str))


# Output column -> (input column, aggregation). The order of the entries is the column order of the result.
aggregation_spec = {
    "section_id": ("section_id", "max"),
    "name": ("name", lambda names: "+".join(names)),
    "measure_id": ("measure_id", _join_as_text),
    "total_lcc": ("total_lcc", "max"),
    "total_risk": ("total_risk", "max"),
    "standalone_cost": ("standalone_cost", "sum"),
    "marginal_cost": ("marginal_cost", "max"),
    "risk_reduction": ("risk_reduction", "max"),
    "bc": ("bc", "max"),
    "DCREST": ("DCREST", "max"),
    "DBERM": ("DBERM", "max"),
    "BETA_TARGET": ("BETA_TARGET", "max"),
    "TRANSITION_LEVEL": ("TRANSITION_LEVEL", "max"),
    "measure_type_id": ("measure_type_id", _join_as_text),
    "measure_per_section_id": ("measure_per_section_id", _join_as_text),
    "measure_result_id": ("measure_result_id", _join_as_text),
    "optimization_selected_measure_id": ("optimization_selected_measure_id", _join_as_text),
}
steps_grouped = optimization_step.groupby("step_number")
df_merged = steps_grouped.agg(**aggregation_spec).reset_index()

# TO DO: ADD investment year

In [687]:
# Display the aggregated frame that holds one row per step_number.
df_merged

Unnamed: 0,step_number,section_id,name,measure_id,total_lcc,total_risk,standalone_cost,marginal_cost,risk_reduction,bc,DCREST,DBERM,BETA_TARGET,TRANSITION_LEVEL,measure_type_id,measure_per_section_id,measure_result_id,optimization_selected_measure_id
0,1,1,Aanpassing bekleding,8,8.248762e+04,5.078556e+11,8.248762e+04,82487.617481,5.078556e+11,6.156750e+06,-999.00,-999.0,5.248081,0.50,6,6,255,399
1,2,9,Aanpassing bekleding,8,5.096304e+05,4.795481e+11,4.271428e+05,427142.793164,2.830753e+10,6.627181e+04,-999.00,-999.0,3.748238,3.65,6,54,1852,3148
2,3,10,Aanpassing bekleding,8,1.435300e+06,4.413895e+11,9.256700e+05,925669.952363,3.815856e+10,4.122264e+04,-999.00,-999.0,0.746078,3.49,6,60,2027,3467
3,4,1,Aanpassing bekleding,8,1.476544e+06,4.138515e+11,1.237314e+05,41243.808740,2.753799e+10,6.676878e+05,-999.00,-999.0,2.998904,0.75,6,6,211,355
4,5,9,Aanpassing bekleding,8,1.690116e+06,3.944441e+11,6.407142e+05,213571.396582,1.940739e+10,9.087075e+04,-999.00,-999.0,2.998381,3.90,6,54,1847,3143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,293,35,Grondversterking binnenwaarts+Aanpassing bekle...,1+8,2.423685e+08,3.384063e+07,2.891434e+05,56389.068410,1.814003e+06,3.216940e+01,0.75,10.0,5.697083,1.60,1+6,205+210,6898+7066,11821+12106
293,294,16,Grondversterking binnenwaarts+Aanpassing bekle...,1+8,2.424961e+08,3.382423e+07,3.906800e+06,127626.720000,1.640207e+04,1.285160e-01,0.50,8.0,4.602560,4.45,1+6,91+96,3128+3304,5306+5608
294,295,2,Grondversterking binnenwaarts+Aanpassing bekle...,1+8,2.426556e+08,3.380783e+07,1.409839e+06,159533.400000,1.640207e+04,1.028128e-01,0.25,8.0,4.821829,2.05,1+6,7+12,278+415,432+703
295,296,19,Grondversterking binnenwaarts+Aanpassing bekle...,1+8,2.428164e+08,3.379143e+07,4.479450e+06,160767.360000,1.640207e+04,1.020236e-01,1.00,8.0,4.988586,4.60,1+6,109+114,3785+3960,6411+6696


In [534]:
# Close the connection to the database once all queries have run, so that the handle on the
# database file is released. Re-run the cell that opens the connection before querying again.
conn.close()