This notebook gives an example of how one can analyze different runs in the same database. 
This is relevant, for instance, if multiple analyses with veiligheidsrendement are made for the same traject.

### Import necessary libraries

In [1]:
import copy
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from pathlib import Path
import numpy as np
from scipy.stats import norm
from peewee import fn
from collections import defaultdict
from vrtool.orm.models import *
from vrtool.orm.orm_controllers import open_database
from vrtool.common.enums import MechanismEnum
from postprocessing.database_analytics import *
from postprocessing.database_access_functions import * 
from postprocessing.generate_output import *
from scipy.interpolate import interp1d

# seaborn documents `set` as an alias of `set_theme`, which is the preferred interface.
sns.set_theme(style="whitegrid")
# Palette of 10 colours from seaborn's "colorblind" palette.
colors = sns.color_palette("colorblind", 10)



### Get the runs that are in the database
First we get an overview of the runs in the database

In [None]:
# NOTE(review): absolute, user-specific path — point this at your own copy of the database.
database_path = Path(r'c:\Users\rikkert\OneDrive - Stichting Deltares\Desktop\dec2024_backup_C\VRM\traject10_voor_handreiking\10-2\database_10-2.sqlite')

# Keep only the runs whose optimization type is VEILIGHEIDSRENDEMENT.
run_list = [
    run_info
    for run_info in get_overview_of_runs(database_path)
    if run_info['optimization_type_name'] == 'VEILIGHEIDSRENDEMENT'
]

# The table is the last expression of the cell, so the notebook renders it.
# (Previously the DataFrame was built mid-cell and discarded, and the raw list was printed.)
pd.DataFrame(run_list)

For each run, we get the optimization steps and the step with minimal total cost

In [3]:
# Optimization steps of every run, keyed by the run's name.
optimization_steps = {}
for run_info in run_list:
    optimization_steps[run_info['name']] = get_optimization_steps_for_run_id(database_path, run_info['id'])

# Result of get_minimal_tc_step for every run, keyed by the run's name as well.
minimal_tc_steps = {}
for run_name, steps_of_run in optimization_steps.items():
    minimal_tc_steps[run_name] = get_minimal_tc_step(steps_of_run)


### Reading measures per step
The next step is to read the measures and parameters of these measures for each optimization step such that we can compare the measures that are taken in each step and for each section.

In [4]:
# Measures of every run, keyed by run id.
lists_of_measures = {}
for run_info in run_list:
    lists_of_measures[run_info['id']] = get_measures_for_run_id(database_path, run_info['id'])

# The same measures passed through get_measures_per_step_number, keyed by run id.
measures_per_step = {}
for run_info in run_list:
    measures_per_step[run_info['id']] = get_measures_per_step_number(lists_of_measures[run_info['id']])

If we want to see the failure probability per step, we first need to load the original assessment for each mechanism; then we can compute the reliability for each step during the optimization. 

In [5]:
# Original assessment for each of the three mechanisms used in this notebook.
mechanisms_to_load = [MechanismEnum.OVERFLOW, MechanismEnum.PIPING, MechanismEnum.STABILITY_INNER]
assessment_results = {}
for mechanism_to_load in mechanisms_to_load:
    assessment_results[mechanism_to_load] = import_original_assessment(database_path, mechanism_to_load)

# Reliability for each optimization step, keyed by run id.
reliability_per_step = {}
for run_info in run_list:
    reliability_per_step[run_info['id']] = get_reliability_for_each_step(database_path, measures_per_step[run_info['id']])

From these inputs we build a `stepwise_assessment`, using the investments in `reliability_per_step`.

In [6]:
# Stepwise assessment per run id; every run gets its own deep copy of the original assessment.
stepwise_assessment = {}
for run_info in run_list:
    assessment_copy = copy.deepcopy(assessment_results)
    stepwise_assessment[run_info['id']] = assessment_for_each_step(assessment_copy, reliability_per_step[run_info['id']])

The next step is to derive the traject probability for each mechanism for each step using the `calculate_traject_probability_for_steps` function

In [None]:
# Traject probability for every step, keyed by run id.
traject_prob = {}
for run_info in run_list:
    traject_prob[run_info['id']] = calculate_traject_probability_for_steps(stepwise_assessment[run_info['id']])

# Show, for each run, the entry at that run's minimal_tc_steps step.
for count, run in enumerate(run_list):
    probabilities_at_selected_step = traject_prob[run['id']][minimal_tc_steps[run['name']]]
    print(probabilities_at_selected_step)


Now we check the measures for each section. We print the ids of the measures

In [None]:
# Measures per section at each run's minimal_tc_steps step, keyed by run id.
measures_per_section = {run['id']: get_measures_per_section_for_step(measures_per_step[run['id']], minimal_tc_steps[run['name']]) for run in run_list}

# Union of the section keys over all runs.
section_names = [list(measures_per_section[run].keys()) for run in measures_per_section.keys()]
section_names = list(set([item for sublist in section_names for item in sublist]))

for section in section_names:
    for run in measures_per_section.keys():
        try:
            print(f"Section {section} in run {run} has measures {measures_per_section[run][section][0]} at time {measures_per_section[run][section][1]}")  
        except (KeyError, IndexError, TypeError):
            # Only a missing or incomplete entry counts as "no measures";
            # a bare `except:` would also hide KeyboardInterrupt and real bugs.
            print(f"Section {section} in run {run} has no measures in run {run}")

Now we get for each section the parameters of the measure + timing + cost. This is stored in a `pd.DataFrame` for each run.

In [None]:
# run id -> section -> list holding one parameter dict per measure
section_parameters = defaultdict(dict)

for run, sections_of_run in measures_per_section.items():
    for section, section_entry in sections_of_run.items():
        parameters_of_section = []
        section_parameters[run][section] = parameters_of_section
        # The first element of the section entry is iterated as the section's measures.
        for measure in section_entry[0]:
            # Merge the measure's parameters, costs and type into one dict.
            parameters = get_measure_parameters(measure, database_path)
            parameters.update(get_measure_costs(measure, database_path))
            parameters.update(get_measure_type(measure, database_path))
            # A "Grondversterking binnenwaarts" with dberm == 0 and dcrest == 0 gets cost 0.
            is_zero_soil_reinforcement = (
                parameters['name'] == 'Grondversterking binnenwaarts'
                and parameters['dberm'] == 0
                and parameters['dcrest'] == 0
            )
            if is_zero_soil_reinforcement:
                print(f"Setting costs to 0 for measure {parameters['name']} in section {section} in run {run}")
                parameters['cost'] = 0
            parameters_of_section.append(parameters)

# One DataFrame per run id, built by measure_per_section_to_df.
measure_parameters = {}
for run_info in run_list:
    measure_parameters[run_info['id']] = measure_per_section_to_df(measures_per_section[run_info['id']], section_parameters[run_info['id']])


We list the investment for each year for each section

In [None]:
# Inputs for the cells below: the original assessment (initial state) and the
# state at the minimal_tc_steps step of the stepwise assessment (final state).

# Traject probability per mechanism for the original assessment.
initial_traject_probability_per_mechanism = calculate_traject_probability(assessment_results)
print(f"Initial traject probability is {initial_traject_probability_per_mechanism}")

# The time steps are read from the OVERFLOW entry.
overflow_entry = initial_traject_probability_per_mechanism[MechanismEnum.OVERFLOW]
n_time_steps = len(overflow_entry)
time_steps = overflow_entry.keys()
print(f"Number of time steps is {n_time_steps}")
print(f"Time steps are {time_steps}")

# Look up the final state of every run at its minimal_tc_steps step.
# The two names are overwritten on every iteration, so after the loop they
# hold the values of the last run in run_list.
for count, run in enumerate(run_list):
    selected_step = minimal_tc_steps[run['name']]
    final_traject_probability_per_mechanism = traject_prob[run['id']][selected_step]
    final_section_probability_per_mechanism = stepwise_assessment[run['id']][selected_step]

print(f"Final traject probability is {final_traject_probability_per_mechanism}")
print(f"Final section probability is {final_section_probability_per_mechanism}")

In [None]:
# The economic damage is the flood_damage field of DikeTrajectInfo;
# the record with id 1 is read.
with open_database(database_path) as db:
    traject_info_query = DikeTrajectInfo.select(DikeTrajectInfo.flood_damage).where(DikeTrajectInfo.id == 1)
    traject_info = traject_info_query.get()
    damage = traject_info.flood_damage

print(f"Damage is {damage}")

In [None]:
discount_rate = 0.03

# Damage divided by (1 + discount_rate)^year for years 0..99, as a (1, 100) row vector.
discount_factors = np.power(1+discount_rate, np.arange(0,100))
damage_per_year = np.divide(damage, discount_factors).reshape(1,100)
print(damage_per_year)

In [None]:
# function that calculates total risk, given traject reliability per mechanism, damage and annual discount rate

def calculate_total_risk(traject_reliability, damage, discount_rate, n_years=100):
    """Return the total discounted risk over the first ``n_years`` years.

    Parameters
    ----------
    traject_reliability : dict
        Maps each mechanism to a ``{time: value}`` dict. The values enter the
        calculation as ``1 - value``, i.e. they are used as failure
        probabilities despite the word "reliability" in the name.
    damage : float
        Damage in case of failure, undiscounted.
    discount_rate : float
        Annual discount rate, e.g. ``0.03``.
    n_years : int, optional
        Number of years (``0 .. n_years - 1``) that are summed. Defaults to
        100, the value that used to be hard-coded.

    Returns
    -------
    float
        Sum over the years of discounted damage times the combined failure
        probability. The value is also printed, truncated to an integer.

    Notes
    -----
    Values are interpolated linearly between the given times and extrapolated
    linearly outside them; the result is not clipped to ``[0, 1]``.
    The mechanisms are combined as ``1 - prod(1 - p_mechanism)``.
    """
    years = np.arange(n_years)
    damage_per_year = damage / np.power(1 + discount_rate, years)

    # Probability that none of the mechanisms fails, per year.
    non_failure_probability = np.ones(n_years)
    for time_series in traject_reliability.values():
        times, probabilities = zip(*time_series.items())
        interpolate = interp1d(times, probabilities, kind='linear', fill_value='extrapolate')
        non_failure_probability = non_failure_probability * (1 - interpolate(years))

    total_failure_probability = 1 - non_failure_probability
    total_risk = np.sum(damage_per_year * total_failure_probability)
    print(f"Total risk is {int(total_risk)}")
    return total_risk

# Total risk for the final traject probabilities; the risk increases further on are measured against it.
total_risk = calculate_total_risk(
    traject_reliability=final_traject_probability_per_mechanism,
    damage=damage,
    discount_rate=discount_rate,
)

In [None]:
# Backward VRM index calculation:
# section by section, replace the final betas of that section by its initial betas,
# recalculate the traject failure probability and the resulting increase in risk.
# The VR index is the increase in risk divided by the costs of the measure.

# The final probabilities and total_risk stem from the last run in run_list (the loop over
# run_list overwrites them for every run), so the measure table of that same run is used
# here instead of a hard-coded run id.
analysis_run_id = run_list[-1]['id']
run_measures = measure_parameters[analysis_run_id]

vr_index = {}

for section in section_names:
    # Work on a copy so the final state itself stays untouched.
    final_section_probability_per_mechanism_temp = copy.deepcopy(final_section_probability_per_mechanism)

    for mechanism in assessment_results.keys():
        final_section_probability_per_mechanism_temp[mechanism][section]['beta'] = assessment_results[mechanism][section]['beta']

    # recalculate final traject probability
    final_traject_probability_per_mechanism_temp = calculate_traject_probability(final_section_probability_per_mechanism_temp)

    # risk with this section reset to its initial state, minus the risk of the final state
    risk_increased = calculate_total_risk(final_traject_probability_per_mechanism_temp, damage, discount_rate)
    delta_risk = risk_increased - total_risk

    section_measures = run_measures[run_measures['section_id'] == section]
    if section_measures.empty:
        # no measure for this section in the analysed run
        vr_index[section] = 0
        continue

    is_zero_soil_reinforcement = (
        (section_measures['name'] == 'Grondversterking binnenwaarts')
        & (section_measures['dcrest'] == 0)
        & (section_measures['dberm'] == 0)
    )
    if is_zero_soil_reinforcement.any():
        print(f"Section {section} has grondversterking 0/0")
        vr_index[section] = 0
    else:
        # NOTE(review): only the LCC of the first row of this section is used — confirm that
        # this is the intended cost when a section has several rows.
        section_lcc = section_measures['LCC'].values[0]
        vr_index[section] = delta_risk / section_lcc



In [None]:
# Measure table of the last run in run_list, i.e. the run whose final probabilities
# remain after the loop over run_list (instead of a hard-coded run id).
measure_parameters[run_list[-1]['id']]

In [None]:
print(vr_index)

# Order the sections from highest to lowest VR index.
ranked_sections = sorted(vr_index.items(), key=lambda section_and_index: section_and_index[1], reverse=True)
sorted_vr_index = dict(ranked_sections)
print(sorted_vr_index)



In [None]:
section_reliability_assessment_list = get_section_assessment_results(database_path)
print(section_reliability_assessment_list)

# Keep only the entries at this point in time.
time = 25
section_reliability_assessment_list = [
    entry for entry in section_reliability_assessment_list if entry['time'] == time
]
print(section_reliability_assessment_list)

# Failure probability from beta: Pf = Phi(-beta); the dicts are updated in place.
for section_data in section_reliability_assessment_list:
    section_data.update(Pf=norm.cdf(-section_data['beta']))

# Copy of the list, ordered from highest to lowest Pf.
section_reliability_assessment_list_sorted_pf = list(section_reliability_assessment_list)
section_reliability_assessment_list_sorted_pf.sort(key=lambda entry: entry['Pf'], reverse=True)
print(section_reliability_assessment_list_sorted_pf)

# One line per section: its section_data value and its Pf.
print()
for section in section_reliability_assessment_list_sorted_pf:
    print(f"section_id: {section['section_data']}, Pf: {section['Pf']}")


In [None]:
# create a pandas dataframe with all sections in database that are inanalyse = True

def get_sections_in_analysis(database_path):
    """Return a DataFrame with the sections that are flagged as in analysis.

    Parameters
    ----------
    database_path : path-like
        Passed on to ``open_database``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (int) and ``section_name``. ``section_name`` is cast to
        int when every name allows it and is left unchanged otherwise. When no
        section matches, an empty frame with both columns is returned.
    """
    with open_database(database_path):
        # `== True` builds the peewee filter expression; do not replace it with `is True`.
        sections = SectionData.select().where(SectionData.in_analysis == True)  # noqa: E712
        # Collect the rows while the database is open, and build the frame once
        # instead of concatenating row by row.
        records = [
            {'id': int(section.id), 'section_name': section.section_name}
            for section in sections
        ]

    sections_analysis = pd.DataFrame(records, columns=['id', 'section_name'])

    # section names are sometimes integers, sometimes strings. Try to make them integers if possible
    try:
        sections_analysis['section_name'] = sections_analysis['section_name'].astype(int)
    except (ValueError, TypeError):
        # at least one name is not an integer: keep the names as they are
        pass
    return sections_analysis

sections_analysis = get_sections_in_analysis(database_path)

# Show the table first, then the dtype of every column.
for overview in (sections_analysis, sections_analysis.dtypes):
    print(overview)

In [None]:
# Point in time for which the section results are selected.
time = 25

# Keep the entries at `time` and drop the 'id' key from each of them.
section_reliability_assessment_list = [
    {key: value for key, value in entry.items() if key != 'id'}
    for entry in get_section_assessment_results(database_path)
    if entry['time'] == time
]

# Failure probability from beta: Pf = scipy.stats.norm.cdf(-beta)
for section_data in section_reliability_assessment_list:
    section_data['Pf'] = norm.cdf(-section_data['beta'])

print(section_reliability_assessment_list)

# Write Pf into the rows of sections_analysis whose 'id' equals the entry's 'section_data' value.
pf_column = f'Pf_init_t{time}'
for section_data in section_reliability_assessment_list:
    matching_rows = sections_analysis['id'] == section_data['section_data']
    sections_analysis.loc[matching_rows, pf_column] = section_data['Pf']

print(sections_analysis)


In [None]:
# Store every section's VR index and its rank (1 = highest index) in the rows of
# sections_analysis whose 'id' equals the section.
for rank, (section, index_value) in enumerate(sorted_vr_index.items(), start=1):
    matching_rows = sections_analysis['id'] == section
    sections_analysis.loc[matching_rows, 'vr_index'] = index_value
    sections_analysis.loc[matching_rows, 'vr_index_ranking'] = rank

# Rows without a VR index get -999 in both columns; the ranking is stored as int.
sections_analysis['vr_index'] = sections_analysis['vr_index'].fillna(-999)
sections_analysis['vr_index_ranking'] = sections_analysis['vr_index_ranking'].fillna(-999).astype(int)

print(sections_analysis)

In [None]:
def get_forward_vr_order(measures_per_step, run=None, last_step=None):
    """Return the sections in the order in which they first appear in the steps of a run.

    Parameters
    ----------
    measures_per_step : dict
        Maps a run id to a dict of steps; each step has a ``'section_id'``
        sequence, of which the first element is used.
    run : dict, optional
        Run with the keys ``'id'`` and ``'name'``. Defaults to the last run in
        the notebook-level ``run_list``, so the result no longer depends on
        whatever a previous loop left in the global ``run``.
    last_step : int, optional
        Steps are counted from 0 in iteration order; steps with a position
        ``<= last_step`` are included. Defaults to the notebook-level
        ``minimal_tc_steps[run['name']]``.

    Returns
    -------
    list
        Section ids without duplicates, in order of first appearance.
    """
    if run is None:
        run = run_list[-1]
    if last_step is None:
        last_step = minimal_tc_steps[run['name']]

    steps_of_run = measures_per_step[run['id']].values()
    sections_in_step_order = [
        step['section_id'][0]
        for position, step in enumerate(steps_of_run)
        if position <= last_step
    ]
    # dict.fromkeys keeps the first occurrence of every section and preserves the order
    return list(dict.fromkeys(sections_in_step_order))

order_forward_vr = get_forward_vr_order(measures_per_step)
print(order_forward_vr)

# Store the 1-based position of every section in order_forward_vr in the rows of
# sections_analysis whose 'id' equals the section.
for position, section in enumerate(order_forward_vr, start=1):
    matching_rows = sections_analysis['id'] == section
    sections_analysis.loc[matching_rows, 'forward_vr_order'] = position

# Rows without a position get -999; the column is stored as int.
sections_analysis['forward_vr_order'] = sections_analysis['forward_vr_order'].fillna(-999).astype(int)

print(sections_analysis)