First, packages are imported.

In [105]:
#   import packages
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn.objects as so
import networkx as nx
import pickle
import functools

from ema_workbench import (
    Model,
    Policy,
    Scenario,
    ema_logging,
    SequentialEvaluator,
    MultiprocessingEvaluator,
    util,
    ScalarOutcome,
)
from dike_model_function import DikeNetwork  # @UnresolvedImport
from problem_formulation import get_model_for_problem_formulation, sum_over, sum_over_time
from ema_workbench.em_framework.samplers import sample_uncertainties
from ema_workbench.em_framework.optimization import ArchiveLogger, EpsilonProgress
from ema_workbench.em_framework import parameters_from_csv
from ema_workbench.em_framework.evaluators import BaseEvaluator
from ema_workbench.analysis import parcoords
from ema_workbench.analysis import prim
from ema_workbench import Samplers

Load results from Uncertainty Analysis

In [106]:
# Read the four pickled outputs of the uncertainty analysis in one pass.
# The targets on the left are unpacked in the same order as the paths below.
# `uncertainty_results_run` is later unpacked into (experiments, outcomes).
(
    uncertainty_results,
    experiments_and_results,
    aggregated_outcomes,
    uncertainty_results_run,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        r'../generated_datasets/policy_uncertainty_provinces.pkl',
        r'../generated_datasets/policy_uncertainty_experiments_results.pkl',
        r'../generated_datasets/policy_uncertainty_aggregated.pkl',
        r'../generated_datasets/policy_uncertainty_test.pkl',
    )
)

# Calculate Robustness Metrics

First, each outcome's values over time are summed into a single value per experiment, and the results are stored in a DataFrame.

In [107]:
# The loaded run is an (experiments, outcomes) pair.
experiments, outcomes = uncertainty_results_run

# Collapse every outcome to one number per experiment by summing that
# experiment's entry; each outcome becomes one column of `results_df`.
n_experiments = len(experiments)
results_df = pd.DataFrame(
    {
        outcome_name: [
            outcomes[outcome_name][row].sum() for row in range(n_experiments)
        ]
        for outcome_name in outcomes
    }
)

Then, we convert the expected number of deaths in each dike-ring column into a monetary cost, so that all outcomes share the same unit (€) and can easily be summed into a total cost.

In [108]:
death_cost_per_person = 6_300_000

# Convert the expected deaths of every dike ring into a monetary cost so that
# every column has the same unit (€).
dike_rings = ["A.1", "A.2", "A.3", "A.4", "A.5"]
for dike_ring in dike_rings:
    results_df[dike_ring + "_Deaths_Cost"] = (
        results_df[dike_ring + "_Expected Number of Deaths"] * death_cost_per_person
    )

# The raw death counts are removed once their cost equivalent exists.
to_drop = [dike_ring + "_Expected Number of Deaths" for dike_ring in dike_rings]
results_df.drop(to_drop, axis=1, inplace=True)

### Gelderland - Overijssel Split

A function is created that sums the dike-ring columns of an outcome into two province totals: Gelderland (dike rings A.1–A.3) and Overijssel (dike rings A.4–A.5).

In [109]:
def combine_columns_province(dataframe, name):
    """Add per-province totals of one outcome to ``dataframe`` in place.

    Columns whose label contains ``name`` are grouped by their dike-ring
    prefix: ``A.1``-``A.3`` count towards Gelderland, ``A.4``-``A.5`` towards
    Overijssel. The row-wise sums are stored in two new columns named
    ``'Total <name> Gelderland'`` and ``'Total <name> Overijssel'``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with string column labels; it is modified in place.
    name : str
        Substring identifying the outcome columns to combine.

    Returns
    -------
    None
    """
    gelderland_prefixes = ('A.1', 'A.2', 'A.3')
    overijssel_prefixes = ('A.4', 'A.5')

    # Both column lists are fixed before any total is added to the frame.
    outcome_columns = [label for label in dataframe.columns if name in label]
    gelderland_columns = [
        label for label in outcome_columns if label.startswith(gelderland_prefixes)
    ]
    overijssel_columns = [
        label for label in outcome_columns if label.startswith(overijssel_prefixes)
    ]

    dataframe['Total ' + str(name) + ' Gelderland'] = dataframe[gelderland_columns].sum(axis=1)
    dataframe['Total ' + str(name) + ' Overijssel'] = dataframe[overijssel_columns].sum(axis=1)


A new DataFrame is then created with the province-sorted data.

In [110]:
# Work on a copy so `results_df` keeps only the dike-ring level columns.
provinces = results_df.copy()

# Add Gelderland / Overijssel totals for each dike-ring level outcome.
for outcome_name in ('Expected Annual Damage', 'Dike Investment Costs', 'Deaths_Cost'):
    combine_columns_province(provinces, outcome_name)

The dike-ring level columns are then removed, and two columns are added: the overall total cost and the total cost for the province of Overijssel.

In [111]:
# The dike-ring level columns (A.1 ... A.5) have been rolled up into province
# totals, so only the remaining columns are kept. They are selected in a
# single step rather than dropped one by one while iterating over the frame,
# and the frame is taken straight from `provinces` instead of concatenating
# two experiment columns and slicing them off again.
dike_ring_prefixes = ('A.1', 'A.2', 'A.3', 'A.4', 'A.5')
province_level_columns = [
    column for column in provinces.columns if not column.startswith(dike_ring_prefixes)
]
aggregated_outcomes_provinces = provinces[province_level_columns].copy()

# Overall cost: row-wise sum of every remaining column.
aggregated_outcomes_provinces['total_cost[€]'] = aggregated_outcomes_provinces.sum(axis=1)

# Overijssel cost: its province totals plus the two columns that are not split
# by province (RfR and evacuation costs), which are counted in full.
overijssel_sum = [
    'RfR Total Costs',
    'Expected Evacuation Costs',
    'Total Expected Annual Damage Overijssel',
    'Total Dike Investment Costs Overijssel',
    'Total Deaths_Cost Overijssel',
]
aggregated_outcomes_provinces['total_cost[€]_overijssel'] = (
    aggregated_outcomes_provinces.loc[:, overijssel_sum].sum(axis=1)
)

aggregated_outcomes_provinces

Unnamed: 0,RfR Total Costs,Expected Evacuation Costs,Total Expected Annual Damage Gelderland,Total Expected Annual Damage Overijssel,Total Dike Investment Costs Gelderland,Total Dike Investment Costs Overijssel,Total Deaths_Cost Gelderland,Total Deaths_Cost Overijssel,total_cost[€],total_cost[€]_overijssel
0,121200000.0,0.0,3.503253e+09,1.195769e+06,1.035357e+08,8.453408e+06,2.502326e+07,4213.778424,3.762665e+09,1.308534e+08
1,121200000.0,0.0,1.588635e+09,0.000000e+00,1.035357e+08,8.453408e+06,2.212850e+07,0.000000,1.843953e+09,1.296534e+08
2,121200000.0,0.0,3.029340e+08,3.244102e+07,1.035357e+08,8.453408e+06,2.641041e+06,197638.128681,5.714029e+08,1.622921e+08
3,121200000.0,0.0,2.548956e+09,0.000000e+00,1.035357e+08,8.453408e+06,2.552349e+07,0.000000,2.807668e+09,1.296534e+08
4,121200000.0,0.0,6.949494e+07,1.497793e+08,1.035357e+08,8.453408e+06,6.681032e+05,934871.641676,4.540663e+08,2.803676e+08
...,...,...,...,...,...,...,...,...,...,...
224995,121200000.0,0.0,2.778131e+09,0.000000e+00,7.331545e+07,7.332080e+06,1.214940e+07,0.000000,2.992128e+09,1.285321e+08
224996,121200000.0,0.0,2.703409e+09,0.000000e+00,7.331545e+07,7.332080e+06,1.226335e+07,0.000000,2.917520e+09,1.285321e+08
224997,121200000.0,0.0,2.486651e+09,0.000000e+00,7.331545e+07,7.332080e+06,1.220433e+07,0.000000,2.700703e+09,1.285321e+08
224998,121200000.0,0.0,5.964043e+08,0.000000e+00,7.331545e+07,7.332080e+06,5.412402e+06,0.000000,8.036643e+08,1.285321e+08


Lastly, we add the policy number.

In [112]:
# Attach the policy of every experiment. The column is selected by name
# rather than by position, so a change in the column order of `experiments`
# cannot silently pick a different column. `assign` returns a new frame, so
# `aggregated_outcomes_provinces` itself is left untouched.
aggregated_outcomes_provinces_policy = aggregated_outcomes_provinces.assign(
    policy=experiments['policy']
)
aggregated_outcomes_provinces_policy

Unnamed: 0,RfR Total Costs,Expected Evacuation Costs,Total Expected Annual Damage Gelderland,Total Expected Annual Damage Overijssel,Total Dike Investment Costs Gelderland,Total Dike Investment Costs Overijssel,Total Deaths_Cost Gelderland,Total Deaths_Cost Overijssel,total_cost[€],total_cost[€]_overijssel,policy
0,121200000.0,0.0,3.503253e+09,1.195769e+06,1.035357e+08,8.453408e+06,2.502326e+07,4213.778424,3.762665e+09,1.308534e+08,0
1,121200000.0,0.0,1.588635e+09,0.000000e+00,1.035357e+08,8.453408e+06,2.212850e+07,0.000000,1.843953e+09,1.296534e+08,0
2,121200000.0,0.0,3.029340e+08,3.244102e+07,1.035357e+08,8.453408e+06,2.641041e+06,197638.128681,5.714029e+08,1.622921e+08,0
3,121200000.0,0.0,2.548956e+09,0.000000e+00,1.035357e+08,8.453408e+06,2.552349e+07,0.000000,2.807668e+09,1.296534e+08,0
4,121200000.0,0.0,6.949494e+07,1.497793e+08,1.035357e+08,8.453408e+06,6.681032e+05,934871.641676,4.540663e+08,2.803676e+08,0
...,...,...,...,...,...,...,...,...,...,...,...
224995,121200000.0,0.0,2.778131e+09,0.000000e+00,7.331545e+07,7.332080e+06,1.214940e+07,0.000000,2.992128e+09,1.285321e+08,44
224996,121200000.0,0.0,2.703409e+09,0.000000e+00,7.331545e+07,7.332080e+06,1.226335e+07,0.000000,2.917520e+09,1.285321e+08,44
224997,121200000.0,0.0,2.486651e+09,0.000000e+00,7.331545e+07,7.332080e+06,1.220433e+07,0.000000,2.700703e+09,1.285321e+08,44
224998,121200000.0,0.0,5.964043e+08,0.000000e+00,7.331545e+07,7.332080e+06,5.412402e+06,0.000000,8.036643e+08,1.285321e+08,44


# Generate outcome optimal for Overijssel

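The best 5 (cheapest) policies are selected. To rank them we again need the robustness metric, Mean Square Deviation, computed per policy as $\text{mean}^2 + \text{std}^2$ of an outcome over all experiments of that policy. We construct a function that adds this metric as a column.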

In [141]:
def mean_square_deviation(df_input, df_output, column_name):
    """Store the per-policy mean square deviation of one column in ``df_output``.

    For every group of the ``'policy'`` column of ``df_input`` the metric is
    ``mean**2 + std**2`` of ``column_name``. The result is written to
    ``df_output`` under the key ``(column_name, 'mean square deviation')``.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame holding a ``'policy'`` column and ``column_name``.
    df_output : pandas.DataFrame
        Frame that receives the metric column; it is modified in place.
    column_name : str
        Column of ``df_input`` to evaluate.

    Returns
    -------
    pandas.DataFrame
        The per-policy aggregation with its ``mean`` and ``std`` columns
        removed, i.e. a frame indexed by policy that has no columns left.
    """
    mean_key = (column_name, 'mean')
    std_key = (column_name, 'std')

    per_policy = df_input.groupby(['policy']).agg({column_name: ['mean', 'std']})
    squared_mean = np.square(per_policy[mean_key])
    squared_std = np.square(per_policy[std_key])
    df_output[(column_name, 'mean square deviation')] = squared_mean + squared_std

    return per_policy.drop([mean_key, std_key], axis=1)

In [142]:
robust_df = pd.DataFrame()

# One mean-square-deviation column per cost component relevant to Overijssel.
overijssel_cost_components = (
    'RfR Total Costs',
    'Expected Evacuation Costs',
    'Total Expected Annual Damage Overijssel',
    'Total Dike Investment Costs Overijssel',
    'Total Deaths_Cost Overijssel',
)
for cost_component in overijssel_cost_components:
    mean_square_deviation(aggregated_outcomes_provinces_policy, robust_df, cost_component)

robust_df

Unnamed: 0_level_0,"(RfR Total Costs, mean square deviation)","(Expected Evacuation Costs, mean square deviation)","(Total Expected Annual Damage Overijssel, mean square deviation)","(Total Dike Investment Costs Overijssel, mean square deviation)","(Total Deaths_Cost Overijssel, mean square deviation)"
policy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.468944e+16,0.0,2.012732e+16,71460100000000.0,605662900000.0
1,3.840281e+17,0.0,3403165000000000.0,39952520000000.0,94490800000.0
2,7157160000000000.0,2150062000.0,7.645692e+16,93996810000000.0,54118980000.0
3,5.875776e+16,434349900.0,8135886000000000.0,39952520000000.0,29661450000.0
4,0.0,770181100.0,5.091462e+16,39952520000000.0,200120200000.0
5,1.468944e+16,2489074000.0,1.89216e+16,53759400000000.0,8060732000.0
6,1.281198e+18,867791500.0,2780349000000000.0,0.0,1729214000.0
7,5.875776e+16,422257000.0,9451960000000000.0,39952520000000.0,34617590000.0
8,3.840281e+17,388150200.0,2232683000000000.0,39952520000000.0,7656729000.0
9,1.468944e+16,0.0,1.900966e+16,53759400000000.0,561154200000.0


In [123]:
n_scenarios_of_interest = 5

# Per-policy mean, standard deviation and mean square deviation
# (mean**2 + std**2) of the Overijssel total cost. The statistics are computed
# in this cell so it does not depend on a `grouping` variable left behind by
# another cell, which would break a top-to-bottom run on a fresh kernel.
overijssel_cost = 'total_cost[€]_overijssel'
grouping = aggregated_outcomes_provinces_policy.groupby(['policy']).agg(
    {overijssel_cost: ['mean', 'std']}
)
grouping[(overijssel_cost, 'mean square deviation')] = (
    np.square(grouping[(overijssel_cost, 'mean')])
    + np.square(grouping[(overijssel_cost, 'std')])
)

# Keep the policies with the lowest mean square deviation.
results_of_interest = grouping.sort_values(
    by=(overijssel_cost, 'mean square deviation'), ascending=True
).head(n_scenarios_of_interest)

# Forward slashes keep the path portable (a backslash path is a plain file
# name outside Windows) and match the other paths used in this notebook.
with open('../generated_datasets/final_policies_overijssel.pkl', 'wb') as pickle_file:
    pickle.dump(results_of_interest, pickle_file)

results_of_interest

Unnamed: 0_level_0,total_cost[€]_overijssel,total_cost[€]_overijssel,total_cost[€]_overijssel
Unnamed: 0_level_1,mean,std,mean square deviation
policy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
26,83313910.0,193350100.0,4.432547e+16
11,84076510.0,193445300.0,4.448995e+16
5,176081600.0,129186500.0,4.769388e+16
44,176305700.0,129831600.0,4.793996e+16
9,176401300.0,130086800.0,4.803999e+16


### Visualization

In [126]:
# Policy labels of the selected policies, in ranked order.
results_of_interest.index.values

[26, 11, 5, 44, 9]
Categories (45, int64): [0, 1, 2, 3, ..., 41, 42, 43, 44]

In [147]:
# Set the figure size before the figure is created; setting it after
# `savefig` only affects figures made later (or a second run of this cell).
sns.set(rc={'figure.figsize': (15, 6)})

# `results_of_interest` is indexed by policy label, so the matching rows of
# `robust_df` are selected by label (.loc). Positional selection (.iloc) is
# only correct while every label happens to equal its row position.
data = robust_df.loc[list(results_of_interest.index)]

# Parallel-axes plot of the robustness metrics of the selected policies.
limits = parcoords.get_limits(data)
paraxes = parcoords.ParallelAxes(limits)
paraxes.plot(data)
sns.despine()
plt.savefig(r'../visualizations/Scenario_Discovery/pareto_visualization.png', bbox_inches='tight')

# Generate outcome optimal for Gelderland and Overijssel combined.

The same is done for the total cost of the provinces of Gelderland and Overijssel combined.

In [115]:
# Per-policy mean and standard deviation of the combined total cost, plus the
# mean square deviation (mean**2 + std**2) derived from them.
total_cost = 'total_cost[€]'
grouping = aggregated_outcomes_provinces_policy.groupby(['policy']).agg(
    {total_cost: ['mean', 'std']}
)
squared_mean = np.square(grouping[(total_cost, 'mean')])
squared_std = np.square(grouping[(total_cost, 'std')])
grouping[(total_cost, 'mean square deviation')] = squared_mean + squared_std

In [116]:
n_scenarios_of_interest = 5

# Keep the policies with the lowest mean square deviation of the total cost.
results_of_interest = grouping.sort_values(
    by=('total_cost[€]', 'mean square deviation'), ascending=True
).head(n_scenarios_of_interest)

# Forward slashes keep the path portable (a backslash path is a plain file
# name outside Windows) and match the other paths used in this notebook.
with open('../generated_datasets/final_policies.pkl', 'wb') as pickle_file:
    pickle.dump(results_of_interest, pickle_file)

results_of_interest

Unnamed: 0_level_0,total_cost[€],total_cost[€],total_cost[€]
Unnamed: 0_level_1,mean,std,mean square deviation
policy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
36,1033884000.0,714046900.0,1.578779e+18
2,1181345000.0,713392500.0,1.904505e+18
10,1373723000.0,966050600.0,2.820368e+18
25,1422984000.0,978760500.0,2.982856e+18
1,1481486000.0,888033400.0,2.983405e+18


### PRIM

In [117]:
# Explanatory variables for PRIM: the first 19 columns of the combined table.
# NOTE(review): presumably these are the uncertainties and levers — confirm.
x = experiments_and_results.iloc[:, :19]

# Run PRIM once per aggregated outcome (every column from the third onwards).
for var in aggregated_outcomes.iloc[:, 2:].columns:
    var_path = str('../visualizations/Scenario_Discovery/policy_' + var + '_PRIM.png')
    # Not used in this cell; kept because another cell may rely on it.
    var2_path = str('../visualizations/Scenario_Discovery/policy_' + var + '_PRIM_inspect.png')

    # Cases of interest: experiments whose outcome lies above the 75th percentile.
    y_total_damage = aggregated_outcomes[var].values
    y_total_damage = y_total_damage > np.percentile(y_total_damage, 75)

    prim_alg_DAM = prim.Prim(x,
                             y_total_damage,
                             threshold=0.70,
                             peel_alpha=0.05, )  # TODO: these parameters still need tuning
    box_DAM = prim_alg_DAM.find_box()

    img = box_DAM.show_tradeoff()
    plt.title(var)
    fig = img.get_figure()
    fig.savefig(var_path, bbox_inches='tight')
    # Close the figure instead of only clearing it: a cleared figure stays
    # open, uses memory and is rendered as an empty figure below the cell.
    plt.close(fig)

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>