In [1]:
from ema_workbench import Scenario, Policy, MultiprocessingEvaluator, ema_logging, load_results, save_results
from ema_workbench.analysis import prim
from problem_formulation import get_model_for_problem_formulation
from ema_workbench.em_framework.evaluators import BaseEvaluator

from ema_workbench.em_framework.optimization import (HyperVolume,
                                                     EpsilonProgress)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from ema_workbench.analysis import parcoords
import seaborn as sns

In [2]:
# Actor whose problem formulation is analysed throughout this notebook.
ACTORNAME = "Gorssel"

# Build the model for that actor; the second return value is kept as planning_steps.
dike_model, planning_steps = get_model_for_problem_formulation(ACTORNAME)

# Outcome names in the order the model declares them.
outcomekeys = [model_outcome.name for model_outcome in dike_model.outcomes]

# Scenario selection

In [3]:
# Archive with the previously generated experiments (file name, despite the variable name).
ACTOR = "genscen_Gorssel_50000_12-06-2021-03-06-15.tar.gz"

# load_results returns the experiments and the outcomes; the outcomes are
# wrapped in a DataFrame for the column-based selections below.
experiments, outcomes = load_results(f"data/generated/{ACTOR}")
outcomes_df = pd.DataFrame(data=outcomes)

In [4]:
# List the outcome column names in their stored order.
list(outcomes_df.columns)

['Difference in Expected Annual Damage Gorssel-Deventer',
 'Difference in Expected Number of Deaths Gorssel-Deventer',
 'Gorssel Expected Annual Damage',
 'Gorssel Expected Number of Deaths',
 'Gorssel Budget Overrun',
 'Gorssel Total Costs']

In [5]:
# Display the outcomes frame (one row per experiment, one column per outcome).
outcomes_df

Unnamed: 0,Difference in Expected Annual Damage Gorssel-Deventer,Difference in Expected Number of Deaths Gorssel-Deventer,Gorssel Expected Annual Damage,Gorssel Expected Number of Deaths,Gorssel Budget Overrun,Gorssel Total Costs
0,-1219.596387,-9.711846e-07,9.488213e+06,0.004926,0,0.0
1,-393.024418,-2.241158e-07,3.958503e+07,0.022573,0,0.0
2,-1511.088849,-6.855404e-07,1.521954e+08,0.069047,0,0.0
3,-149.944611,-9.245500e-08,1.510227e+07,0.009312,0,0.0
4,-536.744525,-3.516669e-07,5.406037e+07,0.035420,0,0.0
...,...,...,...,...,...,...
49995,-695.221991,-3.872856e-07,7.002206e+07,0.039007,0,0.0
49996,-3266.659209,-2.565056e-06,7.221858e+07,0.034183,0,0.0
49997,-300.839149,-1.813858e-07,1.075421e+07,0.004638,0,0.0
49998,-6792.876649,-5.842054e-06,1.203924e+07,0.005334,0,0.0


In [6]:
# Column names picked by position: 2 -> damage, 3 -> deaths, 5 -> total costs
# (see the column listing printed above).
DAMAGE, DEATHS, COSTS = outcomes_df.columns[[2, 3, 5]]

We are interested in the correlation between Deaths and Damage

In [7]:
# Pearson correlation (the pandas default) between the damage and deaths columns.
outcomes_df[DAMAGE].corr(other=outcomes_df[DEATHS], method="pearson")

0.9776021595504573

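The correlation is very strong (about 0.98): expected damage and expected number of deaths move almost in lockstep, so scenarios that are extreme on one are nearly always extreme on the other.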

In [8]:
# Put experiments and outcomes side by side, one row per experiment.
results = pd.concat([experiments, outcomes_df], axis="columns")

In [9]:
# Thresholds: 90th percentile of each outcome.
damage_p90 = np.percentile(a=outcomes[DAMAGE], q=90)
deaths_p90 = np.percentile(a=outcomes[DEATHS], q=90)

# Rows strictly above the threshold for damage and for deaths, respectively.
worst_damage_df = results[results[DAMAGE] > damage_p90]
worst_deaths_df = results[results[DEATHS] > deaths_p90]

# Scenario values that appear in both selections.
worst_ix = set(worst_damage_df["scenario"].tolist()).intersection(worst_deaths_df["scenario"].tolist())

In [10]:
# worst_ix holds values of the "scenario" column, not row positions, so select
# rows by membership rather than feeding those values to .iloc.
# The candidate rows are now in frame order, so the row drawn with
# random_state=1 can differ from the one the positional lookup produced.
worst_case = results.loc[results["scenario"].isin(list(worst_ix))].sample(n=1, random_state=1)

In [11]:
# Thresholds: 10th percentile of each outcome.
damage_p10 = np.percentile(a=outcomes[DAMAGE], q=10)
deaths_p10 = np.percentile(a=outcomes[DEATHS], q=10)

# Rows at or below the threshold for damage and for deaths, respectively.
best_damage_df = results[results[DAMAGE] <= damage_p10]
best_deaths_df = results[results[DEATHS] <= deaths_p10]

# Scenario values that appear in both selections.
best_ix = set(best_damage_df["scenario"].tolist()).intersection(best_deaths_df["scenario"].tolist())

In [12]:
# best_ix holds values of the "scenario" column, not row positions, so select
# rows by membership rather than feeding those values to .iloc.
# The candidate rows are now in frame order, so the row drawn with
# random_state=1 can differ from the one the positional lookup produced.
best_case = results.loc[results["scenario"].isin(list(best_ix))].sample(n=1, random_state=1)

In [13]:
# Boolean mask (numpy array) marking rows whose scenario is in the worst or
# best group. The ids come from the "scenario" column, so they are matched
# against that column rather than against the frame index.
# NOTE(review): assumes `results` and `outcomes_df` share the same row order,
# which holds when `results` is the column-wise concat built above — confirm.
inverse = results["scenario"].isin(list(worst_ix | best_ix)).to_numpy()

In [14]:
# Outcome rows that are NOT flagged by the `inverse` mask.
inverse_df = outcomes_df.loc[~inverse]

In [15]:
# Band limits: 45th and 55th percentile of each outcome.
damage_p45, damage_p55 = (np.percentile(a=outcomes[DAMAGE], q=q) for q in (45, 55))
deaths_p45, deaths_p55 = (np.percentile(a=outcomes[DEATHS], q=q) for q in (45, 55))

# Rows inside the half-open band (lower, upper] for damage and for deaths.
in_middle_damage = (results[DAMAGE] > damage_p45) & (results[DAMAGE] <= damage_p55)
in_middle_deaths = (results[DEATHS] > deaths_p45) & (results[DEATHS] <= deaths_p55)
middle_damage_df = results[in_middle_damage]
middle_deaths_df = results[in_middle_deaths]

# Scenario values that appear in both selections.
middle_ix = set(middle_damage_df["scenario"].tolist()).intersection(middle_deaths_df["scenario"].tolist())

In [16]:
# middle_ix holds values of the "scenario" column, not row positions, so select
# rows by membership rather than feeding those values to .iloc.
# The candidate rows are now in frame order, so the row drawn with
# random_state=1 can differ from the one the positional lookup produced.
middle_case = results.loc[results["scenario"].isin(list(middle_ix))].sample(n=1, random_state=1)

In [17]:
# Band limits: 10th and 45th percentile of each outcome.
damage_low_lo, damage_low_hi = (np.percentile(a=outcomes[DAMAGE], q=q) for q in (10, 45))
deaths_low_lo, deaths_low_hi = (np.percentile(a=outcomes[DEATHS], q=q) for q in (10, 45))

# Rows inside the half-open band (lower, upper] for damage and for deaths.
in_low_damage = (results[DAMAGE] > damage_low_lo) & (results[DAMAGE] <= damage_low_hi)
in_low_deaths = (results[DEATHS] > deaths_low_lo) & (results[DEATHS] <= deaths_low_hi)
low_damage_df = results[in_low_damage]
low_deaths_df = results[in_low_deaths]

# Scenario values that appear in both selections.
low_ix = set(low_damage_df["scenario"].tolist()).intersection(low_deaths_df["scenario"].tolist())

In [18]:
# Band limits: 55th and 90th percentile of each outcome.
damage_high_lo, damage_high_hi = (np.percentile(a=outcomes[DAMAGE], q=q) for q in (55, 90))
deaths_high_lo, deaths_high_hi = (np.percentile(a=outcomes[DEATHS], q=q) for q in (55, 90))

# Rows inside the half-open band (lower, upper] for damage and for deaths.
in_high_damage = (results[DAMAGE] > damage_high_lo) & (results[DAMAGE] <= damage_high_hi)
in_high_deaths = (results[DEATHS] > deaths_high_lo) & (results[DEATHS] <= deaths_high_hi)
high_damage_df = results[in_high_damage]
high_deaths_df = results[in_high_deaths]

# Scenario values that appear in both selections.
high_ix = set(high_damage_df["scenario"].tolist()).intersection(high_deaths_df["scenario"].tolist())

In [20]:
# low_ix / high_ix hold values of the "scenario" column, not row positions, so
# select rows by membership rather than feeding those values to .iloc.
# The candidate rows are now in frame order, so the rows drawn with
# random_state=1 can differ from the ones the positional lookup produced.
low_case = results.loc[results["scenario"].isin(list(low_ix))].sample(n=1, random_state=1)
high_case = results.loc[results["scenario"].isin(list(high_ix))].sample(n=1, random_state=1)

In [21]:
# Names of the model's uncertainties, collected through the public iteration
# interface (same pattern as used for outcomes and levers in this notebook)
# instead of reaching into the private `_data` mapping.
uncertainties = [uncertainty.name for uncertainty in dike_model.uncertainties]

In [23]:
# Stack the uncertainty columns of the five sampled cases, ordered from best to worst.
selected = pd.concat(
    [
        case_row.loc[:, uncertainties]
        for case_row in (best_case, low_case, middle_case, high_case, worst_case)
    ],
    axis=0,
)

In [24]:
# Position in `selected` -> human-readable case label.
cases = dict(enumerate(["best", "low", "middle", "high", "worst"]))

In [25]:
# One Scenario per selected row, named after the row's index label and carrying
# the row's uncertainty values as keyword arguments.
scenarios = [
    Scenario(f"{row_label}", **uncertainty_values)
    for row_label, uncertainty_values in selected.iterrows()
]

# Optimisation

In [18]:
# Lower and upper bounds handed to the HyperVolume convergence metric.
# NOTE(review): the lower bound has 3 entries while the upper bound has one
# entry per name in outcomekeys[2:] — confirm the two lengths agree.
hypervolumemin = [0] * 3
hypervolumemax = outcomes_df.loc[:, outcomekeys[2:]].max(axis=0).to_numpy().tolist()

In [19]:
# Manually override the third upper bound instead of using the observed maximum.
# NOTE(review): 1.15e9 is a magic number — record where it comes from.
hypervolumemax[2] = 1.15e9

In [None]:
# Send ema_workbench log messages at INFO level to stderr so progress is visible.
ema_logging.log_to_stderr(ema_logging.INFO)

# Number of function evaluations per optimisation run (4000 in total).
nfe = 3000 + 1000

def optimize(scenario, nfe, model, converge_metrics, epsilons):
    """Search over the levers of ``model`` with ``scenario`` as reference.

    Parameters
    ----------
    scenario
        Reference scenario, forwarded to the evaluator as ``reference``.
    nfe
        Number of function evaluations, forwarded as ``nfe``.
    model
        Model handed to ``MultiprocessingEvaluator``.
    converge_metrics
        Convergence metrics, forwarded as ``convergence``.
    epsilons
        Epsilon values, forwarded as ``epsilons``.

    Returns
    -------
    tuple
        The ``(results, convergence)`` pair returned by ``evaluator.optimize``.
    """
    with MultiprocessingEvaluator(model) as evaluator:
        # Forward the metrics received as an argument; do not rely on a
        # similarly named notebook-level variable being defined.
        archive, convergence = evaluator.optimize(
            nfe=nfe,
            searchover='levers',
            convergence=converge_metrics,
            epsilons=epsilons,
            reference=scenario,
            convergence_freq=200,
        )
    return archive, convergence


# NOTE: `results` is rebound here, from the merged experiments/outcomes frame
# to a list of (result, convergence) pairs — one pair per reference scenario.
results = []
for scenario in scenarios:
    # Build fresh metric objects for every run.
    convergence_metrics = [
        HyperVolume(minimum=hypervolumemin, maximum=hypervolumemax),
        EpsilonProgress(),
    ]
    # One epsilon of 1e3 per entry of outcomekeys[2:].
    epsilons = [1e3 for _ in outcomekeys[2:]]
    optimisation_run = optimize(scenario, nfe, dike_model, convergence_metrics, epsilons)
    results.append(optimisation_run)


[MainProcess/INFO] pool started
[MainProcess/INFO] generation 0: 0/4000 nfe
[MainProcess/INFO] generation 5: 497/4000 nfe
[MainProcess/INFO] generation 10: 996/4000 nfe
[MainProcess/INFO] generation 15: 1492/4000 nfe
[MainProcess/INFO] generation 20: 1992/4000 nfe
[MainProcess/INFO] generation 25: 2491/4000 nfe
[MainProcess/INFO] generation 30: 2990/4000 nfe
[MainProcess/INFO] generation 35: 3487/4000 nfe
[MainProcess/INFO] generation 40: 3983/4000 nfe
[MainProcess/INFO] optimization completed, found 1 solutions
[MainProcess/INFO] terminating pool
[MainProcess/INFO] pool started
[MainProcess/INFO] generation 0: 0/4000 nfe
[MainProcess/INFO] generation 5: 497/4000 nfe
[MainProcess/INFO] generation 10: 996/4000 nfe
[MainProcess/INFO] generation 15: 1494/4000 nfe
[MainProcess/INFO] generation 20: 1987/4000 nfe
[MainProcess/INFO] generation 25: 2486/4000 nfe
[MainProcess/INFO] generation 30: 2983/4000 nfe
[MainProcess/INFO] generation 35: 3480/4000 nfe
[MainProcess/INFO] generation 40: 397

In [None]:
# Plot both convergence metrics against the number of function evaluations,
# one line per optimisation run.
fig, (ax1, ax2) = plt.subplots(ncols=2,sharex=True)
for i, (_, convergence) in enumerate(results):
    ax1.plot(convergence.nfe, convergence.hypervolume, label=f'scenario {i}')
    ax2.plot(convergence.nfe, convergence.epsilon_progress, label=f'scenario {i}')

ax1.set_ylabel('hypervolume')
ax1.set_xlabel('nfe')
# Raw string: "\e" is not a valid Python escape sequence and triggers a
# warning in a normal string literal; the rendered label is unchanged.
ax2.set_ylabel(r'$\epsilon$ progress')
ax2.set_xlabel('nfe')
fig.legend()
plt.show()

# Saving optimisation results

In [20]:
# Write each run's result frame and convergence frame to CSV, with the case
# label (best/low/middle/high/worst) in the file name.
output_dir = "data/optimisation/" + ACTORNAME
for case_no, (result_frame, convergence_frame) in enumerate(results):
    case_label = cases[case_no]
    result_frame.to_csv(f"{output_dir}/results_{case_label}.csv", index=False)
    convergence_frame.to_csv(f"{output_dir}/convergence_{case_label}.csv", index=False)

In [21]:
# Store the uncertainty values of the selected reference scenarios (index not written).
selected.to_csv(f"data/optimisation/{ACTORNAME}/selected.csv", index=False)

# Re-evaluate under deep uncertainty

In [28]:
# Number of scenarios used when re-evaluating the candidate policies.
# NOTE(review): the log saved with the re-evaluation cell reports 1000
# scenarios, so that output was presumably produced with a different value.
n_scenarios = 10_000

In [39]:
# Names of the model's levers, in the order the model declares them.
levers = [model_lever.name for model_lever in dike_model.levers]

In [40]:
# Turn every row of every optimisation result into a named Policy, keeping
# only the lever columns as the policy's settings.
policies = []
for case_no, (result_frame, _) in enumerate(results):
    lever_settings = result_frame.loc[:, levers]
    for option_no, lever_row in lever_settings.iterrows():
        policy_name = f'scenario {cases[case_no]} option {option_no}'
        policies.append(Policy(policy_name, **lever_row.to_dict()))

In [41]:
# Run every candidate policy against n_scenarios scenarios in a worker pool.
with MultiprocessingEvaluator(dike_model) as pool_evaluator:
    reevaluation_results = pool_evaluator.perform_experiments(
        scenarios=n_scenarios,
        policies=policies,
    )

[MainProcess/INFO] pool started
[MainProcess/INFO] performing 1000 scenarios * 16 policies * 1 model(s) = 16000 experiments
[MainProcess/INFO] 1600 cases completed
[MainProcess/INFO] 3200 cases completed
[MainProcess/INFO] 4800 cases completed
[MainProcess/INFO] 6400 cases completed
[MainProcess/INFO] 8000 cases completed
[MainProcess/INFO] 9600 cases completed
[MainProcess/INFO] 11200 cases completed
[MainProcess/INFO] 12800 cases completed
[MainProcess/INFO] 14400 cases completed
[MainProcess/INFO] 16000 cases completed
[MainProcess/INFO] experiments finished
[MainProcess/INFO] terminating pool


# Saving re-evaluation results

In [None]:
# n_scenarios is an int, so it is formatted into the file name with an f-string;
# concatenating it to a str with "+" raises TypeError.
save_results(reevaluation_results, f"data/optimisation/du_scen_{n_scenarios}_{ACTORNAME}.tar.gz")