In [15]:
from ema_workbench import Scenario, Policy, MultiprocessingEvaluator, ema_logging, load_results, save_results
from ema_workbench.analysis import prim
from problem_formulation import get_model_for_problem_formulation
from ema_workbench.em_framework.evaluators import BaseEvaluator

from ema_workbench.em_framework.optimization import (HyperVolume,
                                                     EpsilonProgress)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from ema_workbench.analysis import parcoords
import seaborn as sns

In [16]:
# Actor whose problem formulation is analysed in this notebook.
ACTORNAME = "Gorssel"

# get_model_for_problem_formulation returns a pair, unpacked here as
# (dike_model, planning_steps).
dike_model, planning_steps = get_model_for_problem_formulation(ACTORNAME)

# Names of all outcomes declared on the model, in declaration order.
outcomekeys = [model_outcome.name for model_outcome in dike_model.outcomes]

In [17]:
# Archive (file name only) holding the previously generated scenario runs.
ACTOR = "genscen_Gorssel_50000_12-06-2021-03-06-15.tar.gz"

# load_results yields the experiments table and the outcomes mapping.
experiments, outcomes = load_results("data/generated/" + ACTOR)

# Tabular view of the outcomes: one column per outcome name.
outcomes_df = pd.DataFrame(data=outcomes)

In [18]:
# Outcome column labels, picked by *position* in outcomes_df.
# NOTE(review): positions 2, 3 and 5 depend on the column order of the loaded
# outcomes; presumably damage, deaths and costs -- confirm against outcomekeys.
DAMAGE, DEATHS, COSTS = (outcomes_df.columns[position] for position in (2, 3, 5))

In [19]:
# Side-by-side table: experiment inputs followed by their outcome columns.
results = pd.concat((experiments, outcomes_df), axis="columns")

In [20]:
# Rows strictly above the 90th percentile of each outcome.
damage_p90 = np.percentile(a=outcomes[DAMAGE], q=90)
deaths_p90 = np.percentile(a=outcomes[DEATHS], q=90)
worst_damage_df = results[results[DAMAGE] > damage_p90]
worst_deaths_df = results[results[DEATHS] > deaths_p90]

# Scenario ids that are in the top decile for BOTH outcomes.
worst_ix = set(worst_damage_df["scenario"].tolist()).intersection(
    worst_deaths_df["scenario"].tolist()
)

In [21]:
# worst_ix holds values of the "scenario" column, not row positions, so the
# candidate rows are selected by matching that column (rather than .iloc, which
# would treat the ids as positional offsets). One row is then drawn
# reproducibly; .sample raises ValueError if no scenario qualifies.
worst_case = results.loc[results["scenario"].isin(worst_ix)].sample(n=1, random_state=1)

In [22]:
# Rows at or below the 10th percentile of each outcome.
damage_p10 = np.percentile(a=outcomes[DAMAGE], q=10)
deaths_p10 = np.percentile(a=outcomes[DEATHS], q=10)
best_damage_df = results[results[DAMAGE] <= damage_p10]
best_deaths_df = results[results[DEATHS] <= deaths_p10]

# Scenario ids that are in the bottom decile for BOTH outcomes.
best_ix = set(best_damage_df["scenario"].tolist()).intersection(
    best_deaths_df["scenario"].tolist()
)

In [23]:
# best_ix holds values of the "scenario" column, not row positions, so the
# candidate rows are selected by matching that column (rather than .iloc, which
# would treat the ids as positional offsets). One row is then drawn
# reproducibly; .sample raises ValueError if no scenario qualifies.
best_case = results.loc[results["scenario"].isin(best_ix)].sample(n=1, random_state=1)

In [24]:
# Middle band of the damage distribution: strictly above the 25th percentile
# and at or below the 75th percentile.
damage_q25 = np.percentile(a=outcomes[DAMAGE], q=25)
damage_q75 = np.percentile(a=outcomes[DAMAGE], q=75)
shrinkage = results.loc[
    (results[DAMAGE] > damage_q25) & (results[DAMAGE] <= damage_q75)
]

In [25]:
# Boolean mask over outcomes_df rows that fall inside the middle damage band.
# shrinkage is a row subset of `results`, which was built by column-wise concat
# with outcomes_df, so its index labels identify the same rows; matching on the
# index avoids comparing row labels against "scenario" id values.
# Despite its name, `inverse` is True for the rows to be *dropped*; callers
# negate it (~inverse) to keep the tails.
inverse = outcomes_df.index.isin(shrinkage.index)

In [26]:
# Keep only the outcome rows NOT flagged by the `inverse` mask.
outcomes_of_interest = outcomes_df.loc[~inverse]

In [31]:
# Two-column view (DAMAGE, DEATHS) of the retained outcomes.
# NOTE(review): the target name looks like a misspelling of
# `outcomes_of_interest`, and no visible cell reads it -- confirm whether the
# restriction to these two columns was meant to feed the normalisation step.
outocme_of_interest = pd.DataFrame(
    outcomes_of_interest.loc[:, [DAMAGE, DEATHS]]
)

In [28]:
from sklearn import preprocessing

# Rescale every outcome column to the unit interval so that each outcome
# carries equal weight in the distance computation.
x = outcomes_of_interest.to_numpy()
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)

# No index is passed, so the frame gets a fresh 0..n-1 index: row positions
# here no longer carry the original row labels of outcomes_of_interest.
normalized_outcomes = pd.DataFrame(data=x_scaled, columns=outcomes_df.columns)


In [29]:
import itertools

# Size of each candidate scenario set.
set_size = 3

# Number of experiments left after dropping the rows flagged by `inverse`,
# and the positional indices 0..n_scen-1 that address them.
n_scen = len(experiments[~inverse])
indices = range(n_scen)
n_scen

25000

In [None]:
import itertools
import warnings

import scipy.special

n_scen = experiments[~inverse].shape[0]
indices = range(n_scen)
set_size = 3

# The number of candidate sets grows as C(n_scen, set_size); e.g. 25000
# scenarios with set_size 3 give roughly 2.6e12 tuples, far more than fits in
# memory. Warn up front instead of letting the kernel silently run out of
# memory. The threshold is a rough guide and can be tuned.
MAX_COMBINATIONS = 10 ** 7
n_combinations = scipy.special.comb(n_scen, set_size, exact=True)
if n_combinations > MAX_COMBINATIONS:
    warnings.warn(
        "Enumerating {} combinations ({} scenarios, sets of {}) will likely "
        "exhaust memory; reduce the number of candidate scenarios "
        "first.".format(n_combinations, n_scen, set_size)
    )

# Every unordered set of `set_size` positional indices, materialised as a list
# of tuples because the parallel scoring step splits it into chunks.
combinations = list(itertools.combinations(indices, set_size))

In [8]:
from scipy.spatial.distance import pdist, squareform

def evaluate_diversity_single(indices, distances, weight=0.5, distance='euclidean'):
    '''
    Compute a single 'diversity' value for one scenario set.

    The score blends the smallest and the average pairwise distance between
    the members of the set:
    ``(1 - weight) * min + weight * mean``.

    indices : iterable of at least two integer positions identifying the
        members of the scenario set (rows/columns of `distances`)
    distances : square 2-D numpy array of pairwise distances, indexed by
        the same positions as `indices`
    weight : weight given to the mean in the diversity metric.
        If 0, only minimum; if 1, only mean
    distance : unused; kept for backward compatibility (the metric is
        already baked into `distances`)

    Returns a one-element list ``[diversity]``.
    Raises ValueError if fewer than two indices are supplied, since no
    pairwise distance exists in that case.
    '''
    members = list(indices)
    if len(members) < 2:
        raise ValueError(
            "at least two indices are required to compute pairwise distances"
        )

    # Split all unordered member pairs into parallel row / column index arrays
    # so the pair distances can be looked up in one fancy-indexing call.
    rows, cols = zip(*itertools.combinations(members, 2))
    subset_distances = distances[np.asarray(rows), np.asarray(cols)]

    minimum = np.min(subset_distances)
    mean = np.mean(subset_distances)
    diversity = (1 - weight) * minimum + weight * mean

    return [diversity]


def find_maxdiverse_scenarios(distances, combinations):
    '''
    Score every candidate scenario set in `combinations`.

    distances : pairwise distance matrix passed through to
        evaluate_diversity_single
    combinations : iterable of index collections, one per candidate set

    Returns a list of ``(diversity, indices)`` pairs in input order, where
    `diversity` is whatever evaluate_diversity_single returns for that set.
    '''
    return [
        (evaluate_diversity_single(candidate, distances), candidate)
        for candidate in combinations
    ]

In [None]:
from concurrent.futures import ProcessPoolExecutor
import os
import functools

# Full square matrix of pairwise distances between the normalised outcome rows
# (pdist's default metric), addressed by row position.
distances = squareform(pdist(normalized_outcomes.values))

# os.cpu_count() may return None when the count cannot be determined; fall
# back to a single worker so the chunking below still gets a valid integer.
cores = os.cpu_count() or 1
partial_function = functools.partial(find_maxdiverse_scenarios, distances)

with ProcessPoolExecutor(max_workers=cores) as executor:
    # One chunk of candidate sets per worker; each worker returns a list of
    # (diversity, indices) pairs, which are flattened into one list.
    worker_data = np.array_split(combinations, cores)
    # NOTE: this rebinds `results`, replacing the experiments/outcomes
    # DataFrame created earlier in the notebook under the same name.
    results = list(
        itertools.chain.from_iterable(executor.map(partial_function, worker_data))
    )


In [None]:
# Order the (diversity, indices) pairs in place, highest diversity first, and
# show the top-ranked scenario set.
results.sort(reverse=True, key=lambda scored: scored[0])
most_diverse = results[0]
most_diverse