In [None]:
import os
from collections import Counter, defaultdict

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import pandas as pd
import yaml
from IPython.display import Image
from matplotlib.colors import ListedColormap
from sklearn.cluster import KMeans

from config import SPECS_FLOOD_YAMLS, FIG_RESULTS_DIR
from drawutil import rfcolors, mpl_config

In [None]:
# Global plot settings.
# SCALE multiplies every dimension below; 1.0 gives the write-up dimensions.
SCALE = 1
mpl_config(SCALE)
plt.rcParams.update({
    'xtick.labelsize': 6 * SCALE,
    'ytick.labelsize': 6 * SCALE,
    'axes.labelpad': 8 * SCALE,
})

In [None]:
# User inputs: one figure is produced per case study listed here.
casestudies = ['imelda', 'harvey']

# Maximum modeled flood level; levels are clipped to the range 0..r_hat.
r_hat = 3

In [None]:
# Discrete colormap with one color per flood level 0..r_hat.
severity = ListedColormap(rfcolors[:r_hat + 1], name='severity')

In [None]:
def _stack_level_bars(ax, frame, horizontal=False):
    """Draw one unit-width stacked bar per row of ``frame`` on ``ax``.

    Every bar has one segment per flood level ``r_hat, ..., 0``; the length of
    a segment is the number of entries in that row equal to the level.
    Segments are stacked starting with the highest level at the baseline and
    are colored with the matching entry of ``severity.colors``.
    """
    n_bars = frame.shape[0]
    positions = list(range(n_bars))
    offsets = [0] * n_bars
    for level in range(r_hat, -1, -1):
        counts = frame.eq(level).sum(axis=1).tolist()
        color = severity.colors[level]
        if horizontal:
            ax.barh(positions, counts, left=offsets, height=1.0, color=color)
        else:
            ax.bar(positions, counts, bottom=offsets, width=1.0, color=color)
        offsets = [start + count for start, count in zip(offsets, counts)]


def plot(df, df_ev, df_mv, gridlines=False, legend=False, blk_scns=1, blk_subs=1):
    """Draw the flood-level heatmap with marginal stacked bar charts.

    Layout (2 x 4 grid): the bottom row holds the main heatmap, two narrow
    heatmaps ticked 'EV' and 'MV', and a horizontal stacked bar chart; the
    top row holds a vertical stacked bar chart above the main heatmap and a
    merged cell used for the legend.

    Parameters
    ----------
    df : pandas.DataFrame
        Flood levels in ``0..r_hat``. Rows are drawn along the x axis
        (labelled 'Scenarios'), columns along the y axis ('Substations').
    df_ev, df_mv : pandas.DataFrame
        Frames shown (transposed) in the 'EV' and 'MV' strips. Presumably
        single-row frames with the same columns as ``df`` -- TODO confirm.
    gridlines : bool
        Draw major gridlines on the count axis of both bar charts.
    legend : bool
        Draw the flood-level legend in the merged top-right cell.
    blk_scns : int
        Tick spacing on the count (y) axis of the top bar chart.
    blk_subs : int
        Tick spacing on the count (x) axis of the right bar chart.

    Returns
    -------
    (fig, axes)
        The figure and the 2 x 4 array from ``plt.subplots``. The entries
        ``axes[0, 1:]`` have been removed from the figure; the merged legend
        axes that replaces them is not part of the returned array.
    """
    # number of heatmap cells along x (rows of df) and along y (columns of df)
    n_x, n_y = df.shape

    # panel sizes and paddings, all scaled by SCALE; one heatmap cell is `unit` wide/tall
    unit = 0.080
    ws = [unit * n_x * SCALE, unit * SCALE, unit * SCALE, 1.00 * SCALE]
    hs = [1.00 * SCALE, unit * n_y * SCALE]
    lo_pad_w, md_pad_w, hi_pad_w = 0.750 * SCALE, unit * SCALE, 0.050 * SCALE
    lo_pad_h, md_pad_h, hi_pad_h = 0.600 * SCALE, unit * SCALE, 0.050 * SCALE
    #lo_pad_h, md_pad_h, hi_pad_h = 0.450 * SCALE, unit * SCALE, 0.050 * SCALE
    fig_w = sum(ws) + lo_pad_w + hi_pad_w + (len(ws) - 1) * md_pad_w
    fig_h = sum(hs) + lo_pad_h + hi_pad_h + (len(hs) - 1) * md_pad_h
    fig, axes = plt.subplots(len(hs), len(ws),
                             gridspec_kw={'width_ratios': ws, 'height_ratios': hs},
                             figsize=(fig_w, fig_h))
    # wspace/hspace are expressed as a fraction of the average panel size
    fig.subplots_adjust(left=lo_pad_w / fig_w,
                        bottom=lo_pad_h / fig_h,
                        right=(fig_w - hi_pad_w) / fig_w,
                        top=(fig_h - hi_pad_h) / fig_h,
                        wspace=md_pad_w / sum(ws) * len(ws),
                        hspace=md_pad_h / sum(hs) * len(hs))

    ax_main, ax_ev, ax_mv, ax_right = axes[1, 0], axes[1, 1], axes[1, 2], axes[1, -1]
    ax_top = axes[0, 0]

    # main heatmap: one cell per (row of df, column of df)
    ax_main.imshow(df.T, cmap=severity, vmin=0, vmax=r_hat, aspect='auto')
    ax_main.set_xticks(range(n_x))
    ax_main.set_xticklabels(df.index, rotation=90)
    ax_main.set_xlabel('Scenarios')
    ax_main.set_yticks(range(n_y))
    ax_main.set_yticklabels(df.columns.tolist())
    ax_main.set_ylabel('Substations')

    # EV and MV strips share the y axis layout of the main heatmap
    for ax, frame, tick in ((ax_ev, df_ev, 'EV'), (ax_mv, df_mv, 'MV')):
        ax.imshow(frame.T, cmap=severity, vmin=0, vmax=r_hat, aspect='auto')
        ax.set_xticks([0])
        ax.set_xticklabels([tick], rotation=90)
        ax.set_yticks([])

    # top: per row of df, how many columns sit at each flood level
    _stack_level_bars(ax_top, df)
    if gridlines:
        ax_top.grid(which='major', axis='y')
    ax_top.set_ylim(0, n_y)
    # range(0, n + 1, step) includes n itself exactly when n is a multiple of step
    ax_top.set_yticks(range(0, n_y + 1, blk_scns))
    # '\\#' yields a literal backslash-hash at runtime without relying on the
    # invalid escape sequence '\#'
    ax_top.set_ylabel('\\# of Flooded\nSubstations')
    ax_top.set_xticks([])

    # right: per column of df, how many rows sit at each flood level
    _stack_level_bars(ax_right, df.T, horizontal=True)
    if gridlines:
        ax_right.grid(which='major', axis='x')
    ax_right.set_xlim(0, n_x)
    ax_right.set_xticks(range(0, n_x + 1, blk_subs))
    ax_right.set_xlabel('\\# of Flooding\nOccurrences')
    ax_right.set_yticks([])

    # put legend in the unused subplots: merge the three top-right cells
    for ax in axes[0, 1:]:
        ax.remove()
    gs = axes[0, 1].get_gridspec()
    axbig = fig.add_subplot(gs[0, 1:])
    if legend:
        handles = [
            patches.Rectangle((0, 0), 0, 0, color=color, label=f'$r={r}$')
            for r, color in enumerate(severity.colors)
        ]
        axbig.legend(handles=handles,
                     edgecolor='none',
                     facecolor='white',
                     fancybox=False,
                     title='Flood Level',
                     loc='center')
    axbig.set_xticks([])
    axbig.set_yticks([])

    # final view limits: cells are centered on integers, so each axis spans
    # -0.5 .. n - 0.5; setting the y limits in increasing order also undoes
    # the inverted y axis that imshow installs by default
    x_lim = (-0.5, n_x - 0.5)
    y_lim = (-0.5, n_y - 0.5)
    ax_main.set_xlim(*x_lim)
    ax_ev.set_xlim(-0.5, 0.5)
    ax_mv.set_xlim(-0.5, 0.5)
    for ax in (ax_main, ax_ev, ax_mv):
        ax.set_ylim(*y_lim)
    # copy to the marginal bar charts so bars line up with heatmap cells
    ax_top.set_xlim(ax_main.get_xlim())
    ax_right.set_ylim(ax_main.get_ylim())

    # finish: report the figure size computed above
    print(fig_w, fig_h)
    return fig, axes

In [None]:
def row_simple(omega):
    """Sort key that orders rows by their own label."""
    key = omega
    return key

def row_total(omega):
    """Sort key: total count over the non-zero flood levels 1..r_hat for row ``omega``.

    Reads the module-level ``row_cnts`` (level -> per-row counts) and ``r_hat``.
    """
    total = 0
    for level in range(r_hat, 0, -1):
        total = total + row_cnts[level][omega]
    return total

def row_weighted(omega):
    """Sort key: counts of row ``omega`` weighted by flood level, summed over r_hat..0.

    Reads the module-level ``row_cnts`` (level -> per-row counts) and ``r_hat``.
    """
    score = 0
    for level in range(r_hat, -1, -1):
        score = score + level * row_cnts[level][omega]
    return score

def row_ordered(omega):
    """Sort key: tuple of per-level counts for row ``omega``, highest level first.

    Reads the module-level ``row_cnts`` (level -> per-row counts) and ``r_hat``.
    """
    per_level = []
    for level in range(r_hat, -1, -1):
        per_level.append(row_cnts[level][omega])
    return tuple(per_level)

def row_total_then_ordered(omega):
    """Sort key: total over levels 1..r_hat first, then per-level counts r_hat..0.

    Reads the module-level ``row_cnts`` (level -> per-row counts) and ``r_hat``.
    """
    flooded_total = sum(row_cnts[level][omega] for level in range(r_hat, 0, -1))
    per_level = tuple(row_cnts[level][omega] for level in range(r_hat, -1, -1))
    return (flooded_total,) + per_level

def row_cluster(k):
    """Sort key: value of ``cluster_idx_ct`` for the cluster of row ``k``, then ``row_ordered(k)``.

    Reads the module-level ``cluster_idx`` (row label -> cluster label) and
    ``cluster_idx_ct`` (cluster label -> value).
    """
    cluster_label = cluster_idx.loc[k]
    return cluster_idx_ct.loc[cluster_label], row_ordered(k)

def col_simple(k):
    """Sort key that orders columns by their own label."""
    key = k
    return key

def col_total(k):
    """Sort key: total count over the non-zero flood levels 1..r_hat for column ``k``.

    Reads the module-level ``col_cnts`` (level -> per-column counts) and ``r_hat``.
    """
    total = 0
    for level in range(r_hat, 0, -1):
        total = total + col_cnts[level][k]
    return total

def col_weighted(k):
    """Sort key: counts of column ``k`` weighted by flood level, summed over r_hat..0.

    Reads the module-level ``col_cnts`` (level -> per-column counts) and ``r_hat``.
    """
    score = 0
    for level in range(r_hat, -1, -1):
        score = score + level * col_cnts[level][k]
    return score

def col_ordered(k):
    """Sort key: tuple of per-level counts for column ``k``, highest level first.

    Reads the module-level ``col_cnts`` (level -> per-column counts) and ``r_hat``.
    """
    per_level = []
    for level in range(r_hat, -1, -1):
        per_level.append(col_cnts[level][k])
    return tuple(per_level)

def col_total_then_ordered(k):
    """Sort key: total over levels 1..r_hat first, then per-level counts r_hat..0.

    Reads the module-level ``col_cnts`` (level -> per-column counts) and ``r_hat``.
    """
    flooded_total = sum(col_cnts[level][k] for level in range(r_hat, 0, -1))
    per_level = tuple(col_cnts[level][k] for level in range(r_hat, -1, -1))
    return (flooded_total,) + per_level

def col_cluster(k):
    """Sort key: value of ``cluster_col_ct`` for the cluster of column ``k``, then ``col_ordered(k)``.

    Reads the module-level ``cluster_col`` (column label -> cluster label) and
    ``cluster_col_ct`` (cluster label -> value).
    """
    cluster_label = cluster_col.loc[k]
    return cluster_col_ct.loc[cluster_label], col_ordered(k)

In [None]:
# Fixed seed for k-means so that the cluster assignment, and therefore the
# substation ordering in the saved figure, is the same on every run.
KMEANS_SEED = 0


def _load_flood_levels(path):
    """Read a flood spec YAML and return a scenario-by-substation flood-level table.

    The keys of the ``'xi'`` mapping are unpacked as ``(k, r, omega)``. For every
    ``(k, omega)`` pair the largest ``r`` is kept; pairs that do not occur are
    filled with 0. The result has ``omega`` on the index and ``k`` on the
    columns, is cast to ``int`` and clipped to ``0..r_hat``.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
    # trusted, locally generated spec files should be read here.
    with open(path) as fh:
        specs = yaml.load(fh, Loader=yaml.Loader)
    keys = pd.DataFrame(specs['xi'].keys(), columns=['k', 'r', 'omega'])
    worst = keys.groupby(['k', 'omega'])['r'].max().reset_index()
    levels = pd.pivot_table(worst, index='k', columns='omega', values='r').fillna(0)
    # astype(int) replaces the deprecated element-wise DataFrame.applymap(int)
    return levels.T.astype(int).clip(0, r_hat)


for casestudy in casestudies:

    is_imelda = 'imelda' in casestudy

    # inputs + preprocessing: full scenario set, plus the 'ev' and 'mv' specs
    df = _load_flood_levels(SPECS_FLOOD_YAMLS[casestudy,])
    df_ev = _load_flood_levels(SPECS_FLOOD_YAMLS[casestudy, 'ev'])
    df_mv = _load_flood_levels(SPECS_FLOOD_YAMLS[casestudy, 'mv'])

    # count number of instances of each flooding level for substations and scenarios
    # (module-level on purpose: the row_* / col_* sort keys read these names)
    row_cnts = {i: df.eq(i).sum(axis=1) for i in range(r_hat, -1, -1)}
    col_cnts = {i: df.eq(i).sum(axis=0) for i in range(r_hat, -1, -1)}

    # clustering of scenarios (rows of df)
    n_idx_clusters = 1 if is_imelda else 3
    km_idx = KMeans(n_clusters=n_idx_clusters, random_state=KMEANS_SEED)
    km_idx.fit(df)
    cluster_idx = pd.Series(index=df.index, data=km_idx.labels_)
    cluster_idx_mean = df.mean(axis=1).groupby(cluster_idx).mean()
    cluster_idx_ct = df.gt(0).sum(axis=1).groupby(cluster_idx).mean()

    # clustering of substations (columns of df)
    n_col_clusters = 1 if is_imelda else 5
    km_col = KMeans(n_clusters=n_col_clusters, random_state=KMEANS_SEED)
    km_col.fit(df.T)
    cluster_col = pd.Series(index=df.columns, data=km_col.labels_)
    cluster_col_mean = df.mean(axis=0).groupby(cluster_col).mean()
    cluster_col_ct = df.T.gt(0).sum(axis=1).groupby(cluster_col).mean()

    # reindex: scenarios by label, substations by cluster and then by per-level counts
    row_new = sorted(df.index, key=row_simple, reverse=False)
    col_new = sorted(df.columns, key=col_cluster, reverse=True)
    df = df.loc[row_new, col_new]
    df_ev = df_ev.loc[:, col_new]
    df_mv = df_mv.loc[:, col_new]

    # plot
    blk_scns = 5 if is_imelda else 20
    blk_subs = 1 if is_imelda else 5
    fig, axes = plot(df, df_ev, df_mv, legend=True, blk_scns=blk_scns, blk_subs=blk_subs)
    # draw lines between k-means clusters (currently disabled)
    #ylines = cluster_col.value_counts().loc[cluster_col_ct.sort_values(ascending=False).index].cumsum()
    #for line in ylines.iloc[:-1]:
        #axes[1, 0].plot(axes[1, 0].get_xlim(), [line - 0.5] * 2, color='black', linestyle='--')
        #axes[1, 1].plot([-0.5, 0.5], [line - 0.5] * 2, color='black', linestyle='-')
        #axes[1, 2].plot([-0.5, 0.5], [line - 0.5] * 2, color='black', linestyle='-')
        #axes[1, -1].plot([0, df.shape[0]], [line - 0.5] * 2, color='black', linestyle='--')
    #xlines = cluster_idx.value_counts().loc[cluster_idx_ct.sort_values(ascending=False).index].cumsum()
    #for line in xlines.iloc[:-1]:
        #axes[1, 0].plot([line - 0.5] * 2, axes[1, 0].get_ylim(), color='black', linestyle='--')
        #axes[0, 0].plot([line - 0.5] * 2, [0, df.shape[1]], color='black', linestyle='--')

    # outputs: save to disk, free the figure, then show the saved image inline
    output_filename_jpg = os.path.join(FIG_RESULTS_DIR, f'ijoc-{casestudy}-uncertainty-heatmap.jpg')
    fig.savefig(output_filename_jpg, format='jpg', dpi=256)
    plt.close(fig)
    display(Image(output_filename_jpg))