<div>
<img src='../img/WSP_red.png' style='height: 95px; float: left' alt='WSP Logo'/>
<img src='../img/austroads.png' style='height: 115px; float: right' alt='Client Logo'/>
</div>
<center><h2>AAM6201 Development of Machine-Learning Decision-Support tools for Pavement Asset Management<br>Case Study 2: Funding Allocation
</h2></center>


In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import plotly.graph_objects as go
import plotly.express as px

from src.util import *
from src.config import CONFIG
from hdbscan import HDBSCAN
from pathlib import Path

In [None]:
# Results file and figure folder both live next to the notebook directory.
_project_root = Path('.').resolve().parent
large_result = _project_root / 'data' / 'aggregated_results.csv'
SAVE_FIG_DIR = _project_root / 'reports' / 'figures'

# Only rows flagged as verified are analysed below.
large_result_df = pd.read_csv(large_result)
large_result_df = large_result_df[large_result_df['verification_result'] == 1]

input_data = load_data()


def _selected_cost(selection_text):
    """Sum input_data['Cost'] over the rows flagged 1 in a stringified selection list."""
    flags = pd.Series(selection_text.strip('[]').split(', ')).astype(float)
    return input_data[flags == 1]['Cost'].sum()


# Budget actually spent by each verified solution, in the same order as large_result_df.
cost = [_selected_cost(selection_text) for selection_text in large_result_df['selection']]

# 10 evenly spaced edges between 0 and the configured budget -> 9 layers (~22.2$ MIL apart)
layers = np.linspace(0, CONFIG['budget'], num=10)
color = np.digitize(cost, bins=layers, right=True)

Optional verification (slow)

In [None]:
from src.util import get_objective_value, load_data
from tqdm import tqdm

# Recompute the objective values of every stored selection from the raw input
# data and check them against the values saved in the results file.
df = load_data()
for i in tqdm(range(len(large_result_df)), desc='Selection'):
    # large_result_df was filtered without resetting its index, so rows must be
    # addressed by position (.iloc); label-based .loc[i] would hit the wrong row
    # or raise KeyError.
    stored = large_result_df.iloc[i]
    selection = pd.Series([float(flag) for flag in stored['selection'].strip('[]').split(', ')])
    val_dict = get_objective_value(df, selection)
    for objective in ('dLoS', 'freight_perc', 'metro_perc'):
        assert abs(stored[objective] - val_dict[objective]) < 1e-7, (
            f'{objective} mismatch at position {i}: '
            f'stored={stored[objective]}, recomputed={val_dict[objective]}'
        )

# Test plots

## Plotly

### Plot LoS as a function of percentage

In [None]:
# Builds a single 3D scene of Absolute LoS against the metro/freight budget
# shares, coloured by budget used, then overlays three reference markers
# (the $200M baseline benchmark, the zero-penalty solution and the input-data
# budget split) and saves the figure as a PNG.
from src.visualise import plot_output_plane, add_2d_plane, update_scenelabels, add_benchmark_pt, add_2dline
from plotly.subplots import make_subplots

filtered_df = large_result_df.copy()
# Optional filter (disabled): keep only solutions that use more than half of the largest cost.
# flag = (np.array(cost) > np.max(cost) / 2)
flag = np.array([True] * len(cost))  # all-True mask: every solution is kept

filtered_df = filtered_df[flag]
filtered_cost = np.array(cost)[flag]

z_col = 'Absolute_LoS'
x = filtered_df['metro_perc']
y = filtered_df['freight_perc']
z = filtered_df[z_col]

# x, y is percentage, z is Absolute LoS
by_range = plot_output_plane(x=x, y=y, z=z, 
    color=filtered_cost,
    make_fig=False,
    coloraxis='coloraxis',
    name='Level Of Service'
)

fig = make_subplots(rows=1, cols=1, 
    specs=[[{'type': 'scene'}] * 1],
    subplot_titles=[f'{z_col.replace("_", " ")} by Percentage']
)

fig.update_layout(width=900, height=900, showlegend=True)

# uncomment to show in 3d regions of budget split that are satisfactory
# add_valid_region_3d(fig, row=1, col=2, x_range=(0.15, 0.3), y_range=(0.6, 0.8), z_max_range=[z.min(), z.max()])

fig.add_trace(by_range, row=1, col=1) # the by-percentage trace is the only subplot in this figure

# add benchmark
selected_benchmark = input_data[input_data['$200MCost'] > 0]
# NOTE(review): this substring test is presumably meant to detect the 'dLoS' column;
# it matches any column name containing a lowercase 'd' -- confirm intent.
if 'd' in z_col:
    benchmark_z = input_data['nd$200MPCI'].sum()
else:
    benchmark_z = input_data['n$200MPCI_After'].sum()
# Share of the benchmark's cost spent on rows flagged Metro / Freight.
benchmark_x = selected_benchmark[selected_benchmark['Metro'] == 1]['Cost'].sum() / selected_benchmark['Cost'].sum()
benchmark_y = selected_benchmark[selected_benchmark['Freight'] == 1]['Cost'].sum() / selected_benchmark['Cost'].sum()

# Helpers below come from src.visualise; presumably they add a plane at z=benchmark_z
# and a marker at the benchmark point -- confirm against the helper definitions.
add_2d_plane(fig, row=1, col=1, x_range=(x.min(), x.max()), y_range=(y.min(), y.max()), z=benchmark_z, name='$200M Baseline Benchmark')
add_benchmark_pt(fig, benchmark_x, benchmark_y, benchmark_z, z_min=z.min(), x_range=(0, 0.3), y_range=(0, 1), row=1, col=1, name_pt='$200M Baseline Benchmark')

# add 0 penalty split
no_pen = filtered_df[(filtered_df['Metro'] == 0) & (filtered_df['Freight'] == 0)]
assert len(no_pen) == 1  # exactly one run is expected with both penalties at zero
no_pen_x, no_pen_y = no_pen.iloc[0]['metro_perc'], no_pen.iloc[0]['freight_perc']
no_pen_z = no_pen.iloc[0][z_col]
add_benchmark_pt(fig, no_pen_x, no_pen_y, no_pen_z, z_min=z.min(), x_range=(0, 0.3), y_range=(0, 1), row=1, col=1, color='blue', name_pt='No Penalty Data Point')

# add input dataset budget split
orig_x = input_data[input_data['Metro'] == 1]['Cost'].sum() / input_data['Cost'].sum()
orig_y = input_data[input_data['Freight'] == 1]['Cost'].sum() / input_data['Cost'].sum()
add_2dline(fig, orig_x, orig_y,  z_min=z.min(), x_range=(0, 0.3), y_range=(0, 1), row=1, col=1, color='green', name='Input Data Split')

update_scenelabels(fig, 1, 1, x_label='Metro Percentage', y_label='Freight Percentage', z_label=z_col, fontsize=10)

fig.update_coloraxes(
    colorscale='RdBu', 
    colorbar_title_text='Budget used', 
)

# Only the four named traces get a legend entry; any other trace on the figure is hidden from the legend.
for trace in fig['data']:
    if trace['name'] not in ['Level Of Service', 'No Penalty Data Point', '$200M Baseline Benchmark', 'Input Data Split']:
        trace['showlegend'] = False

# Anchor the legend in the top-left corner of the figure.
fig.update_layout(legend=dict(
    yanchor='top',
    y=0.99,
    xanchor='left',
    x=0.01,
))

from pathlib import Path
# Fix the camera so the displayed and the saved image share the same viewpoint.

fig.update_scenes(
    row=1, col=1,
    patch=dict(
        camera=dict(
            up=dict(x=0, y=0, z=1),
            center=dict(x=0,y=0,z=0),
            eye=dict(x=-2,y=1,z=1.5),
        )
    )
)

fig.show()
# Static export through the kaleido engine.
fig.write_image(SAVE_FIG_DIR / 'Absolute_LoS_by_Percentage_200_budget_color.png', engine='kaleido', scale=1)

### Plot LoS as a function of penalties

In [None]:
# Builds a single 3D scene of Absolute LoS against the metro/freight penalties,
# coloured by budget used, and saves it as a PNG.
from src.visualise import plot_output_plane, add_2d_plane, update_scenelabels, add_benchmark_pt, add_2dline
from plotly.subplots import make_subplots
from pathlib import Path

filtered_df = large_result_df.copy()
# Optional filter (disabled): keep only solutions that use more than half of the largest cost.
# flag = (np.array(cost) > np.max(cost) / 2)
flag = np.array([True] * len(cost))  # all-True mask: every solution is kept

filtered_df = filtered_df[flag]
filtered_cost = np.array(cost)[flag]

z_col = 'Absolute_LoS'
x = filtered_df['Metro']
y = filtered_df['Freight']
z = filtered_df[z_col]

# x, y are the penalties, z is Absolute LoS
by_penalties = plot_output_plane(
    x=x, y=y, z=z,
    color=filtered_cost,
    make_fig=False,
    coloraxis='coloraxis',
    name='Level Of Service',
)

fig = make_subplots(
    rows=1, cols=1,
    specs=[[{'type': 'scene'}] * 1],
    # This figure is indexed by penalties, so the title says so.
    subplot_titles=[f'{z_col.replace("_", " ")} by Penalties'],
)

fig.update_layout(width=900, height=900, showlegend=True)

# Add the trace built in this cell; the previous code added the by-percentage
# trace left over from the preceding cell, so the plot contradicted its axis labels.
fig.add_trace(by_penalties, row=1, col=1)

update_scenelabels(fig, 1, 1, x_label='Metro Penalty', y_label='Freight Penalty', z_label=z_col, fontsize=10)

fig.update_coloraxes(
    colorscale='RdBu',
    colorbar_title_text='Budget used',
)

# Set the camera and title before showing, so the inline figure matches the saved one.
fig.update_scenes(
    row=1, col=1,
    patch=dict(
        camera=dict(
            up=dict(x=0, y=0, z=1),
            center=dict(x=0, y=0, z=0),
            eye=dict(x=-2, y=1, z=1.5),
        )
    )
)
fig.update_layout(title="Absolute lane-length weighted Level of Service as function of penalties")

fig.show()
fig.write_image(SAVE_FIG_DIR / 'Absolute_LoS_by_Penalties_200_budget_color.png', scale=1, width=1800)

### Plot percentage as a function of penalties

In [None]:
from plotly.subplots import make_subplots

# Two side-by-side 3D scenes sharing the penalty axes: the metro budget share
# on the left and the freight budget share on the right.
filtered_df = large_result_df

x, y = filtered_df['Metro'], filtered_df['Freight']
z = filtered_df['metro_perc']

metro_perc = plot_output_plane(x, y, z, make_fig=False, coloraxis='coloraxis')
freight_perc = plot_output_plane(x=x, y=y, z=filtered_df['freight_perc'], make_fig=False, coloraxis='coloraxis')

fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'scene'}, {'type': 'scene'}]],
    horizontal_spacing=1/6,
    column_widths=[0.5, 0.5],
    subplot_titles=['Metro percentage by penalties', 'Freight percentage by penalties'],
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 100, 'b': 100, 'l': 100, 'r': 100},
    showlegend=False,
)

# Traces first, then axis labels, one column at a time.
for _col, _surface in enumerate((metro_perc, freight_perc), start=1):
    fig.add_trace(_surface, row=1, col=_col)

for _col, _z_label in enumerate(('Metro Percentage', 'Freight Percentage'), start=1):
    update_scenelabels(fig, 1, _col, x_label='Metro Penalty', y_label='Freight Penalty', z_label=_z_label, fontsize=8)

# Both scenes share one colour axis.
fig.update_coloraxes(colorscale='Viridis', colorbar_title_text='Percentage')
fig.show()

In [None]:
from pathlib import Path
from data import DATA_DIR

image_dir = DATA_DIR.parent / 'reports' / 'figures'

# Viewpoint applied before export.
# NOTE(review): only the scene at row 1 / col 1 is updated; the second scene keeps
# its default camera -- confirm this is intended.
_camera = {
    'up': {'x': 0, 'y': 0, 'z': 1},
    'center': {'x': 0, 'y': 0, 'z': 0},
    'eye': {'x': -1.5, 'y': 1.5, 'z': 1.5},
}
fig.update_scenes(patch={'camera': _camera}, row=1, col=1)

fig.write_image(SAVE_FIG_DIR / 'freight_metro_perc_by_pen_200.png')

Interactive web-app

In [None]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from plotly.subplots import make_subplots
from src.util import load_data
from src.visualise import plot_output_plane, update_scenelabels

# Build App: one graph plus a dropdown that selects the colour scale.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id='graph'),
    html.Label([
        "Colour scale",
        dcc.Dropdown(
            id='colorscale-dropdown', clearable=False,
            value='plasma', options=[
                {'label': c, 'value': c}
                for c in px.colors.named_colorscales()
            ])
    ]),
])


# Define callback to update graph
@app.callback(
    Output('graph', 'figure'),
    [Input("colorscale-dropdown", "value")]
)
def update_figure(colorscale):
    """Rebuild the two Level-of-Service scenes using the selected colour scale.

    Parameters
    ----------
    colorscale : str
        Name of a Plotly colour scale chosen in the dropdown.

    Returns
    -------
    plotly.graph_objects.Figure
        Left scene: LoS by budget split (metro / freight percentage).
        Right scene: LoS by penalty (metro / freight penalty).
    """
    # Read the columns explicitly instead of relying on whatever x/y/z the
    # last executed cell happened to leave in the notebook namespace.
    results = large_result_df
    level_of_service = results['Absolute_LoS']

    by_range = plot_output_plane(
        x=results['metro_perc'],
        y=results['freight_perc'],
        z=level_of_service,
        make_fig=False,
        coloraxis='coloraxis',
        name='by_range'
    )
    by_penalties = plot_output_plane(
        x=results['Metro'],
        y=results['Freight'],
        z=level_of_service,
        make_fig=False,
        coloraxis='coloraxis',
        name='by_penalties'
    )

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'scene'}] * 2],
        subplot_titles=['Level of Service by Split', 'Level of Service by Penalty']
    )

    fig.add_trace(by_range, row=1, col=1)
    fig.add_trace(by_penalties, row=1, col=2)

    update_scenelabels(fig, 1, 1, x_label='Metro Percentage', y_label='Freight Percentage', z_label='Level of Service', fontsize=12)
    update_scenelabels(fig, 1, 2, x_label='Metro Penalty', y_label='Freight Penalty', z_label='Level of Service', fontsize=12)

    fig.update_layout(showlegend=False, width=1000, height=600)
    # Apply the dropdown selection; this was previously hard-coded to 'Viridis',
    # which made the dropdown a no-op.
    fig.update_coloraxes(colorscale=colorscale)

    return fig


# Run app and display result inline in the notebook
app.run_server(mode='inline')

# Density

In [None]:
# Grid the (metro share, freight share) plane, find the sparsely populated
# cells and group those cells into clusters with HDBSCAN.
filtered_df = large_result_df.iloc[:, :]
x, y, z = (filtered_df[col] for col in ('metro_perc', 'freight_perc', 'dLoS'))

# 25 x 25 count grid; axis 0 of H follows x and axis 1 follows y.
H, xedges, yedges = np.histogram2d(x, y, bins=25)

# Squash large counts with a scaled arctan so a few very dense cells do not dominate.
alpha = max(1, np.quantile(H, 0.9))
H = np.arctan(H / alpha) * alpha

# 1 where a cell is at or below the median squashed count, 0 elsewhere.
mask = (H <= np.quantile(H, 0.5)).astype(int)

# Grid coordinates of the low-density cells are the points to cluster.
x_coord, y_coord = np.nonzero(mask)
clusterer = HDBSCAN(metric='euclidean', min_cluster_size=2)
labels = clusterer.fit_predict(np.vstack((x_coord, y_coord)).T)

# -2 marks cells outside the mask; masked cells receive their cluster label.
label_mat = np.full_like(mask, -2)
label_mat[x_coord, y_coord] = labels

In [None]:
# Choose a palette by the number of real clusters (labels >= 0). The two
# leading colours are reserved for -2 (cell outside the mask) and -1 (noise).
cluster_ids = sorted(lab for lab in set(labels) if lab >= 0)
n_clusters = len(cluster_ids)

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
if n_clusters <= 10:
    cat_map = plt.get_cmap('tab10')
elif n_clusters <= 20:
    cat_map = plt.get_cmap('tab20')
else:
    cat_map = plt.get_cmap('spring')

if n_clusters <= 20:
    # Qualitative maps: an integer argument selects the k-th palette entry.
    cluster_colors = [np.array(cat_map(k)) for k in range(n_clusters)]
else:
    # Continuous map: sample evenly over [0, 1]; consecutive integer indices
    # would give nearly identical colours.
    cluster_colors = [np.array(cat_map(frac)) for frac in np.linspace(0.0, 1.0, n_clusters)]

colormap = [np.array([0, 0, 0, 1]), np.array([0.8, 0.8, 0.8, 1])]
colormap.extend(cluster_colors)

# One unit-wide bin centred on every integer label from -2 to n_clusters - 1.
bounds = np.arange(-2, len(colormap) - 1) - 0.5
colormap = colors.ListedColormap(colormap)
norm = colors.BoundaryNorm(bounds, colormap.N)


fig = plt.figure(figsize=(12, 4))
plt.subplot(131)
a = plt.imshow(label_mat, cmap=colormap, norm=norm)
# Ticks at the bin centres only; the last boundary has no bin to its right.
plt.colorbar(a, ticks=bounds[:-1] + 0.5)
plt.title('Cluster labels')
plt.subplot(132)
b = plt.imshow(H, interpolation='nearest')
plt.colorbar(b)
plt.title('Squashed bin counts')
plt.subplot(133)
c = plt.imshow(mask, interpolation='nearest', cmap='Greys')
plt.colorbar(c)
plt.title('Low-density mask')

In [None]:
# For every cluster of low-density grid cells, record which input points fall
# inside it and a z summary over its bounding box.
#   label_hit[label]['in_boundbox'] : index of points inside the cluster's bounding box
#   label_hit[label]['in_cluster']  : index of points inside any grid cell of the cluster
#   label_sum[label]                : sum of z over the bounding box divided by the
#                                     number of grid cells in the cluster
label_sum = {}
label_hit = {}
for label in set(labels):
    if label < 0:
        continue  # negative labels are not clusters

    member = labels == label
    x_in_label = x_coord[member]
    y_in_label = y_coord[member]

    # Bounding box of the cluster in bin indices, then in data coordinates.
    bound_y_bin_idx = (max(0, min(y_in_label)), min(H.shape[1] - 1, max(y_in_label)))
    bound_y_edges = (yedges[bound_y_bin_idx[0]], yedges[bound_y_bin_idx[1] + 1])
    bound_x_bin_idx = (max(0, min(x_in_label)), min(H.shape[0] - 1, max(x_in_label)))
    bound_x_edges = (xedges[bound_x_bin_idx[0]], xedges[bound_x_bin_idx[1] + 1])

    # Collect the points of every cell in the cluster. Index.append/union return
    # a new Index, so the result must be re-assigned; the boolean mask has to be
    # applied before taking the index, otherwise every label is returned.
    in_bin = x.index[:0]
    for x_bin_idx, y_bin_idx in zip(x_in_label, y_in_label):
        in_cell = (
            x.between(xedges[x_bin_idx], xedges[x_bin_idx + 1])
            & y.between(yedges[y_bin_idx], yedges[y_bin_idx + 1])
        )
        # union (not append) so a point on a shared edge is only listed once
        in_bin = in_bin.union(x.index[in_cell.to_numpy()])

    in_box = x.between(*bound_x_edges) & y.between(*bound_y_edges)
    z_in_bin_sum = z[in_box].sum()
    label_hit[label] = {
        'in_boundbox': x[in_box].index,
        'in_cluster': in_bin
    }
    label_sum[label] = z_in_bin_sum / len(y_in_label)
    
    

# 2D plot

Plot different slices of the 3d surface

In [None]:
import matplotlib.cm as cm
import matplotlib

# 2D view of the results: one dot per solution at (metro share, freight share),
# coloured by z_col, with dashed guide lines for three reference splits.
df = large_result_df
z_col = 'Absolute_LoS'
z = df[z_col]
norm = matplotlib.colors.Normalize(vmin=z.min(), vmax=z.max(), clip=True)
# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
mapper = cm.ScalarMappable(norm=norm, cmap=plt.get_cmap('viridis_r'))

# $200M baseline benchmark: its z value and its metro / freight cost shares.
selected_benchmark = input_data[input_data['$200MCost'] > 0]
if z_col == 'dLoS':
    benchmark_z = input_data['nd$200MPCI'].sum()
else:
    benchmark_z = input_data['n$200MPCI_After'].sum()
benchmark_cost = selected_benchmark['Cost'].sum()
benchmark_x = selected_benchmark[selected_benchmark['Metro'] == 1]['Cost'].sum() / benchmark_cost
benchmark_y = selected_benchmark[selected_benchmark['Freight'] == 1]['Cost'].sum() / benchmark_cost

fig, ax = plt.subplots(figsize=(22, 12))
plt.scatter(df['metro_perc'], df['freight_perc'], color=list(map(mapper.to_rgba, z)))

# Remember the data-driven limits; the guide lines and extra ticks must not widen them.
xlim = plt.xlim()
ylim = plt.ylim()

# benchmark
plt.axvline(x=benchmark_x, linestyle='--', color='red', linewidth=2, zorder=-1)
plt.axhline(y=benchmark_y, linestyle='--', color='red', linewidth=2, zorder=-1)
plt.scatter(x=[benchmark_x], y=[benchmark_y], color=[mapper.to_rgba(benchmark_z)], marker='^')

# add 0 penalty split (taken from this cell's own frame, not from a variable
# left behind by an earlier cell)
no_pen = df[(df['Metro'] == 0) & (df['Freight'] == 0)]
assert len(no_pen) == 1
no_pen_x, no_pen_y = no_pen.iloc[0]['metro_perc'], no_pen.iloc[0]['freight_perc']
plt.axvline(x=no_pen_x, linestyle='--', color='blue', linewidth=2, zorder=-1)
plt.axhline(y=no_pen_y, linestyle='--', color='blue', linewidth=2, zorder=-1)

# add input dataset budget split
orig_x = input_data[input_data['Metro'] == 1]['Cost'].sum() / input_data['Cost'].sum()
orig_y = input_data[input_data['Freight'] == 1]['Cost'].sum() / input_data['Cost'].sum()
plt.axvline(x=orig_x, linestyle='--', color='green', linewidth=2, zorder=-1)
plt.axhline(y=orig_y, linestyle='--', color='green', linewidth=2, zorder=-1)

plt.xlabel('Metro Percentage')
plt.ylabel('Freight Percentage')
# Raw string: '\D' is not a valid escape sequence in a normal string literal.
title_string = r"$\Delta$LoS" if z_col == 'dLoS' else "Absolute LoS"
plt.title(f'{title_string} by Percentage')
plt.grid()
# Add a tick at every reference value so it can be read off the axes.
xticks = (list(plt.xticks()[0]) + [benchmark_x, no_pen_x, orig_x])
yticks = (list(plt.yticks()[0]) + [benchmark_y, no_pen_y, orig_y])
plt.xticks(xticks, rotation=45)
plt.yticks(yticks)
plt.xlim(xlim)
plt.ylim(ylim)

# The mappable is not attached to any artist, so the target axes must be given explicitly.
plt.colorbar(mapper, ax=ax)
plt.tight_layout()
plt.savefig(SAVE_FIG_DIR / f'2d_{z_col}_by_range_200mil.jpg')
plt.show()

In [None]:
import matplotlib.cm as cm
import matplotlib

# 3 x 3 grid of scatter plots, one panel per budget layer: metro share against
# freight share, coloured by z_col on a colour scale shared by all panels.
input_data = load_data()
df = large_result_df.copy()
z_col = 'dLoS'

norm = matplotlib.colors.Normalize(vmin=df[z_col].min(), vmax=df[z_col].max(), clip=True)
# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
mapper = cm.ScalarMappable(norm=norm, cmap=plt.get_cmap('viridis_r'))
layers = np.linspace(0, CONFIG['budget'], num=10) # 9 layers ~22.2$ MIL apart
n_layers = len(layers) - 1
# digitize returns 0 for a cost at or below the first edge and len(layers) for a
# cost above the last edge; clamp to 1..n_layers so every row maps to a real panel.
df['budget_bins'] = np.clip(np.digitize(cost, bins=layers, right=True), 1, n_layers)
fig, axs = plt.subplots(3, 3, figsize=(21, 21))
axs = axs.ravel()

for bin_idx in sorted(df['budget_bins'].unique()):
    inner = df[df['budget_bins'] == bin_idx]
    z = inner[z_col]
    ax = axs[bin_idx - 1]
    ax.scatter(inner['metro_perc'], inner['freight_perc'], color=list(map(mapper.to_rgba, z)))
    ax.set_xlim((0, 1))
    ax.set_ylim((0, 1))
    ax.set_xlabel('Metro Percentage')
    ax.set_ylabel('Freight Percentage')
    # Raw f-string: the backslash must reach Matplotlib unchanged so that '$' is
    # rendered literally instead of opening math mode.
    ax.set_title(rf'Budget used between \${layers[bin_idx - 1] / (10**6):.2f}M and \${layers[bin_idx] / (10**6):.2f}M')
    ax.grid(True)
    fig.colorbar(mapper, ax=ax)

title_string = r"$\Delta$LoS" if z_col == 'dLoS' else "Absolute LoS"
fig.suptitle(f'{title_string} by Percentage by Budget used')
fig.tight_layout(rect=[0, 0, 1, 0.98])
plt.savefig(SAVE_FIG_DIR / f'2d_{z_col}_200mil_layer.jpg')
plt.show()

In [None]:
# Quick look at the penalty grid: one dot per verified solution, coloured by
# the budget it uses (colour scale runs from 0 to the largest cost).
input_data = load_data()
df = large_result_df.copy()
z_col = 'dLoS'

cmap = plt.get_cmap('viridis')
plt.scatter(
    x=df['Metro'],
    y=df['Freight'],
    c=cost,
    cmap='viridis',
    vmin=0,
    vmax=np.max(cost),
)
plt.show()