#### Single model data analysis

In [None]:
from data import data

import pickle

import pandas as pd
import numpy as np
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
from ema_workbench.analysis import dimensional_stacking, feature_scoring

from itertools import product

In [None]:
# Values explored for each of the four scenario parameters
av_cost_factors = [1.0, 0.5, 0.25]
av_vot_factors = [1.0, 0.5, 0.25]
av_densities = [1.5, 1.0, 0.5]
induced_demands = [1.0, 1.25, 1.5]

# Full factorial design: one (av_cost, av_vot, av_density, induced_demand) tuple per run
param_combinations = [
    *product(
        av_cost_factors,
        av_vot_factors,
        av_densities,
        induced_demands,
    )
]

In [None]:
area_names = data.mrdh65_to_name

# Per-run results, keyed by the run's parameter tuple
journeys_dict = {}
uxsim_dict = {}
parked_dict = {}

# Runs are tuples with the 4 parameter values
runs = param_combinations
load_names = [f"av_cost_{av_cost_factor}_av_vot_{av_vot_factor}_av_density_{av_density}_induced_{induced_demand}" for av_cost_factor, av_vot_factor, av_density, induced_demand in param_combinations]
variables = ["av_cost", "av_vot", "av_density", "induced_demand"]

folder = "exp2/"
# Runs that could not be loaded, with the reason, so gaps in the experiment are visible
skipped_runs = {}
for run, load_name in zip(runs, load_names):
    # NOTE: pickle.load can execute arbitrary code; only load result files you produced yourself.
    try:
        with open(f"../results/{folder}uxsim_df_{load_name}.pkl", "rb") as f:
            uxsim_result = pickle.load(f)
        with open(f"../results/{folder}parked_dict_{load_name}.pkl", "rb") as f:
            parked_result = pickle.load(f)
        journeys_result = pd.read_feather(f"../results/{folder}journeys_df_{load_name}.feather")
    except Exception as err:
        # Loading stays best-effort (missing or unreadable runs are skipped), but the failure
        # is recorded instead of silently swallowed, and KeyboardInterrupt is no longer caught.
        skipped_runs[load_name] = f"{type(err).__name__}: {err}"
        continue
    # Store a run only when all three of its files loaded, so the dicts share the same keys
    uxsim_dict[run] = uxsim_result
    parked_dict[run] = parked_result
    journeys_dict[run] = journeys_result
print(f"Loaded {len(journeys_dict)} of {len(param_combinations)} runs.")
for load_name, reason in skipped_runs.items():
    print(f"Skipped {load_name}: {reason}")

In [None]:
# Area ids (keys of area_names) selected as "city" areas, with their names
city_areas = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 28, 29, 31, 34, 41, 43, 44, 45]
city_area_names = [area_names[area] for area in city_areas]

trips_by_hour_chances = pd.read_pickle("../data/trips_by_hour_chances.pickle")

# Runs whose result files could not be loaded are absent from journeys_dict, so runs[0] is
# not guaranteed to be present; fall back to the first run that was actually loaded.
if not journeys_dict:
    raise RuntimeError("No runs were loaded, so the simulated time window cannot be determined.")
reference_journeys = journeys_dict[runs[0]] if runs[0] in journeys_dict else next(iter(journeys_dict.values()))

# Earliest and latest journey start times of the reference run, truncated to integers
start_time, end_time = int(reference_journeys['start_time'].min()), int(reference_journeys['start_time'].max())
print(f"Start time: {start_time}, End time: {end_time}")

In [None]:
# Filter the journeys
# Throw away the first 15 minutes. Road network is not fully loaded yet, so travel times are often 0.
warmup_end = start_time + 0.25
for run, journeys_df in journeys_dict.items():
    after_warmup = journeys_df[journeys_df['start_time'] >= warmup_end]
    # Remove all journeys that are not finished. Store an explicit copy: columns are added to
    # these frames later on, and assigning into a filtered slice triggers SettingWithCopyWarning.
    journeys_dict[run] = after_warmup[after_warmup['finished']].copy()

### Journeys data

In [None]:
# Names of the columns in the journeys DataFrames, kept here for reference
journey_columns = [
    # Who travelled, where, and how
    'agent', 'origin', 'destination', 'mode',
    # Timing, distance and cost of the journey
    'start_time', 'travel_time', 'end_time', 'distance',
    'cost', 'perceived_cost', 'comf_perceived_cost',
    # Simulation status flags and realised values
    'used_network', 'started', 'finished',
    'act_travel_time', 'act_perceived_cost',
    # Network nodes and vehicle
    'o_node', 'd_node', 'vehicle',
    # Mode availability
    'car_available', 'av_available',
    # Perceived cost of each alternative mode
    'perceived_cost_car', 'perceived_cost_bike',
    'perceived_cost_transit', 'perceived_cost_av',
]

In [None]:
# Aggregated results: one row per loaded run, indexed by the four parameter values
journeys_agg_df = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(journeys_dict.keys(), names=variables)
)
journeys_agg_df.index.set_names(variables, inplace=True)

# Number of journeys in each run (the index follows the key order of journeys_dict)
journeys_agg_df['total_journeys'] = [len(journeys_df) for journeys_df in journeys_dict.values()]

# Share of journeys made with each mode, per run
mode_counts = {
    run: journeys_df['mode'].value_counts(normalize=True).to_dict()
    for run, journeys_df in journeys_dict.items()
}

# Share of the travelled distance covered by each mode, per run
mode_counts_weighted = {
    run: journeys_df.groupby('mode', observed=True)['distance'].sum() / journeys_df['distance'].sum()
    for run, journeys_df in journeys_dict.items()
}

# One column per mode for both kinds of share, prefixed to tell them apart
trip_share_df = pd.DataFrame.from_dict(mode_counts, orient='index').add_prefix('mode_share_')
distance_share_df = pd.DataFrame.from_dict(mode_counts_weighted, orient='index').add_prefix('mode_distance_share_')
modes_df = pd.concat([trip_share_df, distance_share_df], axis=1)
# Name the index levels after the parameters
modes_df.index.set_names(variables, inplace=True)

# Append the mode shares to the aggregated results
journeys_agg_df = pd.concat([journeys_agg_df, modes_df], axis=1)
journeys_agg_df

In [None]:
def _distance_per_hour(journeys):
    """Return summed distance divided by summed travel time, the latter scaled by 1/3600."""
    # NOTE(review): the 3600 divisor suggests travel_time is in seconds; confirm the units.
    return journeys["distance"].sum() / (journeys["travel_time"].sum() / 3600)


# Per-run summary metrics, in the order the columns are added to journeys_agg_df.
# Each entry maps a column name to a function of that run's journeys DataFrame.
journey_metrics = {
    "mean_perceived_cost": lambda journeys: journeys["perceived_cost"].mean(),
    "mean_comf_perceived_cost": lambda journeys: journeys["comf_perceived_cost"].mean(),
    "mean_travel_time": lambda journeys: journeys["travel_time"].mean(),
    # Speed over all journeys
    "mean_speed": _distance_per_hour,
    # Speed over journeys that used the network (used_network True)
    "mean_network_speed": lambda journeys: _distance_per_hour(journeys[journeys["used_network"]]),
    # Speed over car journeys and AV journeys respectively
    "mean_car_speed": lambda journeys: _distance_per_hour(journeys[journeys["mode"] == "car"]),
    "mean_av_speed": lambda journeys: _distance_per_hour(journeys[journeys["mode"] == "av"]),
}

for metric_name, compute_metric in journey_metrics.items():
    journeys_agg_df[metric_name] = journeys_agg_df.index.map(lambda run: compute_metric(journeys_dict[run]))
journeys_agg_df

In [None]:
# Multiplier applied to the perceived cost of a journey, per mode
comfort_factors = {"car": 0.5, "bike": 1.33, "transit": 1, "av": 0.5}

# The same factors as a Series, used to map the mode column
comfort_factors_series = pd.Series(comfort_factors)

# Perceived-cost column of each non-AV alternative, with the comfort factor that applies to it
no_av_cost_factors = {
    'perceived_cost_car': comfort_factors['car'],
    'perceived_cost_bike': comfort_factors['bike'],
    'perceived_cost_transit': comfort_factors['transit'],
}

for run, journeys_df in journeys_dict.items():
    # Comfort-weighted perceived cost of the mode that was actually chosen
    chosen_mode_factor = journeys_df['mode'].map(comfort_factors_series)
    journeys_df['actual_comf_perceived_cost'] = journeys_df['perceived_cost'] * chosen_mode_factor

    # Cheapest comfort-weighted perceived cost among car, bike and transit (i.e. without AVs)
    comf_perceived_costs = journeys_df[list(no_av_cost_factors)].mul(list(no_av_cost_factors.values()))
    journeys_df['comf_perceived_cost_no_av'] = comf_perceived_costs.min(axis=1)

    # Saving relative to the best non-AV alternative; negative savings are set to 0
    cost_difference = journeys_df['comf_perceived_cost_no_av'] - journeys_df['actual_comf_perceived_cost']
    journeys_df['saved_comf_perceived_cost'] = cost_difference.clip(lower=0)

    # Total saving of the run
    journeys_agg_df.loc[run, 'saved_comf_perceived_cost'] = journeys_df['saved_comf_perceived_cost'].sum()

# Average saving per journey
journeys_agg_df['saved_comf_perceived_cost_per_trip'] = journeys_agg_df['saved_comf_perceived_cost'] / journeys_agg_df['total_journeys']

In [None]:
# Display the updated DataFrame (a bare expression on the last line of a notebook cell
# is rendered as the cell's output)
journeys_agg_df

In [None]:
### Get reference data
# The reference journeys get the same filtering as the experiment runs:
# drop the first 15 minutes, then keep only finished journeys.
journey_ref_df = (
    pd.read_feather("../results/journeys_df_base1_rush.feather")
    .loc[lambda journeys: journeys['start_time'] >= start_time + 0.25]
    .loc[lambda journeys: journeys['finished']]
)

### Calculate all the other metrics
# Mode shares by number of journeys and by travelled distance
ref_trip_shares = journey_ref_df['mode'].value_counts(normalize=True)
weighted = journey_ref_df.groupby('mode', observed=True)['distance'].sum() / journey_ref_df['distance'].sum()

# Subsets used for the network and car speeds
ref_network_journeys = journey_ref_df[journey_ref_df['used_network']]
ref_car_journeys = journey_ref_df[journey_ref_df['mode'] == 'car']

# Key order matters: later cells iterate over ref_values in insertion order
ref_values = {
    # Mode choices
    **ref_trip_shares.add_prefix('mode_share_').to_dict(),
    **weighted.add_prefix('mode_distance_share_').to_dict(),
    'mode_share_av': 0,
    'mode_distance_share_av': 0,
    # Perceived costs
    'mean_perceived_cost': journey_ref_df['perceived_cost'].mean(),
    'mean_comf_perceived_cost': journey_ref_df['comf_perceived_cost'].mean(),
    'mean_travel_time': journey_ref_df['travel_time'].mean(),
    'mean_speed': journey_ref_df['distance'].sum() / (journey_ref_df['travel_time'].sum() / 3600),
    'mean_network_speed': ref_network_journeys['distance'].sum() / (ref_network_journeys['travel_time'].sum() / 3600),
    'mean_car_speed': ref_car_journeys['distance'].sum() / (ref_car_journeys['travel_time'].sum() / 3600),
    # No reference value is defined for the AV speed
    'mean_av_speed': None,
    # Saved comfort perceived cost
    'saved_comf_perceived_cost': 0,
    'saved_comf_perceived_cost_per_trip': 0,
}

print(ref_values)

In [None]:
# Metrics where a smaller value is the desirable direction
lower_is_better = ['mode_share_car', 'mode_distance_share_car', 'mean_perceived_cost', 'mean_comf_perceived_cost', 'mean_travel_time']
# Metrics without a desirable direction
neutral = ['mode_share_av', 'mode_share_transit', 'mode_distance_share_av', 'mode_distance_share_transit']
# Every remaining reference metric counts as "higher is better". Filtering ref_values in its
# own key order keeps this list stable; a set difference would order it differently from one
# session to the next because string hashing is randomised.
not_higher_is_better = set(lower_is_better) | set(neutral)
higher_is_better = [metric for metric in ref_values if metric not in not_higher_is_better]
print(f"Lower is better: {lower_is_better}\nNeutral: {neutral}\nHigher is better: {higher_is_better}")

In [None]:
import matplotlib

# Assuming journeys_agg_df is already defined and contains the necessary data
def create_heatmap(column='saved_comf_perceived_cost', colormap='RdYlGn', center=None, label_format=None, show=True, save=False):
    """Draw a dimensionally-stacked heatmap of one column of ``journeys_agg_df``.

    The column is unstacked so that ``av_cost`` and ``av_vot`` form the heatmap columns and
    the remaining index levels form the rows. Reads the module-level ``journeys_agg_df``.

    Args:
        column: Name of the ``journeys_agg_df`` column to plot.
        colormap: Colormap passed to ``sns.heatmap``.
        center: Reference value passed to ``sns.heatmap`` as ``center``. When not None, it is
            also marked with a line and a "ref" label on the colorbar.
        label_format: Format spec for the cell annotations and the reference label.
            Defaults to ``'.3g'`` (three significant digits).
        show: If True, call ``plt.show()``; otherwise close the figure.
        save: If True, save the figure to ``../img/exp2/heatmap_{column}.png``.
    """
    import os  # local import: only needed to create the output directory when saving

    name = column.replace('_', ' ')
    # Reshape the data for the heatmap
    heatmap_data = journeys_agg_df[column].unstack(['av_cost', 'av_vot'])

    if label_format is None:
        label_format = '.3g'  # Three significant digits

    # Create a larger figure for better visibility
    plt.figure(figsize=(10, 8))

    # Create the heatmap; `center` (if given) is forwarded to seaborn
    ax = sns.heatmap(heatmap_data, annot=True, fmt=label_format, cmap=colormap, center=center, square=True)

    # Add a small black line on the color bar at the reference value, if any
    if center is not None:
        # Access the colorbar from the heatmap
        cbar = ax.collections[0].colorbar
        # Position of the reference value as a fraction of the colorbar's value range
        norm = matplotlib.colors.Normalize(vmin=cbar.vmin, vmax=cbar.vmax)
        y = norm(center)
        cbar_ax = cbar.ax
        y_low, y_high = cbar_ax.get_ylim()
        # The line is placed in data coordinates, the "ref" label in axes coordinates
        cbar_ax.axhline(y=y * (y_high - y_low) + y_low, color='black', linewidth=1)
        cbar_ax.text(-0.3, y, f'ref\n{center:{label_format}}', va='center', ha='center', color='black', fontsize=10, transform=cbar_ax.transAxes)

    # Size of one group of inner-level values per outer-level value. Derived from the data
    # (as the separator lines below are) instead of assuming three values per parameter.
    inner_cols = len(heatmap_data.columns.levels[1])
    inner_rows = len(heatmap_data.index.levels[1])

    # Customize x-axis ticks. Tick texts look like "<outer>-<inner>"; show the outer value
    # only once per group, underneath the group's middle tick.
    xlabels = []
    for i, label in enumerate(ax.get_xticklabels()):
        parts = label.get_text().split('-')
        if i % inner_cols == inner_cols // 2:
            xlabels.append('\n\n'.join(reversed(parts)))
        else:
            xlabels.append(parts[1])
    ax.set_xticklabels(xlabels, rotation=0)
    ax.xaxis.set_tick_params(labelsize=8)

    # Customize y-axis ticks in the same way (outer value to the left of the middle tick)
    ylabels = []
    for i, label in enumerate(ax.get_yticklabels()):
        text = label.get_text()
        if i % inner_rows == inner_rows // 2:
            ylabels.append(text.replace('-', '   '))
        else:
            ylabels.append(text.split('-')[1])
    ax.set_yticklabels(ylabels, rotation=0)
    ax.yaxis.set_tick_params(labelsize=8)

    # Add lines between the outer categories
    for i in range(1, len(heatmap_data.columns.levels[0])):
        ax.axvline(x=i * inner_cols, color='white', linewidth=1)

    for i in range(1, len(heatmap_data.index.levels[0])):
        ax.axhline(y=i * inner_rows, color='white', linewidth=1)

    # Set the title and labels
    plt.title(f'Dimensionally-stacked heatmap of {name}', fontsize=16)
    ax.set_xlabel('AV Value of Time\nAV Cost')
    ax.set_ylabel('AV Density\nInduced Demand')

    # Adjust the layout and display the plot
    plt.tight_layout()
    if save:
        output_path = f'../img/exp2/heatmap_{column}.png'
        # savefig does not create missing directories
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
    if show:
        plt.show()
    else:
        plt.close()

# Preview one metric inline (shown, not saved), centred on its reference value.
# colormap, show and save are left at create_heatmap's defaults ('RdYlGn', True, False).
column = 'mode_share_car'
create_heatmap(
    column=column,
    center=ref_values[column],
    label_format='.1%',
)

In [None]:
# Save one heatmap per reference metric, coloured by the metric's desirable direction and
# centred on its reference value.
for column, ref in ref_values.items():
    if column in lower_is_better:
        colormap = 'RdYlGn_r'
    elif column in neutral:
        colormap = 'BrBG'  # Blue-Yellow alternatives are: 'RdBu', 'coolwarm'
    else:
        # higher_is_better holds every metric that is in neither of the other two lists
        colormap = 'RdYlGn'

    # The AV share columns have a reference of 0, so the 0 < ref <= 1 test cannot catch them
    # and they are listed by name (the distance share column is 'mode_distance_share_av').
    if (ref is not None and 0 < ref <= 1) or column in ('mode_share_av', 'mode_distance_share_av'):
        # Set format to percent
        label_format = '.1%'
    elif (ref is not None and ref >= 300) or column in ('saved_comf_perceived_cost',):
        # Set format to 0 decimal places
        label_format = '.0f'
    else:
        # Let create_heatmap use its default format
        label_format = None

    create_heatmap(column=column, colormap=colormap, center=ref, label_format=label_format, show=False, save=True)