# Simulation analysis

In [242]:
import pandas as pd
import pickle
from os import listdir
from os.path import join
import plotly.graph_objects as go
import regex as re
import itertools

STEPS_IN_HOUR = 120
INPUT_PATH = 'pickle'

In [243]:
data_collector_files = [f for f in listdir(INPUT_PATH) if re.match('datacollector', f)]
data_collector_files

['datacollector$prob_coda4_jockey2$1630369298.pkl',
 'datacollector$codacondivisa_7$1631022038.pkl',
 'datacollector$coda3_jockey2$1630369287.pkl',
 'datacollector$validazione_1$1630369269.pkl',
 'datacollector$prob_coda1_jockey1$1630369261.pkl',
 'datacollector$coda2_jockey2$1630369254.pkl',
 'datacollector$coda1_jockey1_6$1631022379.pkl',
 'datacollector$validazione_3$1630369291.pkl',
 'datacollector$coda3_jockey1$1630369285.pkl',
 'datacollector$prob_coda2_jockey2$1630369266.pkl',
 'datacollector$coda1_jockey1_8$1631022797.pkl',
 'datacollector$prob_coda3_jockey1$1630369303.pkl',
 'datacollector$codacondivisa_5$1631022600.pkl',
 'datacollector$validazione_4$1630369283.pkl',
 'datacollector$codacondivisa_8$1631021968.pkl',
 'datacollector$codacondivisa$1630369237.pkl',
 'datacollector$codacondivisa_6$1631022519.pkl',
 'datacollector$coda4_jockey1$1630369264.pkl',
 'datacollector$prob_coda1_jockey2$1630369294.pkl',
 'datacollector$coda1_jockey1_5$1631022921.pkl',
 'datacollector$valid

In [244]:
def read_pickle_file(filename):
    with open(join(INPUT_PATH, filename), 'rb') as f:
        return pickle.load(f)

def read_simulation(filename, df_arrivals, steps_in_hour=STEPS_IN_HOUR):
    simulation_dict = read_pickle_file(filename)
    simulation_df = pd.DataFrame(simulation_dict)
    # Add simulation name
    simulation_name = filename.split('$')[1]
    full_length = steps_in_hour * len(df_arrivals)
    q, r = len(simulation_df) // full_length, len(simulation_df) % full_length
    simulation_df["simulation_name"] = [simulation_name] * (q * full_length + r)
    # Add hours
    hours = [([hour]*steps_in_hour) for hour in df_arrivals["hour"]] * q
    hours = hours + [[df_arrivals["hour"].iloc[-1]] * (len(simulation_df) % full_length)]
    simulation_df["hour"] = list(itertools.chain(*hours))

    return simulation_df

df_arrivals = read_pickle_file('df_arrivals_aggregated.pkl')

df_simulations = pd.concat([read_simulation(x, df_arrivals) for x in data_collector_files])
df_simulations.head()

Unnamed: 0,Total_customers,Density_total,Flow_total,Density_standard,Flow_standard,Density_self_scan,Flow_self_scan,Total_steps,Avg_waiting_times_standard,Avg_waiting_times_self_scan,Number_exiting_customers,simulation_name,hour
0,1,0.045455,0.045455,0.0,0.0,0.2,0.2,2,0.0,0.0,0,prob_coda4_jockey2,8
1,2,0.090909,0.045455,0.058824,0.058824,0.2,0.0,3,0.0,0.0,0,prob_coda4_jockey2,8
2,3,0.136364,0.045455,0.117647,0.058824,0.2,0.0,4,0.0,0.0,0,prob_coda4_jockey2,8
3,3,0.136364,0.0,0.117647,0.0,0.2,0.0,5,0.0,0.0,0,prob_coda4_jockey2,8
4,4,0.181818,0.045455,0.176471,0.058824,0.2,0.0,6,0.0,0.0,0,prob_coda4_jockey2,8


In [271]:
aggregate_hours = lambda df: df.groupby(by=['hour']).mean().reset_index()

def add_simulation_to_plot(
    fig, df_simulations, simulation_name, feature,
    x_axis="hour", normalize=True, line=None, mode='lines+markers'):
    if not line:
        line=dict()

    target_simulation = df_simulations.query(f'simulation_name == "{simulation_name}"')
    df = aggregate_hours(target_simulation)
    y_values = df[feature]
    x_values = df[x_axis]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=x_values, y=y_values,
                             mode=mode, name=simulation_name, line=line))
    return fig


def add_ground_truth(fig, df_arrivals, normalize=True):

    y_values = df_arrivals["value"]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=df_arrivals["hour"], y=y_values,
                             mode='lines+markers', name='Ground truth'))

    return fig

def decorate_figure(fig, title='', y_axis_title='', normalize=False, dtick=15):
    if normalize:
        dtick = 0.01
    
    fig = fig.update_layout(
        xaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = 1
        ),
        yaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = dtick
        ),
        title={
            'text': title + ' (Normalized)' if normalize else '',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title=y_axis_title + ' (Normalized)' if normalize else '',
        legend_title="Distribution",
    )
    return fig

## Validazione


In [246]:
# Validazione

normalize = True

fig =  go.Figure()
fig = add_ground_truth(fig, df_arrivals, normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Total_customers", normalize=normalize)
fig = decorate_figure(fig,
    title="Total customers - Real data vs simulation",
    y_axis_title="Number of incoming customers",
    normalize=normalize)
fig.show()

## Average waiting time

In [247]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()

## Average waiting time - No jockey

In [248]:
# Average waiting time
normalize = False

fig = create_blank_figure(
    title="Average waiting time",
    y_axis_title="Value", 
    dtick=1,
    normalize=normalize)

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", 
    dtick=1,
    normalize=normalize)

fig =  fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()

In [249]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda3_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda3_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()

In [250]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()

In [251]:
# Average waiting time
normalize = False

fig =  go.Figure()

dash_line=dict(dash='dash')

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_6', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_7', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_8', "Avg_waiting_times_standard", normalize=normalize)

fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_6', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_7', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_8', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_5', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_5', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=5)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()

In [252]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'self_scan', "Avg_waiting_times_self_scan", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()

In [253]:
# Average waiting time
normalize = False

fig =  go.Figure()

import plotly.express as px

colors = px.colors.qualitative.Plotly

fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[0], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[1], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[2], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[3], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[0]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[1]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[2]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[3]))


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )


fig.show()

In [287]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione1', "Flow_total",
    x_axis='Density_total', normalize=normalize)
fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione2', "Flow_total",
    x_axis='Density_total', normalize=normalize)
fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione3', "Flow_total",
    x_axis='Density_total', normalize=normalize)
fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione4', "Flow_total",
    x_axis='Density_total', normalize=normalize)

fig.show()

In [314]:
simulation_name = 'validazione_1'
feature="Flow_total"
x_axis='Density_total'

target_simulation = df_simulations.query(f'simulation_name == "{simulation_name}"')
df = aggregate_hours(target_simulation)
y_values = df[feature]
x_values = df[x_axis]
y_values, x_values

(0     0.037179
 1     0.046154
 2     0.062179
 3     0.071474
 4     0.074038
 5     0.069231
 6     0.066026
 7     0.063462
 8     0.067308
 9     0.067949
 10    0.064744
 11    0.056410
 12    0.042628
 13    0.034295
 14    0.002920
 Name: Flow_total, dtype: float64,
 0     1.012500
 1     2.071474
 2     3.083013
 3     3.331731
 4     3.388141
 5     3.575641
 6     3.032051
 7     3.327564
 8     3.301923
 9     3.485897
 10    3.577244
 11    2.640064
 12    2.090705
 13    1.718269
 14    0.400482
 Name: Density_total, dtype: float64)

In [315]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=x_values, y=y_values, mode='markers'))