# Simulation analysis

In [10]:
import pandas as pd
import pickle
from os import listdir
from os.path import join
import plotly.graph_objects as go
import regex as re
import itertools

STEPS_IN_HOUR = 120
INPUT_PATH = 'pickle'

In [11]:
data_collector_files = [f for f in listdir(INPUT_PATH) if re.match('datacollector', f)]
data_collector_files

['datacollector$coda1_jockey1$1630369264.pkl',
 'datacollector$coda1_jockey2$1630369239.pkl',
 'datacollector$coda2_jockey1$1630369258.pkl',
 'datacollector$coda2_jockey2$1630369254.pkl',
 'datacollector$coda3_jockey1$1630369285.pkl',
 'datacollector$coda3_jockey2$1630369287.pkl',
 'datacollector$coda4_jockey1$1630369264.pkl',
 'datacollector$coda4_jockey2$1630369303.pkl',
 'datacollector$codacondivisa$1630369237.pkl',
 'datacollector$prob_coda1_jockey1$1630369261.pkl',
 'datacollector$prob_coda1_jockey2$1630369294.pkl',
 'datacollector$prob_coda2_jockey1$1630369250.pkl',
 'datacollector$prob_coda2_jockey2$1630369266.pkl',
 'datacollector$prob_coda3_jockey1$1630369303.pkl',
 'datacollector$prob_coda3_jockey2$1630369245.pkl',
 'datacollector$prob_coda4_jockey1$1630369280.pkl',
 'datacollector$prob_coda4_jockey2$1630369298.pkl',
 'datacollector$self_scan$1630369279.pkl',
 'datacollector$validazione_1$1630369269.pkl',
 'datacollector$validazione_2$1630369272.pkl',
 'datacollector$validazi

In [12]:
def read_pickle_file(filename):
    with open(join(INPUT_PATH, filename), 'rb') as f:
        return pickle.load(f)

def read_simulation(filename, df_arrivals, steps_in_hour=STEPS_IN_HOUR):
    simulation_dict = read_pickle_file(filename)
    simulation_df = pd.DataFrame(simulation_dict)
    # Add simulation name
    simulation_name = filename.split('$')[1]
    full_length = steps_in_hour * len(df_arrivals)
    q, r = len(simulation_df) // full_length, len(simulation_df) % full_length
    simulation_df["simulation_name"] = [simulation_name] * (q * full_length + r)
    # Add hours
    hours = [([hour]*steps_in_hour) for hour in df_arrivals["hour"]] * q
    hours = hours + [[df_arrivals["hour"].iloc[-1]] * (len(simulation_df) % full_length)]
    simulation_df["hour"] = list(itertools.chain(*hours))

    return simulation_df

df_arrivals = read_pickle_file('df_arrivals_aggregated.pkl')

df_simulations = pd.concat([read_simulation(x, df_arrivals) for x in data_collector_files])
df_simulations.head()

Unnamed: 0,Total_customers,Density_total,Flow_total,Density_standard,Flow_standard,Density_self_scan,Flow_self_scan,Total_steps,Avg_waiting_times_standard,Avg_waiting_times_self_scan,Number_exiting_customers,simulation_name,hour
0,1,0.0625,0.0625,0.0,0.0,0.0,0.0,2,0.0,0.0,0,coda1_jockey1,8
1,1,0.0625,0.0,0.0,0.0,0.0,0.0,3,0.0,0.0,0,coda1_jockey1,8
2,2,0.125,0.0625,0.0,0.0,0.0,0.0,4,0.0,0.0,0,coda1_jockey1,8
3,3,0.1875,0.0625,0.0,0.0,0.0,0.0,5,0.0,0.0,0,coda1_jockey1,8
4,4,0.25,0.0625,0.0,0.0,0.0,0.0,6,0.0,0.0,0,coda1_jockey1,8


In [13]:
aggregate_hours = lambda df: df.groupby(by=['hour']).mean().reset_index()

def add_simulation_to_plot(
    fig, df_simulations, simulation_name, feature,
    x_axis="hour", normalize=True, line=None, mode='lines+markers'):
    if not line:
        line=dict()

    target_simulation = df_simulations.query(f'simulation_name == "{simulation_name}"')
    df = aggregate_hours(target_simulation)
    y_values = df[feature]
    x_values = df[x_axis]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=x_values, y=y_values,
                             mode=mode, name=simulation_name, line=line))
    return fig


def add_ground_truth(fig, df_arrivals, normalize=True):

    y_values = df_arrivals["value"]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=df_arrivals["hour"], y=y_values,
                             mode='lines+markers', name='Ground truth'))

    return fig

def decorate_figure(fig, title='', y_axis_title='', normalize=False, dtick=15):
    if normalize:
        dtick = 0.01
    
    fig = fig.update_layout(
        xaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = 1
        ),
        yaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = dtick
        ),
        title={
            'text': title + ' (Normalized)' if normalize else '',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title=y_axis_title + ' (Normalized)' if normalize else '',
        legend_title="Distribution",
    )
    return fig

## Validazione


In [14]:
# Validazione

normalize = True

fig =  go.Figure()
fig = add_ground_truth(fig, df_arrivals, normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Total_customers", normalize=normalize)
fig = decorate_figure(fig,
    title="Total customers - Real data vs simulation",
    y_axis_title="Number of incoming customers",
    normalize=normalize)
fig.show()
fig.write_image("doc/report/images/results/total_customers_validation.png")

## Average waiting time

In [17]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()
fig.write_image("doc/report/images/results/avg_wt_jockey_a.png")

## Average waiting time - No jockey

In [18]:
# Average waiting time
normalize = False

fig = create_blank_figure(
    title="Average waiting time",
    y_axis_title="Value", 
    dtick=1,
    normalize=normalize)

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", 
    dtick=1,
    normalize=normalize)

fig =  fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()
fig.write_image("doc/report/images/results/avg_wt_validation.png")

NameError: name 'create_blank_figure' is not defined

In [19]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda3_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda3_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()
fig.write_image("doc/report/images/results/avg_wt_jockey_b.png")

In [24]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()
fig.write_image("doc/report/images/results/avg_wt_jockey_best.png")

In [20]:
# Average waiting time
normalize = False

fig =  go.Figure()

dash_line=dict(dash='dash')

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_6', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_7', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_8', "Avg_waiting_times_standard", normalize=normalize)

fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_6', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_7', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_8', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_5', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_5', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=5)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()
fig.write_image("doc/report/images/results/avg_wt_codacondivisa.png")

In [21]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'self_scan', "Avg_waiting_times_self_scan", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )

fig.show()
fig.write_image("doc/report/images/results/avg_wt_selfscan.png")

In [22]:
# Average waiting time
normalize = False

fig =  go.Figure()

import plotly.express as px

colors = px.colors.qualitative.Plotly

fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[0], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[1], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[2], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[3], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[0]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[1]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[2]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[3]))


fig = decorate_figure(fig,
    title="Average waiting time",
    y_axis_title="Value", dtick=1)
fig = fig.update_layout(
        title={
            'text': "title" ,
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Cjao",
        legend_title="Distribution",
    )


fig.show()
fig.write_image("doc/report/images/results/avg_wt_prob.png")

In [23]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione1', "Flow_total",
    x_axis='Density_total', normalize=normalize)
fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione2', "Flow_total",
    x_axis='Density_total', normalize=normalize)
fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione3', "Flow_total",
    x_axis='Density_total', normalize=normalize)
fig = add_simulation_to_plot(
    fig, df_simulations, 'validazione4', "Flow_total",
    x_axis='Density_total', normalize=normalize)

fig.show()

In [None]:
simulation_name = 'validazione_1'
feature="Flow_total"
x_axis='Density_total'

target_simulation = df_simulations.query(f'simulation_name == "{simulation_name}"')
df = aggregate_hours(target_simulation)
y_values = df[feature]
x_values = df[x_axis]
y_values, x_values

In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=x_values, y=y_values, mode='markers'))