# Simulation analysis

In [103]:
import pandas as pd
import pickle
from os import listdir
from os.path import join
import plotly.graph_objects as go
import regex as re
import itertools

IMAGE_SCALE = 3
STEPS_IN_HOUR = 120
INPUT_PATH = 'pickle'

In [104]:
data_collector_files = [f for f in listdir(INPUT_PATH) if re.match('datacollector', f)]
data_collector_files

['datacollector$coda1_jockey1$1631198293.pkl',
 'datacollector$coda1_jockey1_5$1631198796.pkl',
 'datacollector$coda1_jockey1_6$1631198296.pkl',
 'datacollector$coda1_jockey1_7$1631198306.pkl',
 'datacollector$coda1_jockey1_8$1631198308.pkl',
 'datacollector$coda1_jockey2$1631198306.pkl',
 'datacollector$coda2_jockey1$1631198302.pkl',
 'datacollector$coda2_jockey2$1631198306.pkl',
 'datacollector$coda3_jockey1$1631198285.pkl',
 'datacollector$coda3_jockey2$1631198300.pkl',
 'datacollector$coda4_jockey1$1631198303.pkl',
 'datacollector$coda4_jockey2$1631198302.pkl',
 'datacollector$codacondivisa$1631198465.pkl',
 'datacollector$codacondivisa_5$1631198288.pkl',
 'datacollector$codacondivisa_6$1631198285.pkl',
 'datacollector$codacondivisa_7$1631198257.pkl',
 'datacollector$codacondivisa_8$1631198287.pkl',
 'datacollector$prob_coda1_jockey1$1631198304.pkl',
 'datacollector$prob_coda1_jockey2$1631198292.pkl',
 'datacollector$prob_coda2_jockey1$1631198286.pkl',
 'datacollector$prob_coda2_jo

In [105]:
def read_pickle_file(filename):
    with open(join(INPUT_PATH, filename), 'rb') as f:
        return pickle.load(f)

def read_simulation(filename, df_arrivals, steps_in_hour=STEPS_IN_HOUR):
    simulation_dict = read_pickle_file(filename)
    simulation_df = pd.DataFrame(simulation_dict)
    # Add simulation name
    simulation_name = filename.split('$')[1]
    full_length = steps_in_hour * len(df_arrivals)
    q, r = len(simulation_df) // full_length, len(simulation_df) % full_length
    simulation_df["simulation_name"] = [simulation_name] * (q * full_length + r)
    # Add hours
    hours = [([hour]*steps_in_hour) for hour in df_arrivals["hour"]] * q
    hours = hours + [[df_arrivals["hour"].iloc[-1]] * (len(simulation_df) % full_length)]
    simulation_df["hour"] = list(itertools.chain(*hours))

    return simulation_df

df_arrivals = read_pickle_file('df_arrivals_aggregated.pkl')

df_simulations = pd.concat([read_simulation(x, df_arrivals) for x in data_collector_files])
df_simulations.head()

Unnamed: 0,Total_customers,Density_total,Flow_total,Density_standard,Flow_standard,Density_self_scan,Flow_self_scan,Total_steps,Avg_waiting_times_standard,Avg_waiting_times_self_scan,Number_exiting_customers,simulation_name,hour
0,1,0.142857,0.142857,0.142857,0.142857,0.0,0.0,2,0.0,0.0,0,coda1_jockey1,8
1,2,0.285714,0.142857,0.285714,0.142857,0.0,0.0,3,0.0,0.0,0,coda1_jockey1,8
2,3,0.428571,0.142857,0.428571,0.142857,0.0,0.0,4,0.0,0.0,0,coda1_jockey1,8
3,3,0.428571,0.0,0.428571,0.0,0.0,0.0,5,0.0,0.0,0,coda1_jockey1,8
4,4,0.571429,0.142857,0.571429,0.142857,0.0,0.0,6,0.0,0.0,0,coda1_jockey1,8


In [106]:
aggregate_hours = lambda df: df.groupby(by=['hour']).mean().reset_index()

def add_simulation_to_plot(
    fig, df_simulations, simulation_name, feature,
    x_axis="hour", normalize=True, line=None, mode='lines+markers'):
    if not line:
        line=dict()

    target_simulation = df_simulations.query(f'simulation_name == "{simulation_name}"')
    df = aggregate_hours(target_simulation)
    y_values = df[feature]
    x_values = df[x_axis]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=x_values, y=y_values,
                             mode=mode, name=simulation_name, line=line))
    return fig


def add_ground_truth(fig, df_arrivals, normalize=True):

    y_values = df_arrivals["value"]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=df_arrivals["hour"], y=y_values,
                             mode='lines+markers', name='Ground truth'))

    return fig

def decorate_figure(fig, title='', x_axis_title='Hour',
                    y_axis_title='', normalize=False, dtick=15):
    if normalize:
        dtick = 0.01

    fig = fig.update_layout(
        xaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = 1
        ),
        yaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = dtick
        ),
        title={
            'text': title + (' (Normalized)' if normalize else ''),
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title=x_axis_title,
        yaxis_title=y_axis_title + (' (Normalized)' if normalize else ''),
        legend_title="Distribution",
    )
    return fig

## Validazione


In [107]:
# Validazione

normalize = True

fig =  go.Figure()
fig = add_ground_truth(fig, df_arrivals, normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Total_customers", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Total_customers", normalize=normalize)
fig = decorate_figure(fig,
    title="Total customers - Ground truth vs simulations for validation",
    y_axis_title="Number of incoming customers",
    normalize=normalize)
fig.show()
fig.write_image("doc/report/images/results/total_customers_validation.png", scale=IMAGE_SCALE)

## Average waiting time

In [108]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = decorate_figure(fig,
    title="Average waiting time - simulations with jockey A",
    y_axis_title="Avg waiting time", dtick=1)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_jockey_a.png", scale=IMAGE_SCALE)

## Average waiting time - No jockey

In [109]:
# Average waiting time
normalize = False

fig = go.Figure()
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time - simulations for validation",
    y_axis_title="Avg waiting time",
    dtick=1,
    normalize=normalize)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_validation.png", scale=IMAGE_SCALE)

In [110]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'validazione_3', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione_4', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda3_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda3_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time - simulations with jockey B",
    y_axis_title="Avg waiting time", dtick=1)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_jockey_b.png", scale=IMAGE_SCALE)

In [111]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda4_jockey2', "Avg_waiting_times_standard", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time - best simulations with jockey",
    y_axis_title="Avg waiting time", dtick=1)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_jockey_best.png", scale=IMAGE_SCALE)

In [112]:
# Average waiting time
normalize = False

fig =  go.Figure()

dash_line=dict(dash='dash')

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_6', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_7', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_8', "Avg_waiting_times_standard", normalize=normalize)

fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_6', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_7', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_8', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1_5', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'codacondivisa_5', "Avg_waiting_times_standard", normalize=normalize, line=dash_line)


fig = decorate_figure(fig,
    title="Average waiting time - jockey and parallel queues vs N-fork queue",
    y_axis_title="Avg waiting time", dtick=5)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_codacondivisa.png", scale=IMAGE_SCALE)

In [113]:
# Average waiting time
normalize = False

fig =  go.Figure()

fig = add_simulation_to_plot(fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize)
fig = add_simulation_to_plot(fig, df_simulations, 'self_scan', "Avg_waiting_times_self_scan", normalize=normalize)


fig = decorate_figure(fig,
    title="Average waiting time - standard cashdesks vs self-scan cashdesks",
    y_axis_title="Avg waiting time", dtick=1)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_selfscan.png", scale=IMAGE_SCALE)

In [114]:
# Average waiting time
normalize = False

fig =  go.Figure()

import plotly.express as px

colors = px.colors.qualitative.Plotly

fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[0], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[1], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[2], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'prob_coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[3], dash='dot'))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda1_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[0]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda1_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[1]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda2_jockey1', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[2]))
fig = add_simulation_to_plot(
    fig, df_simulations, 'coda2_jockey2', "Avg_waiting_times_standard", normalize=normalize,
    line=dict(color=colors[3]))


fig = decorate_figure(fig,
    title="Average waiting time - probabilistic vs deterministic",
    y_axis_title="Avg waiting time", dtick=1)

fig.show()
fig.write_image("doc/report/images/results/avg_wt_prob.png", scale=IMAGE_SCALE)


In [115]:
import numpy as np

def aggregate_steps(df, factor):
    df['aux'] = df.reset_index()['index'].apply(lambda v: int(np.floor(v / factor) * factor))
    return df.groupby(by=['aux']).mean().reset_index().drop(columns=['aux'])

feature='Flow_total'
x_axis='Density_standard'

fig = go.Figure()

n1, n2 = 'coda1_jockey1', 'codacondivisa'

target_simulation = df_simulations.query(f'simulation_name == "{n1}"')
target_simulation = aggregate_steps(target_simulation, 60)

x1, y1 = (target_simulation[x_axis], target_simulation[feature])

target_simulation = df_simulations.query(f'simulation_name == "{n2}"')
target_simulation = aggregate_steps(target_simulation, 60)
x2, y2 = (target_simulation[x_axis], target_simulation[feature])

fig.add_trace(go.Scatter(x=x2, y=y2, mode='markers', name=n2))
fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name=n1))


fig = decorate_figure(fig,
    title="Prova",
    x_axis_title='Density total', y_axis_title="Flow total", dtick=0.05)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [116]:
import numpy as np

def aggregate_steps(df, factor):
    df['aux'] = df.reset_index()['index'].apply(lambda v: int(np.floor(v / factor) * factor))
    return df.groupby(by=['aux']).mean().reset_index().drop(columns=['aux'])

feature='Density_standard'
x_axis='Avg_waiting_times_standard'

fig = go.Figure()

n1, n2 = 'coda1_jockey1', 'self_scan'

target_simulation = df_simulations.query(f'simulation_name == "{n1}"')
target_simulation = aggregate_steps(target_simulation, 60)

x1, y1 = (target_simulation[x_axis], target_simulation[feature])

feature='Density_self_scan'
x_axis='Avg_waiting_times_self_scan'
target_simulation = df_simulations.query(f'simulation_name == "{n2}"')
target_simulation = aggregate_steps(target_simulation, 60)
x2, y2 = (target_simulation[x_axis], target_simulation[feature])

fig.add_trace(go.Scatter(x=x2, y=y2, mode='markers', name=n2))
fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name=n1))


fig = decorate_figure(fig,
    title="KDE (Kernel Density Estimates) - standard cashdesks vs self-scan cashdesks",
    x_axis_title='Avg waiting time', y_axis_title="Density", dtick=0.05)

fig.show()
fig.write_image("doc/report/images/results/kde_self_scan.png", scale=IMAGE_SCALE)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [117]:
import numpy as np

def aggregate_steps(df, factor):
    df['aux'] = df.reset_index()['index'].apply(lambda v: int(np.floor(v / factor) * factor))
    return df.groupby(by=['aux']).mean().reset_index().drop(columns=['aux'])

feature='Density_standard'
x_axis='Avg_waiting_times_standard'

fig = go.Figure()

ns = ['codacondivisa_5', 'codacondivisa_6', 'codacondivisa_7', 'codacondivisa_8',
      'coda1_jockey1_5', 'coda1_jockey1_6', 'coda1_jockey1_7', 'coda1_jockey1_8']

for n in ns:
    target_simulation = df_simulations.query(f'simulation_name == "{n}"')
    target_simulation = aggregate_steps(target_simulation, 60)
    x1, y1 = (target_simulation[x_axis], target_simulation[feature])
    fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name=n))


fig = decorate_figure(fig,
    title="KDE (Kernel Density Estimates) - jockey and parallel queues vs N-fork queue",
    x_axis_title='Avg waiting time', y_axis_title="Density", dtick=0.05)

fig.show()
fig.write_image("doc/report/images/results/kde_codacondivisa.png", scale=IMAGE_SCALE)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [118]:
import numpy as np

def aggregate_steps(df, factor):
    df['aux'] = df.reset_index()['index'].apply(lambda v: int(np.floor(v / factor) * factor))
    return df.groupby(by=['aux']).mean().reset_index().drop(columns=['aux'])

feature='Density_standard'
x_axis='Avg_waiting_times_standard'

fig = go.Figure()

ns = ['validazione_1', 'validazione_2', 'coda1_jockey1', 'coda1_jockey2', 'coda2_jockey1', 'coda2_jockey2']

for n in ns:
    target_simulation = df_simulations.query(f'simulation_name == "{n}"')
    target_simulation = aggregate_steps(target_simulation, 60)
    x1, y1 = (target_simulation[x_axis], target_simulation[feature])
    fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name=n))


fig = decorate_figure(fig,
    title="KDE (Kernel Density Estimates) - jockey and parallel queues vs N-fork queue",
    x_axis_title='Avg waiting time', y_axis_title="Density", dtick=0.05)

fig.show()
fig.write_image("doc/report/images/results/kde_jockey_a.png", scale=IMAGE_SCALE)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

